Unity 的 ML-Agents 對智能 AI 的支持,讓 AI 在稍微複雜多變的環境下也能像人類玩家一樣思考,帶來了極大驚喜。下面分享一下機器學習的模式。
如圖:
給定的劇情比較簡單:要求 AI 儘量在紛繁複雜的物體中間撿拾綠色物體,這正是「AI 需要靠近某些攻擊對象」或「分類撿拾有用道具」這類需求的基礎。上一篇已經給大家介紹了環境的搭建,本篇直接上乾貨。
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;
using System;

/// <summary>
/// ML-Agents agent that learns to collect "good" cubes (tag "Cube+") while
/// avoiding "bad" cubes (tag "Cube-") and walls inside a square arena.
/// Rewards: +1 per good cube touched, -1 per bad cube, -0.1 per wall hit.
/// </summary>
public class NPCAgent : Agent
{
    // Half-extent of the square arena; all random respawns stay within ±this on X/Z.
    const float ArenaHalfSize = 40f;
    // Degrees per second applied when the policy outputs a full-strength turn.
    const float TurnSpeed = 150f;
    // Velocity-change impulse multiplier for the forward/backward action.
    const float MoveForce = 1.5f;

    public GameObject ground;
    public GameObject redcube;
    public GameObject greencube;

    RayPerception rayPer;
    Rigidbody rd;

    /// <summary>Caches component references once at startup.</summary>
    public override void InitializeAgent()
    {
        base.InitializeAgent();
        rayPer = GetComponent<RayPerception>();
        rd = GetComponent<Rigidbody>();
    }

    /// <summary>
    /// Observations: normalized episode progress plus ray-cast perception of
    /// good cubes, bad cubes and walls at five angles around the agent.
    /// </summary>
    public override void CollectObservations()
    {
        float rayDistance = 12f;
        float[] rayAngles = { 20f, 60f, 90f, 120f, 160f };
        string[] detectableObjects = { "Cube+", "Cube-", "wall" };
        AddVectorObs(GetStepCount() / (float)agentParameters.maxStep);
        AddVectorObs(rayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
    }

    /// <summary>Applies the two continuous actions (forward thrust, turn).</summary>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // AddReward(-1f/agentParameters.maxStep);
        MoveAgent(vectorAction);
    }

    // Translates policy outputs into physics: action[0] drives forward/back,
    // action[1] rotates around the up axis. Ends the episode when no good
    // cubes remain in the scene.
    private void MoveAgent(float[] vectorAction)
    {
        if (GameObject.FindGameObjectsWithTag("Cube+").Length <= 0)
        {
            Done();
            return;
        }
        Vector3 dirToGo = transform.forward * Mathf.Clamp(vectorAction[0], -1f, 1f);
        Vector3 rotateDir = transform.up * Mathf.Clamp(vectorAction[1], -1f, 1f);
        transform.Rotate(rotateDir, Time.deltaTime * TurnSpeed);
        rd.AddForce(dirToGo * MoveForce, ForceMode.VelocityChange);
    }

    // Reward shaping on contact; touched cubes are teleported to a fresh
    // random spot instead of destroyed, so the episode can continue.
    void OnCollisionEnter(Collision col)
    {
        if (col.gameObject.CompareTag("Cube+"))
        {
            SetReward(1f);
            col.gameObject.transform.position = RandomArenaPosition(0f);
            Debug.Log("ok successful!");
        }
        else if (col.gameObject.CompareTag("Cube-"))
        {
            SetReward(-1f);
            col.gameObject.transform.position = RandomArenaPosition(0f);
            Debug.Log("sorry!");
        }
        else if (col.gameObject.CompareTag("wall"))
        {
            SetReward(-0.1f);
        }
    }

    /// <summary>
    /// Starts a new episode: the agent and every cube get a fresh random
    /// position, and the agent's momentum is cleared.
    /// </summary>
    public override void AgentReset()
    {
        transform.position = RandomArenaPosition(0.45f);
        transform.rotation = Quaternion.Euler(0f, UnityEngine.Random.Range(0f, 360f), 0f);
        // transform.rotation = Quaternion.Euler(0f, 0f, 0f);
        rd.velocity = Vector3.zero;
        // Query each tag ONCE — the original re-ran the scene-wide tag search
        // on every loop iteration, which is O(scene) per cube.
        ScatterObjects(GameObject.FindGameObjectsWithTag("Cube+"));
        ScatterObjects(GameObject.FindGameObjectsWithTag("Cube-"));
    }

    // Uniform random point inside the arena at the given height.
    static Vector3 RandomArenaPosition(float y)
    {
        return new Vector3(
            UnityEngine.Random.Range(-ArenaHalfSize, ArenaHalfSize),
            y,
            UnityEngine.Random.Range(-ArenaHalfSize, ArenaHalfSize));
    }

    // Moves every object in the array to a fresh random arena position.
    static void ScatterObjects(GameObject[] objects)
    {
        foreach (GameObject obj in objects)
        {
            obj.transform.position = RandomArenaPosition(0f);
        }
    }
}
當碰到綠色 Cube 時獎勵 1 分,碰到有毒的紅色 Cube 懲罰 1 分,撞到牆壁則扣 0.1 分,這就是 AI 的獎勵訓練依據。
觀察參數要求觀察視覺看到的物體與自身的距離,這會讓 AI 的大腦儘可能優先選擇近距離的物體。當然我發現最終的訓練結果是:AI 並不是始終像程序機器人那樣從身邊依次掃描,而是會像人一樣,有時選擇另外的方向角度去做下一步選擇,當然大部分情況還是先處理近距離的目標。
/// <summary>
/// Casts one sphere-cast per requested angle and appends, for each ray, a
/// one-hot over the detectable tags followed by a "nothing hit" flag and the
/// normalized hit distance. Per-ray layout:
/// [tag0, tag1, ..., tagN-1, miss, hitDistance / rayDistance].
/// </summary>
public override List<float> Perceive(float rayDistance, float[] rayAngles, string[] detectableObjects, float startOffset, float endOffset) {
    perceptionBuffer.Clear();
    int tagCount = detectableObjects.Length;
    // Rays all start from the same vertically-offset origin.
    Vector3 rayOrigin = transform.position + new Vector3(0f, startOffset, 0f);
    for (int a = 0; a < rayAngles.Length; a++) {
        endPosition = transform.TransformDirection(
            PolarToCartesian(rayDistance, rayAngles[a]));
        endPosition.y = endOffset;
        if (Application.isEditor) {
            // Visualize each ray while running inside the editor.
            Debug.DrawRay(rayOrigin, endPosition, Color.black, 0.01f, true);
        }
        // tagCount one-hot slots + 1 miss flag + 1 normalized distance.
        float[] rayObservation = new float[tagCount + 2];
        if (!Physics.SphereCast(rayOrigin, 0.5f, endPosition, out hit, rayDistance)) {
            // The slot right after the tag one-hot marks "nothing detected".
            rayObservation[tagCount] = 1f;
        } else {
            // Mark the first matching tag; unknown tags leave all slots zero.
            for (int i = 0; i < tagCount; i++) {
                if (!hit.collider.gameObject.CompareTag(detectableObjects[i])) {
                    continue;
                }
                rayObservation[i] = 1;
                rayObservation[tagCount + 1] = hit.distance / rayDistance;
                break;
            }
        }
        perceptionBuffer.AddRange(rayObservation);
    }
    return perceptionBuffer;
}