高智商AI設計實戰

Unity 中的 ML-Agents 支持智能 AI,能讓處於稍微複雜、不斷變化的環境中的 AI 像人類玩家一樣思考,帶來了極大的驚喜。這裏分享一下機器學習的模式。
如圖:在這裏插入圖片描述
給定的場景比較簡單:要求 AI 儘量在紛繁複雜的物體中間撿拾綠色物體,這與 AI 需要靠近某些攻擊對象、或者分類撿拾有用道具的需求基礎相同。上一篇已經給大家介紹了環境的搭建,本篇直接上乾貨。

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;
using System;

/// <summary>
/// ML-Agents agent that is rewarded for touching objects tagged "Cube+",
/// penalised for touching objects tagged "Cube-" or "wall", and observes
/// its surroundings through a <c>RayPerception</c> component.
/// </summary>
public class NPCAgent : Agent
{
    // Scene references assigned in the Inspector. They are not read by this
    // script but are kept public so existing serialized scene data stays valid.
    public GameObject ground;
    public GameObject redcube;
    public GameObject greencube;

    // Tags of the objects the agent reacts to and can perceive.
    private const string GoodCubeTag = "Cube+";
    private const string BadCubeTag = "Cube-";
    private const string WallTag = "wall";

    // Objects are (re)spawned uniformly in [-SpawnHalfExtent, SpawnHalfExtent] on X and Z.
    private const float SpawnHalfExtent = 40f;
    private const float AgentSpawnHeight = 0.45f;
    private const float CubeSpawnHeight = 0f;

    private const float RayDistance = 12f;
    private const float TurnSpeed = 150f;
    private const float MoveSpeed = 1.5f;

    // Shared, read-only perception settings; allocated once instead of once per observation.
    private static readonly float[] RayAngles = { 20f, 60f, 90f, 120f, 160f };
    private static readonly string[] DetectableObjects = { GoodCubeTag, BadCubeTag, WallTag };

    private RayPerception rayPer;
    private Rigidbody rd;

    /// <summary>Caches the components this agent needs on every step.</summary>
    public override void InitializeAgent()
    {
        base.InitializeAgent();
        rayPer = GetComponent<RayPerception>();
        rd = GetComponent<Rigidbody>();
    }

    /// <summary>
    /// Observations: the fraction of the episode already elapsed, followed by
    /// the ray perception vector for the configured angles and tags.
    /// </summary>
    public override void CollectObservations()
    {
        // Guard against maxStep == 0, which would otherwise feed NaN/Infinity
        // into the observation vector.
        int maxStep = agentParameters.maxStep;
        float episodeProgress = maxStep > 0 ? GetStepCount() / (float)maxStep : 0f;

        AddVectorObs(episodeProgress);
        AddVectorObs(rayPer.Perceive(RayDistance, RayAngles, DetectableObjects, 0f, 0f));
    }

    /// <summary>Applies the two continuous actions (forward thrust, turn) to the agent.</summary>
    /// <param name="vectorAction">Index 0 = forward/backward, index 1 = turn; both are clamped to [-1, 1].</param>
    /// <param name="textAction">Unused.</param>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // An optional per-step time penalty (-1f / agentParameters.maxStep) can be
        // added here to push the agent to finish faster; it is disabled for now.
        MoveAgent(vectorAction);
    }

    /// <summary>
    /// Ends the episode when no "Cube+" object is left in the scene; otherwise
    /// turns and pushes the agent according to the action vector.
    /// </summary>
    private void MoveAgent(float[] vectorAction)
    {
        // FindWithTag answers "is there at least one?" without allocating an
        // array of every match on each step.
        if (GameObject.FindWithTag(GoodCubeTag) == null)
        {
            Done();
            return;
        }

        // Both directions are sampled before rotating, so the push uses the
        // heading the agent had at the start of this step.
        Vector3 dirToGo = transform.forward * Mathf.Clamp(vectorAction[0], -1f, 1f);
        Vector3 rotateDir = transform.up * Mathf.Clamp(vectorAction[1], -1f, 1f);

        transform.Rotate(rotateDir, Time.deltaTime * TurnSpeed);
        rd.AddForce(dirToGo * MoveSpeed, ForceMode.VelocityChange);
    }

    /// <summary>
    /// Reward shaping: +1 for a "Cube+", -1 for a "Cube-" (both are then moved
    /// to a new random spot), -0.1 for touching a wall.
    /// </summary>
    void OnCollisionEnter(Collision col)
    {
        GameObject other = col.gameObject;

        // AddReward (not SetReward) so that several collisions reported within
        // the same agent step accumulate instead of overwriting each other.
        if (other.CompareTag(GoodCubeTag))
        {
            AddReward(1f);
            other.transform.position = RandomPositionAtHeight(CubeSpawnHeight);
            Debug.Log("ok successful!");
        }
        else if (other.CompareTag(BadCubeTag))
        {
            AddReward(-1f);
            other.transform.position = RandomPositionAtHeight(CubeSpawnHeight);
            Debug.Log("sorry!");
        }
        else if (other.CompareTag(WallTag))
        {
            AddReward(-0.1f);
        }
    }

    /// <summary>
    /// Starts a new episode: places the agent at a random position with a random
    /// heading, stops its motion, and scatters every good and bad cube.
    /// </summary>
    public override void AgentReset()
    {
        transform.position = RandomPositionAtHeight(AgentSpawnHeight);
        transform.rotation = Quaternion.Euler(0f, UnityEngine.Random.Range(0f, 360f), 0f);

        // Clear both linear and angular motion so nothing carries over from
        // the previous episode.
        rd.velocity = Vector3.zero;
        rd.angularVelocity = Vector3.zero;

        ScatterObjectsWithTag(GoodCubeTag);
        ScatterObjectsWithTag(BadCubeTag);
    }

    /// <summary>Moves every active object carrying <paramref name="tag"/> to a random position.</summary>
    private static void ScatterObjectsWithTag(string tag)
    {
        // Query the scene once per tag; the lookup scans the scene and
        // allocates a new array on every call.
        GameObject[] tagged = GameObject.FindGameObjectsWithTag(tag);
        foreach (GameObject item in tagged)
        {
            item.transform.position = RandomPositionAtHeight(CubeSpawnHeight);
        }
    }

    /// <summary>Returns a uniformly random point inside the spawn square at the given height.</summary>
    private static Vector3 RandomPositionAtHeight(float y)
    {
        float x = UnityEngine.Random.Range(-SpawnHalfExtent, SpawnHalfExtent);
        float z = UnityEngine.Random.Range(-SpawnHalfExtent, SpawnHalfExtent);
        return new Vector3(x, y, z);
    }
}

當碰到綠色 Cube 時獎勵 1 分,碰到有毒的紅色 Cube 時懲罰 1 分,碰到牆壁時扣掉 0.1 分,這就是 AI 的獎勵訓練依據。
觀察參數要求觀察視覺射線看到的物體與自身的距離,這會讓 AI 的大腦儘可能優先選擇近距離的物體。當然,我發現最後的訓練結果並不是一直像程序機器人那樣只從身邊逐個掃描,而是會跟人一樣,有時會選擇另外的方向和角度去做下一步選擇;當然,大部分情況下還是先處理近距離的物體。

/// <summary>
/// Casts one sphere (radius 0.5) per entry in <paramref name="rayAngles"/> and
/// encodes what each cast hit into the shared perception buffer.
/// </summary>
/// <remarks>
/// Every ray contributes <c>detectableObjects.Length + 2</c> floats:
/// a one-hot slot per tag, then a "nothing hit" flag, then the hit distance
/// divided by <paramref name="rayDistance"/>. If the cast hits an object whose
/// tag is not listed, all of that ray's slots stay at zero.
/// </remarks>
/// <param name="rayDistance">Maximum cast length; also used to normalise the hit distance.</param>
/// <param name="rayAngles">Angles handed to <c>PolarToCartesian</c>, one per ray (presumably degrees; confirm against that helper).</param>
/// <param name="detectableObjects">Tags to recognise; the first matching tag wins.</param>
/// <param name="startOffset">Vertical offset added to the cast origin.</param>
/// <param name="endOffset">Value written to the Y component of the cast direction.</param>
/// <returns>The perception buffer, which is cleared and refilled on every call.</returns>
public override List<float> Perceive(float rayDistance,
    float[] rayAngles, string[] detectableObjects,
    float startOffset, float endOffset)
{
    perceptionBuffer.Clear();

    for (int rayIndex = 0; rayIndex < rayAngles.Length; rayIndex++)
    {
        // Direction of this ray in world space, with its height overridden.
        Vector3 localDirection = PolarToCartesian(rayDistance, rayAngles[rayIndex]);
        endPosition = transform.TransformDirection(localDirection);
        endPosition.y = endOffset;

        Vector3 castOrigin = transform.position + new Vector3(0f, startOffset, 0f);

        if (Application.isEditor)
        {
            Debug.DrawRay(castOrigin, endPosition, Color.black, 0.01f, true);
        }

        int tagCount = detectableObjects.Length;
        int nothingHitSlot = tagCount;
        int distanceSlot = tagCount + 1;
        float[] rayEncoding = new float[tagCount + 2];

        bool hitSomething = Physics.SphereCast(castOrigin, 0.5f, endPosition, out hit, rayDistance);
        if (!hitSomething)
        {
            rayEncoding[nothingHitSlot] = 1f;
        }
        else
        {
            // Mark the first listed tag carried by the hit object, if any.
            int tagIndex = 0;
            while (tagIndex < tagCount)
            {
                if (hit.collider.gameObject.CompareTag(detectableObjects[tagIndex]))
                {
                    rayEncoding[tagIndex] = 1f;
                    rayEncoding[distanceSlot] = hit.distance / rayDistance;
                    break;
                }

                tagIndex++;
            }
        }

        perceptionBuffer.AddRange(rayEncoding);
    }

    return perceptionBuffer;
}