Processing Q-Learning

I’m trying to make an enemy with Q-learning, but after a few minutes my enemy just runs into the wall over and over. Can someone check my code to see if I’m doing it correctly? I tried porting code that was written in Python.
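
As far as I understand it, the update the Python code was doing is the standard tabular Q-learning rule; for reference I’ve rewritten it as a Processing function below (updateQ, reward, nextState, alpha and gamma are placeholder names, not things from my sketch):

// Sketch of the standard tabular Q-learning update, with placeholder names:
// Q(s,a) = Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
void updateQ(Table qTable, int state, int action, float reward, int nextState){
  float alpha = 0.1;  // learning rate
  float gamma = 0.9;  // discount factor
  TableRow row = qTable.getRow(state);
  float oldQ = row.getFloat(action);
  // best Q-value reachable from the next state
  TableRow nextRow = qTable.getRow(nextState);
  float maxNext = nextRow.getFloat(0);
  for (int k = 1; k < nextRow.getColumnCount(); k++){
    maxNext = max(maxNext, nextRow.getFloat(k));
  }
  row.setFloat(action, oldQ + alpha * (reward + gamma * maxNext - oldQ));
}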

enemy.csv

Right,Left,Up,Down
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
-0.3,-0.3,-0.4,0.0
-1.4000002,-0.8000001,-0.6,0.0
-1.8000003,-0.8000001,-0.6,0.0
-0.9000001,-0.6,-0.9000001,0.0
-0.5,-0.4,-0.1,0.0
-0.1,-0.1,-0.5,0.0
0.0,0.0,-0.1,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
-1.2000002,-0.6,-0.2,0.0
-2.3999999,-2.0000002,-1.8000003,0.0
-0.70000005,-0.8000001,-1.2000002,0.0
-0.3,-0.3,-0.2,0.0
-0.2,-0.1,0.0,0.0
0.0,0.0,-0.2,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0

wall.csv

197,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,49,50,74,75,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,124,125,149,150,174,175,199,200,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,249,250,274,275,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,324,325,349,350,374,375,399,400,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,449,450,474,475,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,524,525,549,550,574,575,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
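
Each number in wall.csv is a cell index on the 25x25 grid (625 cells total). To place a wall I turn the index into pixel coordinates, which comes down to the same thing the while loops in my Wall / Finish / Enemy constructors do; roughly (cellToPixel is just a name for illustration, it isn’t in my sketch):

// Convert a grid cell index (0..624) to pixel coordinates, with 25 px per cell.
// Gives the same result as the while loops in the Wall / Finish / Enemy constructors.
PVector cellToPixel(int n){
  int col = n % 25;   // grid column
  int row = n / 25;   // grid row
  return new PVector(col * 25, row * 25);
}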

qlearningTest

Table enemyThink,wallTable;
TableRow tr;
int s = 25;
ArrayList <Wall> wall = new ArrayList<Wall>();
Enemy e;
Finish f;

void setup(){
  size(625,625);
  e = new Enemy(52);
  f = new Finish(121);
  enemyThink = loadTable("enemy.csv","header");
  /*enemyThink = new Table();
  enemyThink.addColumn("Right");
  enemyThink.addColumn("Left");
  enemyThink.addColumn("Up");
  enemyThink.addColumn("Down");
  for (int i = 0; i < 125; i++){
    tr = enemyThink.addRow();
    for (int j = 0; j < 4; j++){
      tr.setFloat(j,0);
    }
  }*/
  saveTable(enemyThink,"enemy.csv");
  wallTable = loadTable("wall.csv");
  tr = wallTable.getRow(0);
  for (int i = 1; i < wallTable.getColumnCount(); i++){
    int tn = tr.getInt(i);
    wall.add(new Wall(tn));
  }
}

void draw(){
  background(0);
  for (Wall w: wall){
    w.show();
  }
  e.show();
  e.think();
  if (e.death == true || e.win == true){
    e.pos = new PVector(2 * s, 2 * s);
    e.state = 52;
    e.death = false;
  }
  f.show();
  text(e.state,200,50);
}

wall

class Wall{
  int j = 0;
  int jj = 0;
  int num = 0;
  PVector pos;
  
  Wall(int n){
    num = n;
    j = n;
    while(j > 24){
      jj += 1;
      j -= 25;
    }
    pos = new PVector(j * s, jj * s);
  }// wall
  
  void show(){
    fill(255);
    rect(pos.x,pos.y,s,s);
    fill(0);
    textSize(10);
    text(num,pos.x + 2, pos.y + 20);
  }
}

finish

class Finish{
  PVector pos;
  int j, jj, num;
  
  Finish(int n){
    j = n;
    jj = 0;
    num = n;
    while (j > 24){
      jj += 1;
      j -= 25;
    }
    pos = new PVector(j * s,jj * s);
  }
  
  void show(){
    fill(0,255,0);
    rect(pos.x,pos.y,3*s,s);
  }
}

And here is where I placed the Q-learning:
enemy

class Enemy{
  int j = 0;
  int jj = 0;
  int num = 0;
  float learning_rate = 0.1;
  PVector pos;
  int state = 0;
  float explore = 1;
  float minexplore = 0.01;
  float decayRate = 0.001;
  float maxSteps = 99;
  int currentMove = 0;
  ///////////////////////////////////////////////
  boolean death = false;
  boolean win = false;
  int a = 0;
  String movement = " ";
  
  Enemy(int n){
    state = n;
    j = n;
    num = 0;
    while(j > 24){
      jj += 1;
      j -= 25;
    }
    pos = new PVector(j * s, jj * s);
  }// Enemy
  
  void show(){
    fill(255,0,0);
    rect(pos.x,pos.y,s,s);
  }
  
  void think(){
    if (currentMove < maxSteps){
      tr = enemyThink.getRow(state);
      float r = random(1);
      if (r > explore){
        float ln = 0;
        a = 0;
        for (int j = 0; j < 4; j++){
          float tn = tr.getFloat(j);
          if (tn > ln){
            a = j;
          }// n > large number
        }// j
      }// r > explore
      if (r <= explore){
        float rn = random(0,3);
        a = int(rn);
      }// do random
      switch(a){
        case 0:
          movement = "Right";
        break;
        case 1:
          movement = "Left";
        break;
        case 2:
          movement = "Up";
        break;
        case 3:
          movement = "Down";
        break;
      }
      move();
      check();
      float t = tr.getFloat(a);
      tr.setFloat(a, t - learning_rate);
      saveTable(enemyThink,"enemy.csv");
      explore -= decayRate;
      currentMove += 1;
    } else if (currentMove == maxSteps){
      currentMove = 0;
    }
  }// think
  
  void move(){
    if (movement == "Right"){
      pos.x += s;
      state += 1;
    }
    if (movement == "Left"){
      pos.x -= s;
      state -= 1;
    }
    if (movement == "Up"){
      pos.y -= s;
      state -= 25;
    }
    if (movement == "Down"){
      pos.y += s;
      state += 25;
    }
  }
  
  void check(){
    death = false;
    win = false;
    for (int i = 0; i < wall.size(); i++){
      Wall w = wall.get(i);
      if (w.pos.x == pos.x && w.pos.y == pos.y){
        death = true;
      }
    }
    if (f.pos.x <= pos.x && f.pos.x + (3 * s) > pos.x && f.pos.y == pos.y){
      win = true;
    }
  }// check
}