import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
import pandas as pd
from CBD.simulator import Simulator
from AGV import AGVVirtual
import matplotlib.pyplot as plt
import matplotlib.animation as animation

EPS = 1e-6

class AGVEnv(gym.Env):
    """Gym environment that tunes the (r, d) parameters of a virtual AGV
    so that its simulated trajectory tracks the recorded physical trajectory."""

    def __init__(self):
        # 9 discrete actions: keep (r, d), or nudge r or d up/down by 0.001 or 0.01.
        self.action_space = spaces.Discrete(9)
        # Observation: (x, y, heading), with the heading in radians.
        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3 * np.pi]),
                                            high=np.array([1.0, 1.0, 3 * np.pi]))
        self.last_action = 0.018, 0.211
        self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
        self.physical["heading"] *= -1
        self.clean_path()
        self.time = self.physical["time"][0]
        self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
        self.actions = []
        self.same_actions = 0

        # Live plot: recorded path (dotted blue) and simulated path (red).
        self.fig, self.ax = plt.subplots(1, 1)
        self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
        self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
        self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
        self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
        plt.ion()
        plt.show()
    def step(self, action):
        """Apply one parameter adjustment, simulate the virtual AGV for one
        time slice, and reward it for staying close to the recorded path."""
        self.actions.append(self.last_action)
        r, d = self.last_action
        if action == 0:      # keep the current parameters
            self.same_actions += 1
        elif action == 1:
            r += 0.001
        elif action == 2:
            r += 0.01
        elif action == 3:
            r -= 0.001
        elif action == 4:
            r -= 0.01
        elif action == 5:
            d += 0.001
        elif action == 6:
            d += 0.01
        elif action == 7:
            d -= 0.001
        elif action == 8:
            d -= 0.01
        if action > 0:
            self.same_actions = 0
        self.last_action = r, d

        # A (near-)zero parameter makes the model degenerate: abort the episode.
        if abs(r) < EPS or abs(d) < EPS:
            return self.states[-1], float('-inf'), True, {}

        # ro, do = self.last_action
        # reward = -np.power(ro - r, 2) - np.power(do - d, 2)
        # Bonus for keeping the same parameters over consecutive steps.
        reward = self.same_actions * 100

        # Simulate the virtual AGV for one 0.2 s slice with the current parameters.
        agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
        sim = Simulator(agv)
        sim.setDeltaT(0.2)
        sim.run(self.time + 0.21, self.time)

        state = np.array(self.get_state(agv))
        last_state = self.states[-1]
        self.states.append(state)
        self.time = sim.getTime()

        # Penalize the distance to the recorded position at the same time instant.
        moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
        offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
        if offset > 0.1:
            reward -= 1000
        else:
            reward -= offset
        # Reward the distance covered, so standing still is not optimal.
        reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2

        # The episode ends when the recorded trace runs out or the reward collapses.
        TCP = agv.getBlockByName("TCP")
        end_time = TCP.data[TCP.time_col][-1]
        return state, reward, ((self.time >= end_time) or (reward < -500)), {}
    def reset(self):
        """Reset the episode to the start of the recorded trajectory."""
        self.time = self.physical["time"][0]
        self.last_action = 0.018, 0.211
        self.same_actions = 0
        self.actions.clear()
        self.states = [self.states[0]]
        return self.states[0]
    def update(self):
        """Refresh the red line with the trajectory simulated so far."""
        x, y = [s[0] for s in self.states], [s[1] for s in self.states]
        self.cart.set_data(x, y)

    def render(self, mode='human'):
        # plt.draw()
        # plt.pause(0.001)
        self.fig.canvas.draw_idle()
        self.fig.canvas.start_event_loop(0.001)

    # def close(self):
    #     pass

    def get_state(self, model):
        """Read (x, y, heading) from the simulated AGV model."""
        dd = model.getBlockByName("plot").data
        if len(dd) == 0:
            # No output produced yet: fall back to the odometry initial conditions.
            x = model.findBlock("odo.init_x")[0].getValue()
            y = model.findBlock("odo.init_y")[0].getValue()
            heading = model.findBlock("odo.init_w")[0].getValue()
        else:
            x, y = model.getBlockByName("plot").data[-1]
            heading = model.getBlockByName("headingPlot").data_xy[1][-1]
        return x, y, heading
    def clean_path(self):
        """Drop recorded points that jump too far within 0.2 s (recognition noise)."""
        to_drop = []
        # dists = []
        for idx, row in self.physical.iterrows():
            subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
            if len(subset) == 0:
                continue
            prev = subset.iloc[-1]
            dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
            # dists.append(dist)
            # REMOVE NOISE
            if dist > 0.0125:
                to_drop.append(idx)
        self.physical.drop(to_drop, inplace=True)

    @staticmethod
    def euclidean(x1, y1, x2, y2):
        dx = x2 - x1
        dy = y2 - y1
        return ((dx * dx) + (dy * dy)) ** 0.5

if __name__ == '__main__':
    import random

    env = AGVEnv()
    action_space_size = env.action_space.n
    # 100 x-bins, 100 y-bins and 6*360 heading bins (degrees, offset by 3*pi).
    state_space_size = 100 * 100 * (6 * 360)
    q_table = np.zeros((state_space_size, action_space_size))

    num_episodes = 1000
    max_steps_per_episode = 100  # in practice episodes end much earlier (often after a single step)
    learning_rate = 0.1
    discount_rate = 0.99
    exploration_rate = 1
    max_exploration_rate = 1
    min_exploration_rate = 0.01
    exploration_decay_rate = 0.01  # decreasing it keeps exploration high for longer, so learning is slower
    rewards_all_episodes = []

    def discretize(state):
        # Map the continuous (x, y, heading) observation onto a single Q-table row index.
        x_bin = min(int(state[0] * 100), 99)
        y_bin = min(int(state[1] * 100), 99)
        heading_bin = int(np.degrees(3 * np.pi) + np.degrees(state[2]))
        return (x_bin * 100 + y_bin) * (6 * 360) + heading_bin
    # Q-learning algorithm
    try:
        for episode in range(num_episodes):
            state = env.reset()
            dstate = discretize(state)
            env.label.set_text("Episode: " + str(episode))
            done = False
            rewards_current_episode = 0

            for step in range(max_steps_per_episode):
                env.render()

                # Exploration-exploitation trade-off
                exploration_rate_threshold = random.uniform(0, 1)
                if exploration_rate_threshold > exploration_rate:
                    action = np.argmax(q_table[dstate, :])
                else:
                    action = env.action_space.sample()

                new_state, reward, done, info = env.step(action)
                dnew_state = discretize(new_state)

                # Update Q-table for Q(s, a)
                q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
                    learning_rate * (reward + discount_rate * np.max(q_table[dnew_state, :]))

                state = new_state
                dstate = dnew_state
                rewards_current_episode += reward
                if done:
                    break

            # Exploration rate decay
            exploration_rate = min_exploration_rate + \
                (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
            rewards_all_episodes.append(rewards_current_episode)
    except Exception as exc:
        print("ERROR!", exc)
    # Calculate and print the average reward per 100 episodes
    rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 100)
    count = 100
    print("********** Average reward per hundred episodes **********\n")
    for r in rewards_per_hundred_episodes:
        print(count, ": ", str(sum(r / 100)))
        count += 100

    # Print updated Q-table
    # print("\n\n********** Q-table **********\n")
    # print(q_table)
    np.save("Q.npy", q_table)

    with open("actions.csv", 'w') as file:
        file.write("r,d\n")
        for r, d in env.actions:
            file.write(f"{r:.3f},{d:.3f}\n")