@@ -0,0 +1,223 @@
+import gym
+from gym import error, spaces, utils
+from gym.utils import seeding
+import numpy as np
+import pandas as pd
+from CBD.simulator import Simulator
+from AGV import AGVVirtual
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+EPS = 0.000001
+
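+# Gym environment that co-simulates a CBD model of the AGV (AGVVirtual) and rewards
+# parameter choices whose simulated trajectory stays close to a recorded physical trace.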
+class AGVEnv(gym.Env):
+    def __init__(self):
+        self.action_space = spaces.Discrete(9)
+        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3*np.pi]), high=np.array([1.0, 1.0, 3*np.pi]))
+
+        self.last_action = 0.018, 0.211
+        self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
+        self.physical["heading"] *= -1
+        self.clean_path()
+        self.time = self.physical["time"][0]
+
+        self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
+        self.actions = []
+
+        self.fig, self.ax = plt.subplots(1, 1)
+        self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
+        self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
+        self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
+        self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
+        self.same_actions = 0
+
+        plt.ion()
+        plt.show()
+
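+    # step(): apply one discrete tweak to the model parameters (r, d), re-run the CBD
+    # simulation for one control period, and score how well it tracks the physical trace.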
+    def step(self, action):
+        self.actions.append(self.last_action)
+        r, d = self.last_action
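+        # Action semantics: 0 keeps (r, d); 1-4 adjust r and 5-8 adjust d by +/-0.001 or +/-0.01.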
+        if action == 0:
+            self.same_actions += 1
+        elif action == 1:
+            r += 0.001
+        elif action == 2:
+            r += 0.01
+        elif action == 3:
+            r -= 0.001
+        elif action == 4:
+            r -= 0.01
+        elif action == 5:
+            d += 0.001
+        elif action == 6:
+            d += 0.01
+        elif action == 7:
+            d -= 0.001
+        elif action == 8:
+            d -= 0.01
+        if action > 0:
+            self.same_actions = 0
+        self.last_action = r, d
+        if abs(r) < EPS or abs(d) < EPS:
+            return self.states[-1], float('-inf'), True, {}
+        # ro, do = self.last_action
+        # reward = -np.power(ro - r, 2) - np.power(do - d, 2)
+        reward = self.same_actions * 100
+        agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
+        sim = Simulator(agv)
+        sim.setDeltaT(0.2)
+        sim.run(self.time + 0.21, self.time)
+        state = np.array(self.get_state(agv))
+        last_state = self.states[-1]
+        self.states.append(state)
+        self.time = sim.getTime()
+
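+        # Compare the simulated pose against the recorded trace at the current time
+        # and penalize the deviation (a large offset incurs a heavy penalty).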
+        moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
+        offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
+        if offset > 0.1:
+            reward -= 1000
+        else:
+            reward -= offset
+            reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2
+
+        TCP = agv.getBlockByName("TCP")
+        end_time = TCP.data[TCP.time_col][-1]
+
+        return state, reward, ((self.time >= end_time) or (reward < -500)), {}
+
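+    # reset(): rewind time, parameters, and state history for a new episode.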
+    def reset(self):
+        self.time = self.physical["time"][0]
+        self.last_action = 0.018, 0.211
+        self.same_actions = 0
+        self.actions.clear()
+        self.states = [self.states[0]]
+        return self.states[0]
+
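+    # update() redraws the simulated path for the animation; render() lets the
+    # Matplotlib event loop process the redraw without blocking training.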
+    def update(self):
+        x, y = [s[0] for s in self.states], [s[1] for s in self.states]
+        self.cart.set_data(x, y)
+
+    def render(self, mode='human'):
+        # plt.draw()
+        # plt.pause(0.001)
+        self.fig.canvas.draw_idle()
+        self.fig.canvas.start_event_loop(0.001)
+
+    # def close(self):
+    #     pass
+
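+    # get_state(): read the simulated pose (x, y, heading) from the model's plot blocks,
+    # falling back to the initial odometry values before any output is available.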
+    def get_state(self, model):
+        dd = model.getBlockByName("plot").data
+        if len(dd) == 0:
+            x = model.findBlock("odo.init_x")[0].getValue()
+            y = model.findBlock("odo.init_y")[0].getValue()
+            heading = model.findBlock("odo.init_w")[0].getValue()
+        else:
+            x, y = dd[-1]
+            heading = model.getBlockByName("headingPlot").data_xy[1][-1]
+        return x, y, heading
+
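+    # clean_path(): drop trace samples that move more than 0.0125 relative to the
+    # sample taken 0.2 s earlier, treating them as recognition noise.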
+    def clean_path(self):
+        to_drop = []
+        # dists = []
+        for idx, row in self.physical.iterrows():
+            subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
+            if len(subset) == 0:
+                continue
+            prev = subset.iloc[-1]
+            dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
+            # dists.append(dist)
+            # REMOVE NOISE
+            if dist > 0.0125:
+                to_drop.append(idx)
+        self.physical.drop(to_drop, inplace=True)
+
+    @staticmethod
+    def euclidean(x1, y1, x2, y2):
+        dx = x2 - x1
+        dy = y2 - y1
+        return ((dx * dx) + (dy * dy)) ** 0.5
+
+
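+# Training driver: tabular Q-learning with an epsilon-greedy policy over the
+# discretized (x, y, heading) state.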
+if __name__ == '__main__':
+    import random
+    env = AGVEnv()
+
+    action_space_size = env.action_space.n
+    state_space_size = 100 * 100 * (6 * 360)
+
+    q_table = np.zeros((state_space_size, action_space_size))
+
+    num_episodes = 1000
+    max_steps_per_episode = 100  # in practice an episode rarely exceeds a single step
+
+    learning_rate = 0.1
+    discount_rate = 0.99
+
+    exploration_rate = 1
+    max_exploration_rate = 1
+    min_exploration_rate = 0.01
+
+    exploration_decay_rate = 0.01  # decreasing this slows the decay, so learning takes longer
+    rewards_all_episodes = []
+
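+    # Flatten the continuous (x, y, heading) observation into a single Q-table row index:
+    # 100 x-bins, 100 y-bins, and 6*360 heading bins, matching state_space_size.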
+    def discretize(state):
+        x_bin = int(np.clip(state[0] * 100, 0, 99))
+        y_bin = int(np.clip(state[1] * 100, 0, 99))
+        h_bin = int(np.clip(np.degrees(state[2]) + np.degrees(3 * np.pi), 0, 6 * 360 - 1))
+        return (x_bin * 100 + y_bin) * (6 * 360) + h_bin
+
+    # Q-Learning algorithm
+    try:
+        for episode in range(num_episodes):
+            state = env.reset()
+            dstate = discretize(state)
+            env.label.set_text("Episode: " + str(episode))
+
+            done = False
+            rewards_current_episode = 0
+
+            for step in range(max_steps_per_episode):
+                env.render()
+
+                # Exploration-exploitation trade-off
+                exploration_rate_threshold = random.uniform(0, 1)
+                if exploration_rate_threshold > exploration_rate:
+                    action = np.argmax(q_table[dstate, :])
+                else:
+                    action = env.action_space.sample()
+
+                new_state, reward, done, info = env.step(action)
+                dnew_state = discretize(new_state)
+
+                # Update Q-table for Q(s,a)
+                q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
+                    learning_rate * (reward + discount_rate * np.max(q_table[dnew_state, :]))
+
+                state = new_state
+                dstate = dnew_state
+                rewards_current_episode += reward
+
+                if done:
+                    break
+
+            # Exploration rate decay
+            exploration_rate = min_exploration_rate + \
+                (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
+
+            rewards_all_episodes.append(rewards_current_episode)
+    except (KeyboardInterrupt, Exception) as exc:
+        print("ERROR!", exc)
+
+    # Calculate and print the average reward per hundred episodes
+    completed = len(rewards_all_episodes) - len(rewards_all_episodes) % 100
+    rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes[:completed]), max(completed // 100, 1))
+    count = 100
+    print("********** Average reward per hundred episodes **********\n")
+
+    for r in rewards_per_hundred_episodes:
+        print(count, ": ", str(sum(r) / 100))
+        count += 100
+
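+    # Persist the learned Q-table and the (r, d) pairs applied in the most recent episode.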
+    # Print updated Q-table
+    # print("\n\n********** Q-table **********\n")
+    # print(q_table)
+    np.save("Q.npy", q_table)
+    with open("actions.csv", 'w') as file:
+        file.write("r,d\n")
+        for r, d in env.actions:
+            file.write(f"{r:.3f},{d:.3f}\n")