AGVEnv.py

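"""Gym environment for tuning the parameters of a CBD-simulated virtual AGV.

The environment replays the trajectory recorded in ``trace_vidH.csv``; at every
step the agent nudges the two AGVVirtual parameters (r, d), the virtual AGV is
re-simulated for one 0.2 s step, and the reward penalises the deviation of the
simulated position from the recorded one.  The ``__main__`` block trains a
tabular Q-learning agent on this environment and writes out the resulting
Q-table (``Q.npy``) and the sequence of applied parameter pairs (``actions.csv``).
"""
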
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
import pandas as pd
from CBD.simulator import Simulator
from AGV import AGVVirtual
import matplotlib.pyplot as plt
import matplotlib.animation as animation

EPS = 0.000001


class AGVEnv(gym.Env):
    def __init__(self):
        # 9 discrete actions: keep the current (r, d) or nudge one of them by +-0.001 or +-0.01.
        self.action_space = spaces.Discrete(9)
        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3 * np.pi]),
                                            high=np.array([1.0, 1.0, 3 * np.pi]))
        self.last_action = 0.018, 0.211
        self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
        self.physical["heading"] *= -1
        self.clean_path()
        self.time = self.physical["time"][0]
        self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
        self.actions = []
        # Live plot: recorded trajectory (dotted blue) and simulated trajectory (red).
        self.fig, self.ax = plt.subplots(1, 1)
        self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
        self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
        self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
        self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
        self.same_actions = 0
        plt.ion()
        plt.show()

    def step(self, action):
        self.actions.append(self.last_action)
        r, d = self.last_action
        # Actions 1-8 nudge r or d by +-0.001 or +-0.01; action 0 keeps the current values.
        if action == 0:
            self.same_actions += 1
        elif action == 1:
            r += 0.001
        elif action == 2:
            r += 0.01
        elif action == 3:
            r -= 0.001
        elif action == 4:
            r -= 0.01
        elif action == 5:
            d += 0.001
        elif action == 6:
            d += 0.01
        elif action == 7:
            d -= 0.001
        elif action == 8:
            d -= 0.01
        if action > 0:
            self.same_actions = 0
        self.last_action = r, d
        if abs(r) < EPS or abs(d) < EPS:
            # Abort the episode if either parameter has collapsed to (almost) zero.
            return self.states[-1], float('-inf'), True, {}
        # ro, do = self.last_action
        # reward = -np.power(ro - r, 2) - np.power(do - d, 2)
        reward = self.same_actions * 100  # bonus for keeping the parameters unchanged
        # Re-simulate the virtual AGV for one 0.2 s step with the updated parameters.
        agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
        sim = Simulator(agv)
        sim.setDeltaT(0.2)
        sim.run(self.time + 0.21, self.time)
        state = np.array(self.get_state(agv))
        last_state = self.states[-1]
        self.states.append(state)
        self.time = sim.getTime()
        # Penalise the distance between the simulated position and the recorded one at this time.
        moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
        offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
        if offset > 0.1:
            reward -= 1000
        else:
            reward -= offset
        reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2
        TCP = agv.getBlockByName("TCP")
        end_time = TCP.data[TCP.time_col][-1]
        return state, reward, ((self.time >= end_time) or (reward < -500)), {}

    def reset(self):
        self.time = self.physical["time"][0]
        self.last_action = 0.018, 0.211
        self.same_actions = 0  # also restart the repeated-action counter
        self.actions.clear()
        self.states = [self.states[0]]
        return self.states[0]

    def update(self):
        x, y = [s[0] for s in self.states], [s[1] for s in self.states]
        self.cart.set_data(x, y)

    def render(self, mode='human'):
        # plt.draw()
        # plt.pause(0.001)
        self.fig.canvas.draw_idle()
        self.fig.canvas.start_event_loop(0.001)

    # def close(self):
    #     pass

    def get_state(self, model):
        # Read the latest simulated (x, y, heading); before the first output is
        # produced, fall back to the odometry's initial values.
        dd = model.getBlockByName("plot").data
        if len(dd) == 0:
            x = model.findBlock("odo.init_x")[0].getValue()
            y = model.findBlock("odo.init_y")[0].getValue()
            heading = model.findBlock("odo.init_w")[0].getValue()
        else:
            x, y = model.getBlockByName("plot").data[-1]
            heading = model.getBlockByName("headingPlot").data_xy[1][-1]
        return x, y, heading

    def clean_path(self):
        to_drop = []
        # dists = []
        for idx, row in self.physical.iterrows():
            subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
            if len(subset) == 0:
                continue
            prev = subset.iloc[-1]
            dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
            # dists.append(dist)
            # REMOVE NOISE
            if dist > 0.0125:
                to_drop.append(idx)
        self.physical.drop(to_drop, inplace=True)

    @staticmethod
    def euclidean(x1, y1, x2, y2):
        dx = x2 - x1
        dy = y2 - y1
        return ((dx * dx) + (dy * dy)) ** 0.5
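
# Quick smoke test (sketch only, kept commented out): drive the environment with
# random actions for a handful of steps, independently of the training script below.
#
# env = AGVEnv()
# obs = env.reset()
# for _ in range(5):
#     obs, reward, done, _ = env.step(env.action_space.sample())
#     env.render()
#     if done:
#         break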


if __name__ == '__main__':
    import random

    env = AGVEnv()
    action_space_size = env.action_space.n
    state_space_size = 100 * 100 * (6 * 360)  # 100 x 100 position bins times 6*360 heading bins
    q_table = np.zeros((state_space_size, action_space_size))

    num_episodes = 1000
    max_steps_per_episode = 100  # in practice it won't go higher than 1
    learning_rate = 0.1
    discount_rate = 0.99
    exploration_rate = 1
    max_exploration_rate = 1
    min_exploration_rate = 0.01
    exploration_decay_rate = 0.01  # decreasing this makes exploration decay (and learning) slower
    rewards_all_episodes = []

    def discretize(state):
        # Flatten (x, y, heading) into a single Q-table row index: 100 bins each for x and y,
        # one bin per degree of heading (shifted to be non-negative, clipped to the table size).
        x_bin, y_bin = min(int(state[0] * 100), 99), min(int(state[1] * 100), 99)
        heading_bin = min(int(np.degrees(3 * np.pi) + np.degrees(state[2])), 6 * 360 - 1)
        return (x_bin * 100 + y_bin) * (6 * 360) + heading_bin

    # Q-Learning algorithm
    try:
        for episode in range(num_episodes):
            state = env.reset()
            dstate = discretize(state)
            env.label.set_text("Episode: " + str(episode))
            done = False
            rewards_current_episode = 0
            for step in range(max_steps_per_episode):
                env.render()
                # Exploration-exploitation trade-off
                exploration_rate_threshold = random.uniform(0, 1)
                if exploration_rate_threshold > exploration_rate:
                    action = np.argmax(q_table[dstate, :])
                else:
                    action = env.action_space.sample()
                new_state, reward, done, info = env.step(action)
                dnew_state = discretize(new_state)
                # Update Q-table for Q(s,a): Q(s,a) <- (1-lr)*Q(s,a) + lr*(r + gamma*max_a' Q(s',a'))
                q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
                    learning_rate * (reward + discount_rate * np.max(q_table[dnew_state, :]))
                state, dstate = new_state, dnew_state
                rewards_current_episode += reward
                if done:
                    break
            # Exploration rate decay
            exploration_rate = min_exploration_rate + \
                (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
            rewards_all_episodes.append(rewards_current_episode)
    except (Exception, KeyboardInterrupt) as exc:
        # Keep the results gathered so far even if training crashes or is interrupted.
        print("ERROR!", exc)

    # Calculate and print the average reward per 100 episodes
    rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 100)
    count = 100
    print("********** Average reward per hundred episodes **********\n")
    for r in rewards_per_hundred_episodes:
        print(count, ": ", str(sum(r / 100)))
        count += 100

    # Print updated Q-table
    # print("\n\n********** Q-table **********\n")
    # print(q_table)
    np.save("Q.npy", q_table)

    with open("actions.csv", 'w') as file:
        file.write("r,d\n")
        for r, d in env.actions:
            file.write(f"{r:.3f},{d:.3f}\n")
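
    # Optional follow-up (sketch only, kept commented out): reload the saved table and
    # replay the learned policy greedily against the environment.
    #
    # q_table = np.load("Q.npy")
    # state, done = env.reset(), False
    # while not done:
    #     env.render()
    #     action = int(np.argmax(q_table[discretize(state), :]))
    #     state, reward, done, _ = env.step(action)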