
Added Q-learning for the AGV

See Pydio for the Q matrix
rparedis 4 years ago
Parent
Commit
1e0d012ee2
7 changed files with 349 additions and 16 deletions
  1. .gitignore (+2 -0)
  2. examples/AGV/AGVEnv.py (+223 -0)
  3. examples/AGV/__init__.py (+0 -0)
  4. examples/AGV/actions.csv (+89 -0)
  5. examples/AGV/tracking.png (BIN)
  6. examples/AGV/tracking.py (+35 -16)
  7. examples/__init__.py (+0 -0)

+ 2 - 0
.gitignore

@@ -20,3 +20,5 @@ examples/AGV/tuning7/
 /src/build/
 
 examples/AGV/tuning8/
+
+*.npy

+ 223 - 0
examples/AGV/AGVEnv.py

@@ -0,0 +1,223 @@
+import gym
+from gym import error, spaces, utils
+from gym.utils import seeding
+import numpy as np
+import pandas as pd
+from CBD.simulator import Simulator
+from AGV import AGVVirtual
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+EPS = 0.000001
+
+class AGVEnv(gym.Env):
+	def __init__(self):
+		self.action_space = spaces.Discrete(9)
+		self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3*np.pi]), high=np.array([1.0, 1.0, 3*np.pi]))
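+		# 9 discrete actions: keep the current parameter pair (r, d) or nudge
+		# either one by +/-0.001 or +/-0.01 (see step()); observations are
+		# (x, y, heading), with heading allowed to wind up to +/-3*pi rad.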
+
+		self.last_action = 0.018, 0.211
+		self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
+		self.physical["heading"] *= -1
+		self.clean_path()
+		self.time = self.physical["time"][0]
+
+		self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
+		self.actions = []
+
+		self.fig, self.ax = plt.subplots(1,1)
+		self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
+		self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
+		self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
+		self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
+		self.same_actions = 0
+
+		plt.ion()
+		plt.show()
+
+	def step(self, action):
+		self.actions.append(self.last_action)
+		r, d = self.last_action
+		if action == 0:
+			self.same_actions += 1
+		elif action == 1:
+			r += 0.001
+		elif action == 2:
+			r += 0.01
+		elif action == 3:
+			r -= 0.001
+		elif action == 4:
+			r -= 0.01
+		elif action == 5:
+			d += 0.001
+		elif action == 6:
+			d += 0.01
+		elif action == 7:
+			d -= 0.001
+		elif action == 8:
+			d -= 0.01
+		if action > 0:
+			self.same_actions = 0
+		self.last_action = r, d
+		if abs(r) < EPS or abs(d) < EPS:
+			# Degenerate parameters: end the episode with a large finite penalty,
+			# so the Q-table does not fill up with -inf.
+			return self.states[-1], -1e6, True, {}
+		# ro, do = self.last_action
+		# reward = -np.power(ro - r, 2) - np.power(do - d, 2)
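+		# Reward shaping: a bonus that grows for every consecutive step the
+		# same (r, d) pair is kept.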
+		reward = self.same_actions * 100
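+		# Re-create the virtual AGV with the tweaked (r, d) and simulate a short
+		# 0.21 s window onward from the current state and time.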
+		agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
+		sim = Simulator(agv)
+		sim.setDeltaT(0.2)
+		sim.run(self.time + 0.21, self.time)
+		state = np.array(self.get_state(agv))
+		last_state = self.states[-1]
+		self.states.append(state)
+		self.time = sim.getTime()
+
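+		# Compare the simulated position with the recorded physical trajectory
+		# at the same time; a deviation above 10 cm costs -1000, which typically
+		# terminates the episode (done when reward < -500).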
+		moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
+		offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
+		if offset > 0.1:
+			reward -= 1000
+		else:
+			reward -= offset
+			reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2
+
+		TCP = agv.getBlockByName("TCP")
+		end_time = TCP.data[TCP.time_col][-1]
+
+		return state, reward, ((self.time >= end_time) or (reward < -500)), {}
+
+	def reset(self):
+		self.time = self.physical["time"][0]
+		self.last_action = 0.018, 0.211
+		self.actions.clear()
+		self.states = [self.states[0]]
+		return self.states[0]
+
+	def update(self):
+		x, y = [s[0] for s in self.states], [s[1] for s in self.states]
+		self.cart.set_data(x, y)
+
+	def render(self, mode='human'):
+		# plt.draw()
+		# plt.pause(0.001)
+		self.fig.canvas.draw_idle()
+		self.fig.canvas.start_event_loop(0.001)
+
+	# def close(self):
+	# 	pass
+
+	def get_state(self, model):
+		dd = model.getBlockByName("plot").data
+		if len(dd) == 0:
+			x = model.findBlock("odo.init_x")[0].getValue()
+			y = model.findBlock("odo.init_y")[0].getValue()
+			heading = model.findBlock("odo.init_w")[0].getValue()
+		else:
+			x, y = model.getBlockByName("plot").data[-1]
+			heading = model.getBlockByName("headingPlot").data_xy[1][-1]
+		return x, y, heading
+
+	def clean_path(self):
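+		# Drop samples that jumped more than 1.25 cm within 0.2 s; such jumps
+		# are treated as recognition noise in the recorded trajectory.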
+		to_drop = []
+		# dists = []
+		for idx, row in self.physical.iterrows():
+			subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
+			if len(subset) == 0: continue
+			prev = subset.iloc[-1]
+			dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
+			# dists.append(dist)
+			# REMOVE NOISE
+			if dist > 0.0125:
+				to_drop.append(idx)
+		self.physical.drop(to_drop, inplace=True)
+
+	@staticmethod
+	def euclidean(x1, y1, x2, y2):
+		dx = x2 - x1
+		dy = y2 - y1
+		return ((dx * dx) + (dy * dy)) ** 0.5
+
+
+if __name__ == '__main__':
+	import random
+	env = AGVEnv()
+
+	action_space_size = env.action_space.n
+	state_space_size = 100 * 100 * (6 * 360)
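+	# 100 x 100 position bins times 6*360 one-degree heading bins; as float64
+	# the resulting Q-table weighs in at roughly 1.5 GB.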
+
+	q_table = np.zeros((state_space_size, action_space_size))
+
+	num_episodes = 1000
+	max_steps_per_episode = 100 # in practice episodes rarely last more than one step
+
+	learning_rate = 0.1
+	discount_rate = 0.99
+
+	exploration_rate = 1
+	max_exploration_rate = 1
+	min_exploration_rate = 0.01
+
+	exploration_decay_rate = 0.01 # decreasing this keeps exploration high for longer, so learning is slower
+	rewards_all_episodes = []
+
+	def discretize(state):
+		# Flatten (x, y, heading) into a single Q-table row index; clamp the
+		# bins so the index always stays inside the allocated table.
+		x_bin, y_bin = min(int(state[0] * 100), 99), min(int(state[1] * 100), 99)
+		h_bin = min(int(np.degrees(3 * np.pi) + np.degrees(state[2])), 6 * 360 - 1)
+		return (x_bin * 100 + y_bin) * (6 * 360) + h_bin
+
+	# Q-Learning algorithm
+	try:
+		for episode in range(num_episodes):
+			state = env.reset()
+			dstate = discretize(state)
+			env.label.set_text("Episode: " + str(episode))
+
+			done = False
+			rewards_current_episode = 0
+
+			for step in range(max_steps_per_episode):
+				env.render()
+
+				# Exploration-exploitation trade-off
+				exploration_rate_threshold = random.uniform(0, 1)
+				if exploration_rate_threshold > exploration_rate:
+					action = np.argmax(q_table[dstate,:])
+				else:
+					action = env.action_space.sample()
+
+				new_state, reward, done, info = env.step(action)
+				dnew_state = discretize(new_state)
+
+				# Update Q-table for Q(s,a)
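+				# Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))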
+				q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
+				                         learning_rate * (reward + discount_rate * np.max(q_table[dnew_state,:]))
+
+				state = new_state
+				dstate = dnew_state  # advance the discretized state as well
+				rewards_current_episode += reward
+
+				if done:
+					break
+
+			# Exploration rate decay
+			exploration_rate = min_exploration_rate + \
+			                   (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
+
+			rewards_all_episodes.append(rewards_current_episode)
+	except Exception as e:
+		print("ERROR!", e)
+
+	# Calculate and print the average reward per 100 episodes
+	rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 100)
+	count = 100
+	print("********** Average reward per hundred episodes **********\n")
+
+	for r in rewards_per_hundred_episodes:
+		print(count, ": ", str(sum(r) / 100))
+		count += 100
+
+	# Print updated Q-table
+	# print("\n\n********** Q-table **********\n")
+	# print(q_table)
+	np.save("Q.npy", q_table)
+	with open("actions.csv", 'w') as file:
+		file.write("r,d\n")
+		for r, d in env.actions:
+			file.write(f"{r:.3f},{d:.3f}\n")

+ 0 - 0
examples/AGV/__init__.py


+ 89 - 0
examples/AGV/actions.csv

@@ -0,0 +1,89 @@
+r,d
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211
+0.018,0.211

BIN
examples/AGV/tracking.png


+ 35 - 16
examples/AGV/tracking.py

@@ -24,6 +24,7 @@ from CBD.simulator import Simulator
 from AGV import AGVVirtual
 import cv2
 from tqdm import tqdm
+import matplotlib.pyplot as plt
 
 EPS = 0.00001
 ERROR = 0.1
@@ -36,6 +37,7 @@ class TrackingSimulator:
 		self.setup()
 		self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
 		self.physical["heading"] *= -1
+		self.clean_path()
 		self.set_state([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])
 		self.path = list(zip(self.physical["x"], self.physical["y"], self.physical["heading"]))
 		self.trace = []
@@ -76,7 +78,7 @@ class TrackingSimulator:
 				if back < end_time:
 					# time, self.__offset_i, state, _, (r, d) = self.last_good_state()
 					# if state is None: break
-					now = back
+					now = back - 1.0
 					current = self.get_state()
 					if np.isnan(current[0]):
 						print("WHY???!!!")
@@ -143,6 +145,24 @@ class TrackingSimulator:
 			df = pd.DataFrame({"time": t, "r": r, "d": d})
 			df.to_csv("tracking.csv")
 
+	def clean_path(self):
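+		# Same noise filter as in AGVEnv: drop samples that jumped more than
+		# 1.25 cm within 0.2 s of recorded time.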
+		to_drop = []
+		# dists = []
+		for idx, row in self.physical.iterrows():
+			subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
+			if len(subset) == 0: continue
+			prev = subset.iloc[-1]
+			dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
+			# dists.append(dist)
+			# REMOVE NOISE
+			if dist > 0.0125:
+				to_drop.append(idx)
+		self.physical.drop(to_drop, inplace=True)
+		# fig, ax = plt.subplots(1, 1)
+		# r = ax.boxplot(dists)
+		# fig.show()
+		# print(sorted(dists))
+
 	def get_state(self):
 		dd = self.model.getBlockByName("plot").data
 		if len(dd) == 0:
@@ -250,8 +270,6 @@ if __name__ == '__main__':
 	ts = TrackingSimulator()
 	ts.run()
 
-	import matplotlib.pyplot as plt
-
 	fig = plt.figure()
 	ax = fig.add_subplot(111, projection='3d')
 	ax.set_xlabel("X (m)")
@@ -264,19 +282,20 @@ if __name__ == '__main__':
 		times = np.asarray(trace)[:,0]
 		ax.plot([x[2][0] for x in trace], [x[2][1] for x in trace], times, label=f"r = {r:.2f}; d = {d:.2f}")
 	# ax.legend()
+	plt.tight_layout()
 	plt.savefig("tracking.png")
 	plt.show()
 
-	fig, axs = plt.subplots(3, 3)
-	axs[0].set_xlabel("X (m)")
-	axs[0].set_ylabel("Y (m)")
-	axs[1].set_xlabel("Time (s)")
-	# ax.plot(ts.physical["x"], ts.physical["y"], ts.physical["time"], label="physical", ls=':')
-	# for trace in ts.traces:
-	# 	if len(trace) < 5: continue
-	# 	r, d = trace[-1][4]
-	# 	times = np.asarray(trace)[:,0]
-	# 	ax.plot([x[2][0] for x in trace], [x[2][1] for x in trace], times, label=f"r = {r:.2f}; d = {d:.2f}")
-	# # ax.legend()
-	# plt.savefig("tracking.png")
-	plt.show()
+	# fig, axs = plt.subplots(3, 3)
+	# axs[0].set_xlabel("X (m)")
+	# axs[0].set_ylabel("Y (m)")
+	# axs[1].set_xlabel("Time (s)")
+	# # ax.plot(ts.physical["x"], ts.physical["y"], ts.physical["time"], label="physical", ls=':')
+	# # for trace in ts.traces:
+	# # 	if len(trace) < 5: continue
+	# # 	r, d = trace[-1][4]
+	# # 	times = np.asarray(trace)[:,0]
+	# # 	ax.plot([x[2][0] for x in trace], [x[2][1] for x in trace], times, label=f"r = {r:.2f}; d = {d:.2f}")
+	# # # ax.legend()
+	# # plt.savefig("tracking.png")
+	# plt.show()

+ 0 - 0
examples/__init__.py