@@ -0,0 +1,223 @@
+import gym
+from gym import error, spaces, utils
+from gym.utils import seeding
+import numpy as np
+import pandas as pd
+from CBD.simulator import Simulator
+from AGV import AGVVirtual
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+EPS = 0.000001
+
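+# Gym environment that co-simulates a CBD model of the AGV (AGVVirtual) and rewards
+# parameter choices whose simulated trajectory stays close to a recorded physical trace.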
+class AGVEnv(gym.Env):
+    def __init__(self):
+        self.action_space = spaces.Discrete(9)
+        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3*np.pi]), high=np.array([1.0, 1.0, 3*np.pi]))
+
+        self.last_action = 0.018, 0.211
+        self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
+        self.physical["heading"] *= -1
+        self.clean_path()
+        self.time = self.physical["time"][0]
+
+        self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
+        self.actions = []
+
+        self.fig, self.ax = plt.subplots(1, 1)
+        self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
+        self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
+        self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
+        self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
+        self.same_actions = 0
+
+        plt.ion()
+        plt.show()
+
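+    # step(): apply one discrete tweak to the model parameters (r, d), re-run the CBD
+    # simulation for one control period, and score how well it tracks the physical trace.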
+    def step(self, action):
+        self.actions.append(self.last_action)
+        r, d = self.last_action
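+        # Action semantics: 0 keeps (r, d); 1-4 adjust r and 5-8 adjust d by +/-0.001 or +/-0.01.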
+        if action == 0:
+            self.same_actions += 1
+        elif action == 1:
+            r += 0.001
+        elif action == 2:
+            r += 0.01
+        elif action == 3:
+            r -= 0.001
+        elif action == 4:
+            r -= 0.01
+        elif action == 5:
+            d += 0.001
+        elif action == 6:
+            d += 0.01
+        elif action == 7:
+            d -= 0.001
+        elif action == 8:
+            d -= 0.01
+        if action > 0:
+            self.same_actions = 0
+        self.last_action = r, d
+        if abs(r) < EPS or abs(d) < EPS:
+            return self.states[-1], float('-inf'), True, {}
+        # ro, do = self.last_action
+        # reward = -np.power(ro - r, 2) - np.power(do - d, 2)
+        reward = self.same_actions * 100
+        agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
+        sim = Simulator(agv)
+        sim.setDeltaT(0.2)
+        sim.run(self.time + 0.21, self.time)
+        state = np.array(self.get_state(agv))
+        last_state = self.states[-1]
+        self.states.append(state)
+        self.time = sim.getTime()
+
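+        # Compare the simulated pose against the recorded trace at the current time
+        # and penalize the deviation (a large offset incurs a heavy penalty).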
+        moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
+        offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
+        if offset > 0.1:
+            reward -= 1000
+        else:
+            reward -= offset
+            reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2
+
+        TCP = agv.getBlockByName("TCP")
+        end_time = TCP.data[TCP.time_col][-1]
+
+        return state, reward, ((self.time >= end_time) or (reward < -500)), {}
+
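+    # reset(): rewind time, parameters, and state history for a new episode.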
+    def reset(self):
+        self.time = self.physical["time"][0]
+        self.last_action = 0.018, 0.211
+        self.same_actions = 0
+        self.actions.clear()
+        self.states = [self.states[0]]
+        return self.states[0]
+
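+    # update() redraws the simulated path for the animation; render() lets the
+    # Matplotlib event loop process the redraw without blocking training.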
+    def update(self):
+        x, y = [s[0] for s in self.states], [s[1] for s in self.states]
+        self.cart.set_data(x, y)
+
+    def render(self, mode='human'):
+        # plt.draw()
+        # plt.pause(0.001)
+        self.fig.canvas.draw_idle()
+        self.fig.canvas.start_event_loop(0.001)
+
+    # def close(self):
+    #     pass
+
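+    # get_state(): read the simulated pose (x, y, heading) from the model's plot blocks,
+    # falling back to the initial odometry values before any output is available.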
+    def get_state(self, model):
+        dd = model.getBlockByName("plot").data
+        if len(dd) == 0:
+            x = model.findBlock("odo.init_x")[0].getValue()
+            y = model.findBlock("odo.init_y")[0].getValue()
+            heading = model.findBlock("odo.init_w")[0].getValue()
+        else:
+            x, y = dd[-1]
+            heading = model.getBlockByName("headingPlot").data_xy[1][-1]
+        return x, y, heading
+
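+    # clean_path(): drop trace samples that move more than 0.0125 relative to the
+    # sample taken 0.2 s earlier, treating them as recognition noise.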
+    def clean_path(self):
+        to_drop = []
+        # dists = []
+        for idx, row in self.physical.iterrows():
+            subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
+            if len(subset) == 0:
+                continue
+            prev = subset.iloc[-1]
+            dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
+            # dists.append(dist)
+            # REMOVE NOISE
+            if dist > 0.0125:
+                to_drop.append(idx)
+        self.physical.drop(to_drop, inplace=True)
+
+    @staticmethod
+    def euclidean(x1, y1, x2, y2):
+        dx = x2 - x1
+        dy = y2 - y1
+        return ((dx * dx) + (dy * dy)) ** 0.5
+
+
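+# Training driver: tabular Q-learning with an epsilon-greedy policy over the
+# discretized (x, y, heading) state.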
+if __name__ == '__main__':
+    import random
+    env = AGVEnv()
+
+    action_space_size = env.action_space.n
+    state_space_size = 100 * 100 * (6 * 360)
+
+    q_table = np.zeros((state_space_size, action_space_size))
+
+    num_episodes = 1000
+    max_steps_per_episode = 100  # in practice an episode rarely exceeds a single step
+
+    learning_rate = 0.1
+    discount_rate = 0.99
+
+    exploration_rate = 1
+    max_exploration_rate = 1
+    min_exploration_rate = 0.01
+
+    exploration_decay_rate = 0.01  # decreasing this slows the decay, so learning takes longer
+    rewards_all_episodes = []
+
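+    # Flatten the continuous (x, y, heading) observation into a single Q-table row index:
+    # 100 x-bins, 100 y-bins, and 6*360 heading bins, matching state_space_size.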
+    def discretize(state):
+        x_bin = int(np.clip(state[0] * 100, 0, 99))
+        y_bin = int(np.clip(state[1] * 100, 0, 99))
+        h_bin = int(np.clip(np.degrees(state[2]) + np.degrees(3 * np.pi), 0, 6 * 360 - 1))
+        return (x_bin * 100 + y_bin) * (6 * 360) + h_bin
+
+    # Q-Learning algorithm
+    try:
+        for episode in range(num_episodes):
+            state = env.reset()
+            dstate = discretize(state)
+            env.label.set_text("Episode: " + str(episode))
+
+            done = False
+            rewards_current_episode = 0
+
+            for step in range(max_steps_per_episode):
+                env.render()
+
+                # Exploration-exploitation trade-off
+                exploration_rate_threshold = random.uniform(0, 1)
+                if exploration_rate_threshold > exploration_rate:
+                    action = np.argmax(q_table[dstate, :])
+                else:
+                    action = env.action_space.sample()
+
+                new_state, reward, done, info = env.step(action)
+                dnew_state = discretize(new_state)
+
+                # Update Q-table for Q(s,a)
+                q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
+                    learning_rate * (reward + discount_rate * np.max(q_table[dnew_state, :]))
+
+                state = new_state
+                dstate = dnew_state
+                rewards_current_episode += reward
+
+                if done:
+                    break
+
+            # Exploration rate decay
+            exploration_rate = min_exploration_rate + \
+                (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
+
+            rewards_all_episodes.append(rewards_current_episode)
+    except (KeyboardInterrupt, Exception) as exc:
+        print("ERROR!", exc)
+
+    # Calculate and print the average reward per hundred episodes
+    completed = len(rewards_all_episodes) - len(rewards_all_episodes) % 100
+    rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes[:completed]), max(completed // 100, 1))
+    count = 100
+    print("********** Average reward per hundred episodes **********\n")
+
+    for r in rewards_per_hundred_episodes:
+        print(count, ": ", str(sum(r) / 100))
+        count += 100
+
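+    # Persist the learned Q-table and the (r, d) pairs applied in the most recent episode.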
+    # Print updated Q-table
+    # print("\n\n********** Q-table **********\n")
+    # print(q_table)
+    np.save("Q.npy", q_table)
+    with open("actions.csv", 'w') as file:
+        file.write("r,d\n")
+        for r, d in env.actions:
+            file.write(f"{r:.3f},{d:.3f}\n")