import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
import pandas as pd
from CBD.simulator import Simulator
from AGV import AGVVirtual
import matplotlib.pyplot as plt
import matplotlib.animation as animation

EPS = 1e-6

class AGVEnv(gym.Env):
    """Gym environment that tunes the (r, d) parameters of a virtual AGV
    so that its simulated trajectory tracks the recorded physical trajectory."""

    def __init__(self):
        # 9 discrete actions: keep (r, d), or nudge r or d up/down by 0.001 or 0.01.
        self.action_space = spaces.Discrete(9)
        # Observation: (x, y, heading), with the heading in radians.
        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, -3 * np.pi]),
                                            high=np.array([1.0, 1.0, 3 * np.pi]))
        self.last_action = 0.018, 0.211
        self.physical = pd.read_csv("trace_vidH.csv")  # Trajectory of the recognized AGV
        self.physical["heading"] *= -1
        self.clean_path()
        self.time = self.physical["time"][0]
        self.states = [np.array([self.physical["x"][0], self.physical["y"][0], self.physical["heading"][0]])]
        self.actions = []
        self.same_actions = 0

        # Live plot: recorded path (dotted blue) and simulated path (red).
        self.fig, self.ax = plt.subplots(1, 1)
        self.ax.plot(self.physical["x"], self.physical["y"], ls=':', c='blue')
        self.ani = animation.FuncAnimation(self.fig, lambda _: self.update(), interval=100)
        self.cart, = self.ax.plot(self.states[0][0], self.states[0][1], c='red')
        self.label = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes)
        plt.ion()
        plt.show()
    def step(self, action):
        """Apply one parameter adjustment, simulate the virtual AGV for one
        time slice, and reward it for staying close to the recorded path."""
        self.actions.append(self.last_action)
        r, d = self.last_action
        if action == 0:      # keep the current parameters
            self.same_actions += 1
        elif action == 1:
            r += 0.001
        elif action == 2:
            r += 0.01
        elif action == 3:
            r -= 0.001
        elif action == 4:
            r -= 0.01
        elif action == 5:
            d += 0.001
        elif action == 6:
            d += 0.01
        elif action == 7:
            d -= 0.001
        elif action == 8:
            d -= 0.01
        if action > 0:
            self.same_actions = 0
        self.last_action = r, d

        # A (near-)zero parameter makes the model degenerate: abort the episode.
        if abs(r) < EPS or abs(d) < EPS:
            return self.states[-1], float('-inf'), True, {}

        # ro, do = self.last_action
        # reward = -np.power(ro - r, 2) - np.power(do - d, 2)
        # Bonus for keeping the same parameters over consecutive steps.
        reward = self.same_actions * 100

        # Simulate the virtual AGV for one 0.2 s slice with the current parameters.
        agv = AGVVirtual("AGV", r, d, "obtained.csv", initial=self.states[-1], v=0.033, T=35, Kp=-0.01)
        sim = Simulator(agv)
        sim.setDeltaT(0.2)
        sim.run(self.time + 0.21, self.time)

        state = np.array(self.get_state(agv))
        last_state = self.states[-1]
        self.states.append(state)
        self.time = sim.getTime()

        # Penalize the distance to the recorded position at the same time instant.
        moment = self.physical[self.physical["time"] <= self.time].iloc[-1]
        offset = self.euclidean(moment["x"], moment["y"], state[0], state[1])
        if offset > 0.1:
            reward -= 1000
        else:
            reward -= offset
        # Reward the distance covered, so standing still is not optimal.
        reward += self.euclidean(state[0], state[1], last_state[0], last_state[1]) ** 2

        # The episode ends when the recorded trace runs out or the reward collapses.
        TCP = agv.getBlockByName("TCP")
        end_time = TCP.data[TCP.time_col][-1]
        return state, reward, ((self.time >= end_time) or (reward < -500)), {}
    def reset(self):
        """Reset the episode to the start of the recorded trajectory."""
        self.time = self.physical["time"][0]
        self.last_action = 0.018, 0.211
        self.same_actions = 0
        self.actions.clear()
        self.states = [self.states[0]]
        return self.states[0]
    def update(self):
        """Refresh the red line with the trajectory simulated so far."""
        x, y = [s[0] for s in self.states], [s[1] for s in self.states]
        self.cart.set_data(x, y)

    def render(self, mode='human'):
        # plt.draw()
        # plt.pause(0.001)
        self.fig.canvas.draw_idle()
        self.fig.canvas.start_event_loop(0.001)

    # def close(self):
    #     pass

    def get_state(self, model):
        """Read (x, y, heading) from the simulated AGV model."""
        dd = model.getBlockByName("plot").data
        if len(dd) == 0:
            # No output produced yet: fall back to the odometry initial conditions.
            x = model.findBlock("odo.init_x")[0].getValue()
            y = model.findBlock("odo.init_y")[0].getValue()
            heading = model.findBlock("odo.init_w")[0].getValue()
        else:
            x, y = model.getBlockByName("plot").data[-1]
            heading = model.getBlockByName("headingPlot").data_xy[1][-1]
        return x, y, heading
    def clean_path(self):
        """Drop recorded points that jump too far within 0.2 s (recognition noise)."""
        to_drop = []
        # dists = []
        for idx, row in self.physical.iterrows():
            subset = self.physical[self.physical["time"] <= row["time"] - 0.2]
            if len(subset) == 0:
                continue
            prev = subset.iloc[-1]
            dist = self.euclidean(prev["x"], prev["y"], row["x"], row["y"])
            # dists.append(dist)
            # REMOVE NOISE
            if dist > 0.0125:
                to_drop.append(idx)
        self.physical.drop(to_drop, inplace=True)

    @staticmethod
    def euclidean(x1, y1, x2, y2):
        dx = x2 - x1
        dy = y2 - y1
        return ((dx * dx) + (dy * dy)) ** 0.5

if __name__ == '__main__':
    import random

    env = AGVEnv()
    action_space_size = env.action_space.n
    # 100 x-bins, 100 y-bins and 6*360 heading bins (degrees, offset by 3*pi).
    state_space_size = 100 * 100 * (6 * 360)
    q_table = np.zeros((state_space_size, action_space_size))

    num_episodes = 1000
    max_steps_per_episode = 100  # in practice episodes end much earlier (often after a single step)
    learning_rate = 0.1
    discount_rate = 0.99
    exploration_rate = 1
    max_exploration_rate = 1
    min_exploration_rate = 0.01
    exploration_decay_rate = 0.01  # decreasing it keeps exploration high for longer, so learning is slower
    rewards_all_episodes = []

    def discretize(state):
        # Map the continuous (x, y, heading) observation onto a single Q-table row index.
        x_bin = min(int(state[0] * 100), 99)
        y_bin = min(int(state[1] * 100), 99)
        heading_bin = int(np.degrees(3 * np.pi) + np.degrees(state[2]))
        return (x_bin * 100 + y_bin) * (6 * 360) + heading_bin
    # Q-learning algorithm
    try:
        for episode in range(num_episodes):
            state = env.reset()
            dstate = discretize(state)
            env.label.set_text("Episode: " + str(episode))
            done = False
            rewards_current_episode = 0

            for step in range(max_steps_per_episode):
                env.render()

                # Exploration-exploitation trade-off
                exploration_rate_threshold = random.uniform(0, 1)
                if exploration_rate_threshold > exploration_rate:
                    action = np.argmax(q_table[dstate, :])
                else:
                    action = env.action_space.sample()

                new_state, reward, done, info = env.step(action)
                dnew_state = discretize(new_state)

                # Update Q-table for Q(s, a)
                q_table[dstate, action] = (1 - learning_rate) * q_table[dstate, action] + \
                    learning_rate * (reward + discount_rate * np.max(q_table[dnew_state, :]))

                state = new_state
                dstate = dnew_state
                rewards_current_episode += reward
                if done:
                    break

            # Exploration rate decay
            exploration_rate = min_exploration_rate + \
                (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
            rewards_all_episodes.append(rewards_current_episode)
    except Exception as exc:
        print("ERROR!", exc)
    # Calculate and print the average reward per 100 episodes
    rewards_per_hundred_episodes = np.split(np.array(rewards_all_episodes), num_episodes // 100)
    count = 100
    print("********** Average reward per hundred episodes **********\n")
    for r in rewards_per_hundred_episodes:
        print(count, ": ", str(sum(r / 100)))
        count += 100

    # Print updated Q-table
    # print("\n\n********** Q-table **********\n")
    # print(q_table)
    np.save("Q.npy", q_table)

    with open("actions.csv", 'w') as file:
        file.write("r,d\n")
        for r, d in env.actions:
            file.write(f"{r:.3f},{d:.3f}\n")