rparedis
/
PoAB


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
							import pandas as pd
import numpy as np
import scipy.stats as stats
import random
import matplotlib.pyplot as plt

# wind = pd.read_csv("wind.csv", parse_dates=[1, 4], date_format="%Y-%m-%d %H:%M:%S.%f")
# wind.dropna()
# start = wind.iloc[0]["Aanvang tijd"]
# wind["start"] = wind["Aanvang tijd"] - start
# wind["start"] = wind["start"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
# wind["end"] = wind["Einde tijd"] - start
# wind["end"] = wind["end"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
#
# ax1 = plt.subplot(211)
# plt.tick_params('x', labelbottom=False)
# ax1.set_ylabel("wind force (B)")
#
# ax2 = plt.subplot(212, sharex=ax1)
# ax2.set_ylabel("wind direction")
# ax2.set_xlabel("time (h)")
#
# for sluis in wind["Sluis"].unique():
# 	info = wind[wind["Sluis"] == sluis]
# 	xs, ys, ws = [], [], []
# 	for _, row in info.iterrows():
# 		xs.append(row["start"])
# 		xs.append(row["end"])
# 		ys.append(row["Aanvang windkracht"])
# 		ys.append(row["Einde windkracht"])
# 		ws.append(row["Aanvang windrichting"])
# 		ws.append(row["Einde windrichting"])
# 		if len(ws) > 2 and (ws[-1] == np.nan or ws[-2] == np.nan):
# 			print(row["start"])
# 	ax1.plot(xs, ys, label=sluis)
# 	ax2.plot(xs, ws, label=sluis)
#
# plt.tight_layout()
# plt.legend()
# plt.show()
#
# exit()

df = pd.read_csv("2022.csv", parse_dates=[1, 3, 4, 5], date_format="%Y-%m-%d %H:%M:%S")

df["duur"] = df["Einde tijd"] - df["Aanvang tijd"]
df["duur"] = df["duur"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
df["Afstand"] = df["Afstand"].astype(float)
df["velocity"] = df["Afstand"] / df["duur"]   # km/h

df.replace([np.inf, -np.inf], np.nan, inplace=True)
df = df.dropna()

task = df["Taak"] == "Slepen"
vel = df["velocity"][task]

# remove outliers
Q1 = vel.quantile(0.25)
Q3 = vel.quantile(0.75)
IQR = Q3 - Q1
trueList = ~((vel < (Q1 - 1.5 * IQR)) | (vel > (Q3 + 1.5 * IQR)))
print("full:", len(vel))
inv = vel[~trueList]
vel = vel[trueList]
print("IQR:", len(vel), Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)
print("inv:", len(inv), inv.min(), inv.max())

# unique color for each tug
colors = { i: "#"+''.join([random.choice('0123456789ABCDEF') for _ in range(6)]) for i in df["Sleepboot"].unique() }
df["color"] = [colors[b] for b in df["Sleepboot"]]

# plt.scatter(df["time"][task][trueList], vel, s=1, c=df["color"][task][trueList])
# plt.title("Sailing (Grouped by Start Time)")
# plt.xlabel("time (h)")
# plt.ylabel("velocity (km/h)")
# plt.show()


fig, ax1 = plt.subplots()
ax1.set_xlabel("velocity (km/h)")
ax2 = ax1.twinx()
ax1.set_ylabel("amount")
# ax2.set_ylabel("", color="tab:red")
ax2.tick_params(axis='y', labelcolor='r')

def plot(series, title):
	plt.title(title)

	buckets = {}

	for v in series:
		buckets.setdefault(v, 0)
		buckets[v] += 1

	if 0 in buckets:
		del buckets[0]

	ax1.bar(buckets.keys(), buckets.values(), label=title)
	ax1.set_ylim((0, 1.1 * max(buckets.values())))

	mu = series.mean()
	std = series.std()
	mx = int(np.ceil(series.max()))

	beta = mu / std**2
	alpha = mu * beta

	x = np.linspace(0, mx, mx * 2)
	# y1 = stats.gamma.pdf(x, a=alpha, scale=1./beta)
	y1 = stats.norm.pdf(x, mu, std)
	ax2.plot(x, y1, c='r', label=r"$\Gamma(%.3f, %.3f)$" % (mu, std))
	ax2.set_ylim((0, 1.1 * y1.max()))

plot(vel, "Sailing")

plt.legend()
plt.show()