| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- import pandas as pd
- import numpy as np
- import scipy.stats as stats
- import random
- import matplotlib.pyplot as plt
- # wind = pd.read_csv("wind.csv", parse_dates=[1, 4], date_format="%Y-%m-%d %H:%M:%S.%f")
- # wind.dropna()
- # start = wind.iloc[0]["Aanvang tijd"]
- # wind["start"] = wind["Aanvang tijd"] - start
- # wind["start"] = wind["start"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
- # wind["end"] = wind["Einde tijd"] - start
- # wind["end"] = wind["end"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
- #
- # ax1 = plt.subplot(211)
- # plt.tick_params('x', labelbottom=False)
- # ax1.set_ylabel("wind force (B)")
- #
- # ax2 = plt.subplot(212, sharex=ax1)
- # ax2.set_ylabel("wind direction")
- # ax2.set_xlabel("time (h)")
- #
- # for sluis in wind["Sluis"].unique():
- # info = wind[wind["Sluis"] == sluis]
- # xs, ys, ws = [], [], []
- # for _, row in info.iterrows():
- # xs.append(row["start"])
- # xs.append(row["end"])
- # ys.append(row["Aanvang windkracht"])
- # ys.append(row["Einde windkracht"])
- # ws.append(row["Aanvang windrichting"])
- # ws.append(row["Einde windrichting"])
- # if len(ws) > 2 and (ws[-1] == np.nan or ws[-2] == np.nan):
- # print(row["start"])
- # ax1.plot(xs, ys, label=sluis)
- # ax2.plot(xs, ws, label=sluis)
- #
- # plt.tight_layout()
- # plt.legend()
- # plt.show()
- #
- # exit()
- df = pd.read_csv("2022.csv", parse_dates=[1, 3, 4, 5], date_format="%Y-%m-%d %H:%M:%S")
- df["duur"] = df["Einde tijd"] - df["Aanvang tijd"]
- df["duur"] = df["duur"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
- df["Afstand"] = df["Afstand"].astype(float)
- df["velocity"] = df["Afstand"] / df["duur"] # km/h
- df.replace([np.inf, -np.inf], np.nan, inplace=True)
- df = df.dropna()
- task = df["Taak"] == "Slepen"
- vel = df["velocity"][task]
- # remove outliers
- Q1 = vel.quantile(0.25)
- Q3 = vel.quantile(0.75)
- IQR = Q3 - Q1
- trueList = ~((vel < (Q1 - 1.5 * IQR)) | (vel > (Q3 + 1.5 * IQR)))
- print("full:", len(vel))
- inv = vel[~trueList]
- vel = vel[trueList]
- print("IQR:", len(vel), Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)
- print("inv:", len(inv), inv.min(), inv.max())
- # unique color for each tug
- colors = { i: "#"+''.join([random.choice('0123456789ABCDEF') for _ in range(6)]) for i in df["Sleepboot"].unique() }
- df["color"] = [colors[b] for b in df["Sleepboot"]]
- # plt.scatter(df["time"][task][trueList], vel, s=1, c=df["color"][task][trueList])
- # plt.title("Sailing (Grouped by Start Time)")
- # plt.xlabel("time (h)")
- # plt.ylabel("velocity (km/h)")
- # plt.show()
- fig, ax1 = plt.subplots()
- ax1.set_xlabel("velocity (km/h)")
- ax2 = ax1.twinx()
- ax1.set_ylabel("amount")
- # ax2.set_ylabel("", color="tab:red")
- ax2.tick_params(axis='y', labelcolor='r')
- def plot(series, title):
- plt.title(title)
- buckets = {}
- for v in series:
- buckets.setdefault(v, 0)
- buckets[v] += 1
- if 0 in buckets:
- del buckets[0]
- ax1.bar(buckets.keys(), buckets.values(), label=title)
- ax1.set_ylim((0, 1.1 * max(buckets.values())))
- mu = series.mean()
- std = series.std()
- mx = int(np.ceil(series.max()))
- beta = mu / std**2
- alpha = mu * beta
- x = np.linspace(0, mx, mx * 2)
- # y1 = stats.gamma.pdf(x, a=alpha, scale=1./beta)
- y1 = stats.norm.pdf(x, mu, std)
- ax2.plot(x, y1, c='r', label=r"$\Gamma(%.3f, %.3f)$" % (mu, std))
- ax2.set_ylim((0, 1.1 * y1.max()))
- plot(vel, "Sailing")
- plt.legend()
- plt.show()
|