plotter.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. import pandas as pd
  2. import numpy as np
  3. import scipy.stats as stats
  4. import random
  5. import matplotlib.pyplot as plt
  6. # wind = pd.read_csv("wind.csv", parse_dates=[1, 4], date_format="%Y-%m-%d %H:%M:%S.%f")
  7. # wind.dropna()
  8. # start = wind.iloc[0]["Aanvang tijd"]
  9. # wind["start"] = wind["Aanvang tijd"] - start
  10. # wind["start"] = wind["start"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
  11. # wind["end"] = wind["Einde tijd"] - start
  12. # wind["end"] = wind["end"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
  13. #
  14. # ax1 = plt.subplot(211)
  15. # plt.tick_params('x', labelbottom=False)
  16. # ax1.set_ylabel("wind force (B)")
  17. #
  18. # ax2 = plt.subplot(212, sharex=ax1)
  19. # ax2.set_ylabel("wind direction")
  20. # ax2.set_xlabel("time (h)")
  21. #
  22. # for sluis in wind["Sluis"].unique():
  23. # info = wind[wind["Sluis"] == sluis]
  24. # xs, ys, ws = [], [], []
  25. # for _, row in info.iterrows():
  26. # xs.append(row["start"])
  27. # xs.append(row["end"])
  28. # ys.append(row["Aanvang windkracht"])
  29. # ys.append(row["Einde windkracht"])
  30. # ws.append(row["Aanvang windrichting"])
  31. # ws.append(row["Einde windrichting"])
  32. # if len(ws) > 2 and (ws[-1] == np.nan or ws[-2] == np.nan):
  33. # print(row["start"])
  34. # ax1.plot(xs, ys, label=sluis)
  35. # ax2.plot(xs, ws, label=sluis)
  36. #
  37. # plt.tight_layout()
  38. # plt.legend()
  39. # plt.show()
  40. #
  41. # exit()
  42. df = pd.read_csv("2022.csv", parse_dates=[1, 3, 4, 5], date_format="%Y-%m-%d %H:%M:%S")
  43. df["duur"] = df["Einde tijd"] - df["Aanvang tijd"]
  44. df["duur"] = df["duur"].apply(lambda x: x.to_numpy() / np.timedelta64(1, 'h'))
  45. df["Afstand"] = df["Afstand"].astype(float)
  46. df["velocity"] = df["Afstand"] / df["duur"] # km/h
  47. df.replace([np.inf, -np.inf], np.nan, inplace=True)
  48. df = df.dropna()
  49. task = df["Taak"] == "Slepen"
  50. vel = df["velocity"][task]
  51. # remove outliers
  52. Q1 = vel.quantile(0.25)
  53. Q3 = vel.quantile(0.75)
  54. IQR = Q3 - Q1
  55. trueList = ~((vel < (Q1 - 1.5 * IQR)) | (vel > (Q3 + 1.5 * IQR)))
  56. print("full:", len(vel))
  57. inv = vel[~trueList]
  58. vel = vel[trueList]
  59. print("IQR:", len(vel), Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)
  60. print("inv:", len(inv), inv.min(), inv.max())
  61. # unique color for each tug
  62. colors = { i: "#"+''.join([random.choice('0123456789ABCDEF') for _ in range(6)]) for i in df["Sleepboot"].unique() }
  63. df["color"] = [colors[b] for b in df["Sleepboot"]]
  64. # plt.scatter(df["time"][task][trueList], vel, s=1, c=df["color"][task][trueList])
  65. # plt.title("Sailing (Grouped by Start Time)")
  66. # plt.xlabel("time (h)")
  67. # plt.ylabel("velocity (km/h)")
  68. # plt.show()
  69. fig, ax1 = plt.subplots()
  70. ax1.set_xlabel("velocity (km/h)")
  71. ax2 = ax1.twinx()
  72. ax1.set_ylabel("amount")
  73. # ax2.set_ylabel("", color="tab:red")
  74. ax2.tick_params(axis='y', labelcolor='r')
  75. def plot(series, title):
  76. plt.title(title)
  77. buckets = {}
  78. for v in series:
  79. buckets.setdefault(v, 0)
  80. buckets[v] += 1
  81. if 0 in buckets:
  82. del buckets[0]
  83. ax1.bar(buckets.keys(), buckets.values(), label=title)
  84. ax1.set_ylim((0, 1.1 * max(buckets.values())))
  85. mu = series.mean()
  86. std = series.std()
  87. mx = int(np.ceil(series.max()))
  88. beta = mu / std**2
  89. alpha = mu * beta
  90. x = np.linspace(0, mx, mx * 2)
  91. # y1 = stats.gamma.pdf(x, a=alpha, scale=1./beta)
  92. y1 = stats.norm.pdf(x, mu, std)
  93. ax2.plot(x, y1, c='r', label=r"$\Gamma(%.3f, %.3f)$" % (mu, std))
  94. ax2.set_ylim((0, 1.1 * y1.max()))
  95. plot(vel, "Sailing")
  96. plt.legend()
  97. plt.show()