# gru.py
  1. from keras.models import load_model
  2. import numpy as np
  3. import tensorflow as tf
  4. from tensorflow.keras.layers import Dropout, Dense, LSTM,SimpleRNN,GRU
  5. import pandas as pd
  6. from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
  7. import matplotlib
  8. matplotlib.use('TkAgg')
  9. import matplotlib.pyplot as plt
  10. import os
  11. def norm(df,scale, *cols):
  12. for col in cols:
  13. df[col]=(df[col]-scale.get(col+"_min"))/(scale.get(col+"_max")-scale.get(col+"_min"))
  14. return df
  15. def inverse(df,scale, *cols):
  16. for col in cols:
  17. df[col]=(df[col]-scale.get(col+"_min"))*(scale.get(col+"_max")-scale.get(col+"_min"))
  18. return df
  19. def create_predata(data, n_predictions, n_next, scale=None):
  20. data=data.drop('Time', axis=1)
  21. if scale:
  22. cols = data.columns.tolist()
  23. data=norm(data,scale, *cols)
  24. dim = data.shape[1]
  25. data = data.values
  26. train_X, train_Y = [], []
  27. for i in range(data.shape[0] - n_predictions - n_next - 1):
  28. a = data[i:(i + n_predictions), :]
  29. train_X.append(a)
  30. b = []
  31. tempb = data[(i + n_predictions):(i + n_predictions + n_next), :]
  32. for j in range(len(tempb)):
  33. for k in range((dim - 2), (dim - 1)):
  34. b.append(tempb[j, k])
  35. train_Y.append(b)
  36. pre_X = np.array(train_X, dtype='float64')
  37. pre_Y = np.array(train_Y, dtype='float64')
  38. return pre_X,pre_Y
  39. def create_traindata(data, n_predictions, n_next, is_norm=False):
  40. '''
  41. 对数据进行处理
  42. '''
  43. data=data.drop('Time', axis=1)
  44. scale=None
  45. if is_norm:
  46. cols = data.columns.tolist()
  47. scale=dict(zip(map(lambda x: x+"_max",cols), data.max().tolist()))
  48. min_val = dict(zip(map(lambda x: x + "_min", cols), data.min().tolist()))
  49. scale.update(min_val)
  50. data=norm(data,scale, *cols)
  51. #max_min_scale=lambda x: (x-np.min(x))/(np.max(x)-np.min(x))
  52. # for col in cols:
  53. # data[col]=data[[col]].apply(max_min_scale)
  54. dim = data.shape[1]
  55. data=data.values
  56. train_X, train_Y = [], []
  57. for i in range(data.shape[0] - n_predictions - n_next - 1):
  58. a = data[i:(i + n_predictions), :]
  59. train_X.append(a)
  60. b=[]
  61. tempb = data[(i + n_predictions):(i + n_predictions + n_next), :]
  62. for j in range(len(tempb)):
  63. for k in range((dim-2), (dim-1)):
  64. b.append(tempb[j, k])
  65. train_Y.append(b)
  66. train_X = np.array(train_X, dtype='float64')
  67. train_Y = np.array(train_Y, dtype='float64')
  68. return train_X, train_Y, scale
  69. def trainModel(train_X, train_Y, save_dir="./gru/", validation=None, epochs=50):
  70. '''
  71. trainX,trainY: 训练LSTM模型所需要的数据
  72. '''
  73. model = tf.keras.Sequential([
  74. GRU(80, return_sequences=True),
  75. Dropout(0.2),
  76. GRU(40),
  77. Dropout(0.2),
  78. Dense(train_Y.shape[1])
  79. ])
  80. log_dir = save_dir+"logs"
  81. tensorboard = TensorBoard(log_dir=log_dir) #histogram_freq=1
  82. checkpoint = ModelCheckpoint(save_dir+"checkpoint.keras", monitor='val_loss', verbose=1,
  83. save_best_only=True, mode='auto')
  84. model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
  85. loss='mse', metrics=[tf.keras.metrics.MeanSquaredError()])
  86. model.summary()
  87. model.fit(train_X, train_Y, epochs=epochs, batch_size=32, verbose=1,validation_split=0.05,validation_data=validation,callbacks=[checkpoint,tensorboard])
  88. def loadData(dir="index.csv"):
  89. df = pd.read_csv(dir, header=0, encoding="gbk")
  90. df.columns=['Time','cod','carbon','do','temperature','mlss','ph','ammonia','anoxia','aerobic']
  91. #19-23号删除
  92. df['Time'] = pd.to_datetime(df['Time'])
  93. mask = (df['Time'] > pd.Timestamp('2023/8/22')) | (
  94. (df['Time'] > pd.Timestamp('2023/6/23')) & (df['Time'] < pd.Timestamp('2023/8/18'))) | \
  95. (df['Time'] < pd.Timestamp('2023/6/19'))
  96. df = df[mask]
  97. df2 = df.fillna(df.mean())
  98. feat=df2['Time'].apply(lambda x: [x.hour, x.weekday()]).tolist()
  99. feat=pd.DataFrame(feat, columns=['hour','weekday'])
  100. return df2
  101. def predict(save_dir, test_x):
  102. model=load_model(save_dir+"checkpoint.keras")
  103. return model.predict(test_x)
  104. def main():
  105. df=loadData()
  106. #df=df[df['Time']>=pd.Timestamp('2023/3/01')]
  107. train_df=df[df['Time']<pd.Timestamp('2023/8/01')]
  108. #train_df = df[df['anoxia']>0]
  109. test_df = df[df['Time'] >= pd.Timestamp('2023/8/01')]
  110. x,y,scale=create_traindata(df, 6, 3, is_norm=True)
  111. test_x, test_y = create_predata(test_df, 6, 3,scale)
  112. save_dir=os.getcwd()+"/gru/"
  113. trainModel(x, y,save_dir=save_dir,validation=(test_x,test_y), epochs=3000)
  114. pre_y=predict(save_dir, test_x)
  115. ret = []
  116. amin, amax = scale.get("anoxia" + "_min"), scale.get("anoxia" + "_max")
  117. for i in range(pre_y.shape[0]):
  118. ret.append([(pre_y[i][0] + amin) * (amax - amin), (test_y[i][0] + amin) * (amax - amin)])
  119. pre_df = pd.DataFrame(ret, columns=['pre', 'true'])
  120. val = pre_df.mean().tolist()
  121. # 单步预测误差
  122. print(abs(val[0] - val[1]) / val[1])
  123. # 预测误差
  124. pre_df['diff'] = abs(pre_df['pre'] - pre_df['true']) / pre_df['true']
  125. print(pre_df.mean())
  126. pre_3=pd.DataFrame((pre_y+amin)*(amax-amin)).T
  127. true_3=pd.DataFrame((test_y+amin)*(amax-amin)).T
  128. diff=abs((pre_3-true_3).mean())/true_3.mean()
  129. print(diff.mean())
  130. # #训练误差
  131. pre_y = predict(save_dir, x)
  132. pre_3 = pd.DataFrame((pre_y + amin) * (amax - amin)).T
  133. true_3 = pd.DataFrame((y + amin) * (amax - amin)).T
  134. x1=abs((pre_3 - true_3)).mean()
  135. x2=true_3.mean()
  136. diff=pd.concat([x1,x2],axis=1)
  137. z=diff[diff.iloc[:,1]>0]
  138. z.columns=['diff','true']
  139. z['rate']=z['diff']/z['true']
  140. print(z[z['rate']<1].mean())
  141. plt.plot(pre_df['pre'])
  142. plt.plot(pre_df['true'])
  143. plt.legend(labels=["pre", "true"])
  144. plt.show()
  145. if __name__=="__main__":
  146. main()