"""Train and evaluate a GRU model on windowed time-series data read from a CSV."""
from keras.models import load_model
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Dense, LSTM, SimpleRNN, GRU
import pandas as pd
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import os


def _denormalize(values, vmin, vmax):
    """Undo min-max scaling: map values from [0, 1] back to [vmin, vmax]."""
    return values * (vmax - vmin) + vmin


def _make_windows(values, n_predictions, n_next):
    """Slice a 2-D array into (input window, target window) pairs.

    Each input is ``n_predictions`` consecutive rows with every column; each
    target is the following ``n_next`` values of the second-to-last column.

    Returns:
        ``(inputs, targets)`` as float64 arrays of shape
        ``(n_windows, n_predictions, n_columns)`` and ``(n_windows, n_next)``.
        Both are empty (but correctly shaped) when there are too few rows.
    """
    n_rows, dim = values.shape
    target_col = dim - 2
    # NOTE(review): the trailing "- 1" is kept from the original code; it
    # leaves the last two rows of ``values`` unused as targets.
    n_windows = n_rows - n_predictions - n_next - 1
    if n_windows <= 0:
        return (np.empty((0, n_predictions, dim), dtype='float64'),
                np.empty((0, n_next), dtype='float64'))

    inputs, targets = [], []
    for start in range(n_windows):
        split = start + n_predictions
        inputs.append(values[start:split, :])
        targets.append(values[split:split + n_next, target_col])
    return (np.array(inputs, dtype='float64'),
            np.array(targets, dtype='float64'))


def norm(df, scale, *cols):
    """Min-max scale the given columns of ``df`` in place and return ``df``.

    Args:
        df: DataFrame whose columns are rescaled.
        scale: mapping holding ``"<col>_min"`` and ``"<col>_max"`` per column.
        *cols: names of the columns to rescale.

    A column whose min equals its max is set to 0.0 instead of dividing by
    zero.
    """
    for col in cols:
        low = scale[col + "_min"]
        span = scale[col + "_max"] - low
        if span == 0:
            df[col] = 0.0
        else:
            df[col] = (df[col] - low) / span
    return df


def inverse(df, scale, *cols):
    """Undo :func:`norm` on the given columns of ``df`` in place; return ``df``.

    Applies ``x * (max - min) + min`` using the same ``scale`` mapping that
    :func:`norm` consumes.
    """
    for col in cols:
        df[col] = _denormalize(df[col], scale[col + "_min"], scale[col + "_max"])
    return df


def create_predata(data, n_predictions, n_next, scale=None):
    """Build evaluation windows, optionally scaling with a precomputed ``scale``.

    Args:
        data: DataFrame containing a ``'Time'`` column plus feature columns.
        n_predictions: number of rows in each input window.
        n_next: number of future steps in each target window.
        scale: min/max mapping as returned by :func:`create_traindata`; when
            falsy the data is used unscaled.

    Returns:
        ``(pre_X, pre_Y)`` float64 arrays.
    """
    data = data.drop('Time', axis=1)
    if scale:
        data = norm(data, scale, *data.columns.tolist())
    return _make_windows(data.values, n_predictions, n_next)


def create_traindata(data, n_predictions, n_next, is_norm=False):
    """Build training windows and, if requested, the min-max scale used.

    Args:
        data: DataFrame containing a ``'Time'`` column plus feature columns.
        n_predictions: number of rows in each input window.
        n_next: number of future steps in each target window.
        is_norm: when true, min-max scale every column using this data's own
            minima and maxima.

    Returns:
        ``(train_X, train_Y, scale)`` where ``scale`` maps ``"<col>_max"`` and
        ``"<col>_min"`` to the column extrema, or is ``None`` when
        ``is_norm`` is false.
    """
    data = data.drop('Time', axis=1)
    scale = None
    if is_norm:
        cols = data.columns.tolist()
        scale = dict(zip((col + "_max" for col in cols), data.max().tolist()))
        scale.update(zip((col + "_min" for col in cols), data.min().tolist()))
        data = norm(data, scale, *cols)
    train_X, train_Y = _make_windows(data.values, n_predictions, n_next)
    return train_X, train_Y, scale


def trainModel(train_X, train_Y, save_dir="./gru/", validation=None, epochs=50):
    """Fit a two-layer GRU regressor and checkpoint the best epoch.

    Args:
        train_X: input windows.
        train_Y: target windows; its second dimension sets the output width.
        save_dir: directory receiving ``checkpoint.keras`` and the ``logs``
            folder for TensorBoard.
        validation: optional ``(val_X, val_Y)`` tuple. When omitted, the last
            5% of the training data is held out instead.
        epochs: number of training epochs.

    Returns:
        The fitted model (the original returned ``None``; callers that ignore
        the return value are unaffected).
    """
    os.makedirs(save_dir, exist_ok=True)

    model = tf.keras.Sequential([
        GRU(80, return_sequences=True),
        Dropout(0.2),
        GRU(40),
        Dropout(0.2),
        Dense(train_Y.shape[1]),
    ])

    tensorboard = TensorBoard(log_dir=os.path.join(save_dir, "logs"))
    checkpoint = ModelCheckpoint(os.path.join(save_dir, "checkpoint.keras"),
                                 monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='auto')

    model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
                  loss='mse',
                  metrics=[tf.keras.metrics.MeanSquaredError()])
    model.summary()

    # Keras ignores validation_split when validation_data is given; pass only
    # the one that actually applies so the intent is explicit.
    if validation is not None:
        val_kwargs = {"validation_data": validation}
    else:
        val_kwargs = {"validation_split": 0.05}

    model.fit(train_X, train_Y, epochs=epochs, batch_size=32, verbose=1,
              callbacks=[checkpoint, tensorboard], **val_kwargs)
    return model


def loadData(dir="index.csv"):
    """Read the CSV, drop two excluded date ranges and fill missing values.

    Args:
        dir: path of the CSV file (GBK-encoded, header row, ten columns).

    Returns:
        DataFrame with a parsed ``'Time'`` column and NaNs in numeric columns
        replaced by the column mean.
    """
    df = pd.read_csv(dir, header=0, encoding="gbk")
    df.columns = ['Time', 'cod', 'carbon', 'do', 'temperature', 'mlss', 'ph',
                  'ammonia', 'anoxia', 'aerobic']
    df['Time'] = pd.to_datetime(df['Time'])

    # Drop the 19th-23rd: rows from 2023-06-19 to 2023-06-23 and from
    # 2023-08-18 to 2023-08-22 (boundary timestamps included) are excluded.
    when = df['Time']
    keep = (
        (when > pd.Timestamp('2023/8/22'))
        | ((when > pd.Timestamp('2023/6/23')) & (when < pd.Timestamp('2023/8/18')))
        | (when < pd.Timestamp('2023/6/19'))
    )
    df = df[keep]
    # numeric_only avoids averaging the datetime column, which has no gaps
    # to fill after the mask above anyway.
    return df.fillna(df.mean(numeric_only=True))


def predict(save_dir, test_x):
    """Load ``checkpoint.keras`` from ``save_dir`` and predict on ``test_x``."""
    model = load_model(os.path.join(save_dir, "checkpoint.keras"))
    return model.predict(test_x)


def main():
    """Train the model, print test/train relative errors and plot predictions."""
    df = loadData()
    split_date = pd.Timestamp('2023/8/01')
    # NOTE(review): the original line defining ``train_df`` was garbled in the
    # source (``df[df['Time']0]``); it is reconstructed here as the complement
    # of ``test_df`` -- confirm. The model below is still fitted on the full
    # ``df`` exactly as before, so the test window overlaps the training data.
    train_df = df[df['Time'] < split_date]  # noqa: F841
    test_df = df[df['Time'] >= split_date]

    x, y, scale = create_traindata(df, 6, 3, is_norm=True)
    test_x, test_y = create_predata(test_df, 6, 3, scale)

    save_dir = os.path.join(os.getcwd(), "gru")
    trainModel(x, y, save_dir=save_dir, validation=(test_x, test_y), epochs=3000)

    amin, amax = scale["anoxia_min"], scale["anoxia_max"]

    # Test-set predictions, mapped back to the original units.
    test_pred = _denormalize(predict(save_dir, test_x), amin, amax)
    test_true = _denormalize(test_y, amin, amax)

    pre_df = pd.DataFrame({'pre': test_pred[:, 0], 'true': test_true[:, 0]})

    # Single-step prediction error (relative error of the means).
    mean_pre, mean_true = pre_df.mean().tolist()
    print(abs(mean_pre - mean_true) / mean_true)

    # Prediction error (mean of per-sample relative errors).
    pre_df['diff'] = abs(pre_df['pre'] - pre_df['true']) / pre_df['true']
    print(pre_df.mean())

    # After the transpose each column is one sample, so .mean() averages
    # over the forecast horizon.
    pre_3 = pd.DataFrame(test_pred).T
    true_3 = pd.DataFrame(test_true).T
    diff = abs((pre_3 - true_3).mean()) / true_3.mean()
    print(diff.mean())

    # Training error.
    pre_3 = pd.DataFrame(_denormalize(predict(save_dir, x), amin, amax)).T
    true_3 = pd.DataFrame(_denormalize(y, amin, amax)).T
    stats = pd.concat([(pre_3 - true_3).abs().mean(), true_3.mean()], axis=1)
    stats.columns = ['diff', 'true']
    # Copy so adding a column does not write into a filtered view.
    stats = stats[stats['true'] > 0].copy()
    stats['rate'] = stats['diff'] / stats['true']
    print(stats[stats['rate'] < 1].mean())

    plt.plot(pre_df['pre'])
    plt.plot(pre_df['true'])
    plt.legend(labels=["pre", "true"])
    plt.show()


if __name__ == "__main__":
    main()