123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- from keras.models import load_model
- import numpy as np
- import tensorflow as tf
- from tensorflow.keras.layers import Dropout, Dense, LSTM,SimpleRNN,GRU
- import pandas as pd
- from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
- import matplotlib
- matplotlib.use('TkAgg')
- import matplotlib.pyplot as plt
- import os
def norm(df, scale, *cols):
    """Min-max scale the named columns of ``df`` in place.

    Args:
        df: DataFrame whose columns are rescaled; it is modified in place.
        scale: Mapping holding ``"<col>_min"`` and ``"<col>_max"`` entries
            for every column named in ``cols``.
        *cols: Names of the columns to rescale.

    Returns:
        The same ``df`` object, with each listed column replaced by
        ``(value - min) / (max - min)``.
    """
    for col in cols:
        low = scale.get(col + "_min")
        high = scale.get(col + "_max")
        span = high - low
        # A constant column has max == min; dividing by zero would fill the
        # column with NaN/inf. Treat a zero range as 1 so the result is
        # simply ``value - min`` (0 for the constant value itself).
        if span == 0:
            span = 1
        df[col] = (df[col] - low) / span
    return df
def inverse(df, scale, *cols):
    """Undo min-max scaling on the named columns of ``df`` in place.

    Args:
        df: DataFrame whose columns hold min-max scaled values; it is
            modified in place.
        scale: Mapping holding ``"<col>_min"`` and ``"<col>_max"`` entries
            for every column named in ``cols``.
        *cols: Names of the columns to convert back to original units.

    Returns:
        The same ``df`` object, with each listed column replaced by
        ``value * (max - min) + min``.
    """
    for col in cols:
        low = scale.get(col + "_min")
        high = scale.get(col + "_max")
        # Inverse of (x - min) / (max - min): multiply by the range first,
        # then add the minimum back (the previous code subtracted the
        # minimum instead, which does not round-trip).
        df[col] = df[col] * (high - low) + low
    return df
def create_predata(data, n_predictions, n_next, scale=None):
    """Build sliding-window inputs and targets for prediction/evaluation.

    The 'Time' column is dropped; if ``scale`` is truthy the remaining
    columns are min-max scaled with ``norm`` using those bounds.

    Args:
        data: DataFrame containing a 'Time' column plus feature columns.
        n_predictions: Number of consecutive rows in each input window.
        n_next: Number of rows following the window used as the target.
        scale: Optional mapping of ``"<col>_min"`` / ``"<col>_max"`` bounds.

    Returns:
        ``(pre_X, pre_Y)`` as float64 arrays. Each input window holds
        ``n_predictions`` rows of every column; each target holds the next
        ``n_next`` values of the second-to-last column. The number of
        windows is ``rows - n_predictions - n_next - 1``.
    """
    features = data.drop('Time', axis=1)
    if scale:
        features = norm(features, scale, *features.columns.tolist())

    # The target is always the single column at index ``dim - 2``.
    target_col = features.shape[1] - 2
    values = features.values
    window_count = values.shape[0] - n_predictions - n_next - 1

    inputs = [
        values[start:start + n_predictions, :]
        for start in range(window_count)
    ]
    targets = [
        list(values[start + n_predictions:start + n_predictions + n_next, target_col])
        for start in range(window_count)
    ]

    pre_X = np.array(inputs, dtype='float64')
    pre_Y = np.array(targets, dtype='float64')
    return pre_X, pre_Y
def create_traindata(data, n_predictions, n_next, is_norm=False):
    """Build sliding-window training inputs and targets.

    The 'Time' column is dropped. When ``is_norm`` is true, per-column
    minimum and maximum values are collected into a dict and the columns
    are min-max scaled with ``norm``.

    Args:
        data: DataFrame containing a 'Time' column plus feature columns.
        n_predictions: Number of consecutive rows in each input window.
        n_next: Number of rows following the window used as the target.
        is_norm: Whether to min-max scale the columns before windowing.

    Returns:
        ``(train_X, train_Y, scale)``. The arrays are float64; each input
        window holds ``n_predictions`` rows of every column and each target
        holds the next ``n_next`` values of the second-to-last column. The
        number of windows is ``rows - n_predictions - n_next - 1``.
        ``scale`` maps ``"<col>_max"`` / ``"<col>_min"`` to the column
        bounds, or is ``None`` when ``is_norm`` is false.
    """
    features = data.drop('Time', axis=1)
    scale = None
    if is_norm:
        names = features.columns.tolist()
        scale = {
            name + "_max": bound
            for name, bound in zip(names, features.max().tolist())
        }
        scale.update({
            name + "_min": bound
            for name, bound in zip(names, features.min().tolist())
        })
        features = norm(features, scale, *names)

    # The target is always the single column at index ``dim - 2``.
    target_col = features.shape[1] - 2
    values = features.values
    window_count = values.shape[0] - n_predictions - n_next - 1

    inputs = [
        values[start:start + n_predictions, :]
        for start in range(window_count)
    ]
    targets = [
        list(values[start + n_predictions:start + n_predictions + n_next, target_col])
        for start in range(window_count)
    ]

    train_X = np.array(inputs, dtype='float64')
    train_Y = np.array(targets, dtype='float64')
    return train_X, train_Y, scale
def trainModel(train_X, train_Y, save_dir="./gru/", validation=None, epochs=50):
    """Build, compile and fit a two-layer GRU regression model.

    Args:
        train_X: Training inputs passed to ``model.fit``.
        train_Y: Training targets; ``train_Y.shape[1]`` sets the width of
            the final Dense layer.
        save_dir: Path prefix under which ``logs`` (TensorBoard) and
            ``checkpoint.keras`` (best model by ``val_loss``) are written.
            It is concatenated directly, so it should end with a separator.
        validation: Optional value forwarded as ``validation_data``.
        epochs: Number of training epochs.
    """
    model = tf.keras.Sequential()
    for layer in (
        GRU(80, return_sequences=True),
        Dropout(0.2),
        GRU(40),
        Dropout(0.2),
        Dense(train_Y.shape[1]),
    ):
        model.add(layer)

    tensorboard = TensorBoard(log_dir=save_dir + "logs")
    # Only the checkpoint with the best validation loss is kept on disk.
    checkpoint = ModelCheckpoint(
        save_dir + "checkpoint.keras",
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='auto',
    )

    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        loss='mse',
        metrics=[tf.keras.metrics.MeanSquaredError()],
    )
    model.summary()

    # NOTE(review): both validation_split and validation_data are passed;
    # per the Keras docs validation_data takes precedence when it is given.
    model.fit(
        train_X,
        train_Y,
        epochs=epochs,
        batch_size=32,
        verbose=1,
        validation_split=0.05,
        validation_data=validation,
        callbacks=[checkpoint, tensorboard],
    )
def loadData(dir="index.csv"):
    """Load the measurement CSV, drop two date ranges and fill missing values.

    Args:
        dir: Path of the GBK-encoded CSV file to read. The file must have a
            header row and exactly ten columns, which are renamed to
            ``Time, cod, carbon, do, temperature, mlss, ph, ammonia,
            anoxia, aerobic``.

    Returns:
        A DataFrame with 'Time' parsed as datetimes, containing only rows
        strictly before 2023-06-19, strictly between 2023-06-23 and
        2023-08-18, or strictly after 2023-08-22, with missing numeric
        values replaced by the column mean of the kept rows.
    """
    df = pd.read_csv(dir, header=0, encoding="gbk")
    df.columns = ['Time', 'cod', 'carbon', 'do', 'temperature', 'mlss',
                  'ph', 'ammonia', 'anoxia', 'aerobic']
    df['Time'] = pd.to_datetime(df['Time'])

    # Exclude the 19th-23rd of June and the 18th-22nd of August 2023.
    before_june_gap = df['Time'] < pd.Timestamp('2023/6/19')
    between_gaps = (df['Time'] > pd.Timestamp('2023/6/23')) & (
        df['Time'] < pd.Timestamp('2023/8/18'))
    after_august_gap = df['Time'] > pd.Timestamp('2023/8/22')
    df = df[after_august_gap | between_gaps | before_june_gap]

    # Average only the numeric columns: 'Time' cannot be missing after the
    # filter above (NaT fails every comparison), so it needs no fill value.
    return df.fillna(df.mean(numeric_only=True))
def predict(save_dir, test_x):
    """Load the saved checkpoint under ``save_dir`` and run it on ``test_x``.

    Args:
        save_dir: Path prefix; ``checkpoint.keras`` is appended directly.
        test_x: Inputs forwarded to the loaded model's ``predict``.

    Returns:
        Whatever the loaded model's ``predict`` returns for ``test_x``.
    """
    checkpoint_path = save_dir + "checkpoint.keras"
    return load_model(checkpoint_path).predict(test_x)
def main():
    """Train the GRU model on pre-August data and report errors.

    Loads the data, splits it at 2023-08-01, trains on the earlier rows
    with the later rows as validation data, then prints relative errors on
    the hold-out and training windows and plots predicted against true
    values for the first forecast step.
    """
    df = loadData()
    cutoff = pd.Timestamp('2023/8/01')
    train_df = df[df['Time'] < cutoff]
    test_df = df[df['Time'] >= cutoff]

    # Fit the scaling bounds and build training windows from the training
    # rows only; using the full frame would leak the hold-out period into
    # both the model and the normalization.
    x, y, scale = create_traindata(train_df, 6, 3, is_norm=True)
    test_x, test_y = create_predata(test_df, 6, 3, scale)

    save_dir = os.getcwd() + "/gru/"
    trainModel(x, y, save_dir=save_dir, validation=(test_x, test_y), epochs=3000)

    amin, amax = scale.get("anoxia_min"), scale.get("anoxia_max")

    def denorm(values):
        # Inverse of the min-max scaling (v - min) / (max - min).
        return values * (amax - amin) + amin

    pre_y = predict(save_dir, test_x)
    # First forecast step only, back in original units.
    pre_df = pd.DataFrame({
        'pre': denorm(pre_y[:, 0]),
        'true': denorm(test_y[:, 0]),
    })

    # Single-step error: relative difference of the mean prediction.
    val = pre_df.mean().tolist()
    print(abs(val[0] - val[1]) / val[1])

    # Prediction error: mean per-sample relative error.
    pre_df['diff'] = abs(pre_df['pre'] - pre_df['true']) / pre_df['true']
    print(pre_df.mean())

    # Error over all forecast steps of each hold-out window.
    pre_3 = pd.DataFrame(denorm(pre_y)).T
    true_3 = pd.DataFrame(denorm(test_y)).T
    diff = abs((pre_3 - true_3).mean()) / true_3.mean()
    print(diff.mean())

    # Training error.
    pre_y = predict(save_dir, x)
    pre_3 = pd.DataFrame(denorm(pre_y)).T
    true_3 = pd.DataFrame(denorm(y)).T
    x1 = abs(pre_3 - true_3).mean()
    x2 = true_3.mean()
    diff = pd.concat([x1, x2], axis=1)
    # Copy the filtered rows so the column assignments below act on an
    # independent frame rather than a slice of ``diff``.
    z = diff[diff.iloc[:, 1] > 0].copy()
    z.columns = ['diff', 'true']
    z['rate'] = z['diff'] / z['true']
    print(z[z['rate'] < 1].mean())

    plt.plot(pre_df['pre'])
    plt.plot(pre_df['true'])
    plt.legend(labels=["pre", "true"])
    plt.show()
# Run the training/evaluation pipeline only when executed as a script.
if __name__ == "__main__":
    main()
|