数据
输入32维的向量并输出值
有151组数据
目的
使用这样的一组数据建立预测模型
输入32维向量可以预测值
代码部分
1 导入工具包
# 1. Import packages
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from tensorflow.keras.callbacks import History
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
# 2. Read data.
# index_col=0: use the first column of the sheet as the row index.
df = pd.read_excel('data.xls', index_col=0)
# Transpose so that rows are samples and columns are features
# (151 samples x 32-dimensional input + target).
df = df.T
# 3. Normalize every feature column with MinMaxScaler (fit_transform
# scales each column to [0, 1]). The target column 'soot' is skipped.
model_m = MinMaxScaler()
for i in df.columns:
    if i == 'soot':
        pass
    else:
        # fit_transform expects a 2-D input, hence df[[i]]
        df[i] = model_m.fit_transform(df[[i]])
4 拆分数据集
前100个作为训练集,后51个作为测试集
# 4. Split the data set: shuffle the 151 samples, then take the first
# 100 rows as the training pool and the last 51 as the held-out test set.
df = shuffle(df)   # random row order
df1 = df[:100]     # training pool (train + validation)
df2 = df[100:]     # test set
# 5. Feature selection by Spearman correlation with the target 'soot'.
column = df1.columns.tolist()                         # column index as a list
corr_num = df1[column].corr(method='spearman').abs()  # absolute correlations
# keep only features whose |corr| with 'soot' exceeds 0.2
corr_ratio = corr_num[corr_num['soot'] > 0.2]['soot']
# Heatmap of the pairwise correlations of the selected columns.
# NOTE(review): the original figure size and heatmap argument were
# garbled in the source — TODO confirm against the original notebook.
plt.figure(figsize=(20, 6))
g = sns.heatmap(df1[corr_ratio.index].corr(), annot=True, cmap='RdYlGn')
# Sort by correlation strength and turn the index into a regular column.
corr_top = corr_ratio.sort_values(ascending=False).reset_index()
# Rename the former index column to '特征' (feature name) — presumably
# what the original did; verify against the original notebook.
corr_top.columns = ['特征', 'soot']
df_top = [col for col in corr_top['特征']]   # selected feature names
df1 = df1[df_top]                            # keep only the selected columns
# 6. Train / validation split.
train_data = df1[df1.columns[1:]]   # inputs — assumes column 0 is the label 'soot'; TODO confirm
test_data = df1['soot']             # label
# test_size=0.3 -> 70 training samples, 30 validation samples;
# a fixed random_state=2020 makes the partition reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    train_data, test_data, test_size=0.3, random_state=2020)
# 7. Held-out test set (51 samples).
X_test = df2[df2.columns[1:]]   # input
y_test = df2['soot']            # label
# 8. KNN
8.1 网格调参
8.2 R2分数
# 8.2 R2 score on the validation set.
# p=1 (Manhattan distance) and n_neighbors=3 are the best values found
# by the grid search of section 8.1.
model_KNN = KNeighborsRegressor(p=1, n_neighbors=3)
model_KNN.fit(X_train, y_train)          # train on the training set
predict_KNN = model_KNN.predict(X_val)   # predict on the validation set
r2_score(y_val, predict_KNN)             # R2 score
# 8.3 Test-set prediction: plot predicted vs. actual values
# for the 51 held-out samples.
predict_KNN = model_KNN.predict(X_test)
x = [i for i in range(0, 51)]
y1 = [i for i in y_test]        # actual values
y2 = [i for i in predict_KNN]   # predicted values
plt.figure(figsize=(20, 6))
plt.plot(x, y1, label='真实值')
plt.plot(x, y2, label='预测值')
plt.title('KNN')
plt.legend()   # legend position
# 8.4 R2 score of KNN on the test set.
r2_score(y_test, predict_KNN)
# 9. ANN
9.1 搭建初级模型
9.2 R2分数
# 9.2 R2 score of the first ANN on the validation set.
# (model_1 is built in section 9.1, whose code is not in this fragment.)
predict_ANN = model_1.predict(X_val)   # predictions on the validation set
r2_score(y_val, predict_ANN)
# 9.3 Five hidden layers, 32 neurons each.
model_2 = Sequential()
model_2.add(Dense(32, input_dim=7, activation='relu'))  # input_dim=7: the 7 selected features
model_2.add(Dense(32, activation='relu'))
model_2.add(Dense(32, activation='relu'))
model_2.add(Dense(32, activation='relu'))
model_2.add(Dense(32, activation='relu'))
model_2.add(Dense(1, activation='linear'))              # single-value regression output
model_2.compile(optimizer='adam', loss='mean_absolute_error',
                metrics=['mean_absolute_error'])
history = model_2.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=200, batch_size=16)
# Plot the training and validation loss curves.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('模型的训练和验证损失曲线')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['Train', 'Validation'], loc='upper right')
# 9.4 R2 score of model_2 on the validation set.
predict_ANN = model_2.predict(X_val)
r2_score(y_val, predict_ANN)
# 9.5 Five hidden layers, 64 neurons each (more capacity than model_2).
model_3 = Sequential()
model_3.add(Dense(64, input_dim=7, activation='relu'))  # input_dim=7: the 7 selected features
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(1, activation='linear'))              # single-value regression output
model_3.compile(optimizer='adam', loss='mean_absolute_error',
                metrics=['mean_absolute_error'])
history = model_3.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=100, batch_size=16)
# Plot the training and validation loss curves.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('模型的训练和验证损失曲线')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['Train', 'Validation'], loc='upper right')
# 9.6 R2 score of model_3 on the validation set.
predict_ANN = model_3.predict(X_val)
r2_score(y_val, predict_ANN)
# 9.7 Test-set prediction with the best of the three ANN models
# (model_3 had the highest validation R2 score).
predict_ANN = model_3.predict(X_test)
x = [i for i in range(0, 51)]
y1 = [i for i in y_test]        # actual values
y2 = [i for i in predict_ANN]   # predicted values
plt.figure(figsize=(20, 6))
plt.plot(x, y1, label='真实值')
plt.plot(x, y2, label='预测值')
plt.title('ANN')
plt.legend()
# 9.8 R2 score of the final model on the test set.
r2_score(y_test, predict_ANN)