决策树算法原理及案例,用决策树法决策的案例

sklearn决策树实战案例

# Notebook setup: display the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = 'all'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed its bundled 'seaborn' style to 'seaborn-v0_8';
# pick whichever name this installation provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

# With a CJK-capable font the Unicode minus sign can render as a box;
# fall back to the ASCII hyphen so axis tick labels are not garbled.
plt.rcParams['axes.unicode_minus'] = False

# Font that can render Chinese labels on macOS.
plt.rcParams['font.family'] = ['Arial Unicode MS']

# Load the training data from train.csv.
# NOTE(review): this is the author's local absolute path, reconstructed from
# garbled text -- point it at your own copy of train.csv.
df = pd.read_csv(r'/Users/liuzeming/Desktop/python/机器学习/练习/train.csv')

# Print a summary of the columns, dtypes and non-null counts.
df.info()

# Split features from the label: drop the customer-ID column and the
# target column 'y' to obtain the feature matrix.
df1 = df.drop(['客户ID', 'y'], axis=1)
df1

X = df1
y = df['y']

# Check whether the classes are balanced.
y.value_counts()

# Split the data into a training set and a test set.
# The test set is 20% of the samples; the random seed is 666.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

# Instantiate the decision-tree classifier.
# class_weight is set to 'balanced' and random_state is set to 25.
from sklearn.tree import DecisionTreeClassifier

DCT = DecisionTreeClassifier(random_state=25, class_weight='balanced')

# Train the model.
DCT.fit(X_train, y_train)

# Check the model's accuracy on the test set.
DCT.score(X_test, y_test)

# Tune the hyper-parameters with a grid search.
# GridSearchCV searches the grid and picks the best value for each parameter.
from sklearn.model_selection import GridSearchCV
import numpy as np

# `parameters` maps each hyper-parameter to the range of values the grid
# search should try. Several parameters are searched at once, so they are
# collected, together with their candidate values, in a single dictionary.
parameters = {
    'splitter': ('best', 'random'),
    'criterion': ('gini', 'entropy'),
    'max_depth': list(range(1, 10)),
    'min_samples_leaf': list(range(5, 30, 5)),
    # Limits the information gain required between a parent node and its
    # child nodes.
    'min_impurity_decrease': list(np.linspace(0, 0.5, 10)),
}

# First instantiate the model.
clf = DecisionTreeClassifier(class_weight='balanced', random_state=25)

# Set up the grid search. GridSearchCV bundles fit, score and
# cross-validation; `parameters` says which parameters to tune and cv=5
# requests 5-fold cross-validation.
GS = GridSearchCV(clf, parameters, cv=5)
GS.fit(X_train, y_train)

# Best parameter combination.
GS.best_params_

# Best cross-validated score.
GS.best_score_

最后用最好的组合建模。