您的位置:首页 > 其它

打印随机森林模型

2018-08-15 19:00 316 查看
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
import pydot
from sklearn.externals.six import StringIO
from IPython.display import Image
import pydotplus
# Load the training data; the "Age" column is parsed as float64.
train = pd.read_csv("train2.csv", dtype={"Age": np.float64})
# Preview the first ten rows.  The call form of print runs unchanged on
# both Python 2 and Python 3, unlike the bare print statement.
print(train.head(10))

def _encode_column(column, codes):
    """Return *column* with every value found in *codes* replaced by its code.

    Values that have no entry in *codes* (missing values included) are
    passed through unchanged.
    """
    return column.map(lambda value: codes.get(value, value))


def harmonize_data(titanic):
    """Fill missing values and numerically encode the categorical columns.

    The frame is modified in place and also returned.

    * "Age" and "Fare": missing entries are replaced by the column median.
    * "Sex": "male" -> 0, "female" -> 1.
    * "Embarked": missing entries become "S", then "S" -> 0, "C" -> 1,
      "Q" -> 2.

    Args:
        titanic: DataFrame with "Age", "Sex", "Embarked" and "Fare" columns.

    Returns:
        The same DataFrame object that was passed in.
    """
    titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())

    # Build the encoded column as a whole instead of writing integers into
    # the existing string column cell by cell: element-wise assignment of a
    # non-string value is rejected by pandas' dedicated string dtype, and on
    # older versions it leaves an object-dtype column behind.
    titanic["Sex"] = _encode_column(titanic["Sex"], {"male": 0, "female": 1})

    titanic["Embarked"] = _encode_column(
        titanic["Embarked"].fillna("S"),
        {"S": 0, "C": 1, "Q": 2},
    )

    titanic["Fare"] = titanic["Fare"].fillna(titanic["Fare"].median())

    return titanic

# Fill missing values and encode the categorical columns of `train` in place.
harmonize_data(train)
print("ok")

# Feature columns handed to the classifier.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
results = []
# Candidate hyper-parameter grids; the single run below does not consume them.
sample_leaf_options = list(range(1, 500, 3))
n_estimators_options = list(range(1, 1000, 5))

# Hold-out split: the first `split_at` rows train the model and every
# remaining row evaluates it.  One shared boundary for both slices keeps
# row 600 in the evaluation set instead of silently dropping it.
split_at = 600
groud_truth = train['Survived'][split_at:]

alg = RandomForestClassifier(min_samples_leaf=50, n_estimators=5, random_state=50)
alg.fit(train[predictors][:split_at], train['Survived'][:split_at])
predict = alg.predict(train[predictors][split_at:])
# Record (min_samples_leaf, n_estimators, hold-out accuracy).
results.append((50, 5, (groud_truth == predict).mean()))

print(results)

# Write every decision tree of the fitted forest to its own PDF file.
Estimators = alg.estimators_
for index, model in enumerate(Estimators):
    filename = 'iris_' + str(index) + '.pdf'
    # out_file=None makes export_graphviz return the DOT source as a string.
    dot_data = tree.export_graphviz(
        model,
        out_file=None,
        feature_names=predictors,
        class_names=["die", "live"],
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    # Only the PDF is produced; rendering a PNG whose result is discarded
    # would just cost an extra Graphviz run per tree.
    graph.write_pdf(filename)

前提:运行前需要先安装 Graphviz(例如在 CentOS/RHEL 上执行下面的命令)

yum install graphviz

涉及到的训练集参考上一篇文章
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  打印 随机 森林