Visualize decision trees with Jupyter Notebook
import numpy as np
import pydotplus
from sklearn.feature_extraction.text import TfidfVectorizer
from IPython import display
from sklearn import tree
from io import StringIO
from operator import itemgetter
%matplotlib inline
%load_ext autoreload
%autoreload 2
# Toy corpus: four short documents used as the training samples.
input_text = np.array(
    [
        "Physics Mathematics Japanese",  # Document 1
        "Bread rice chocolate",  # Document 2
        "Building apartment apartment",  # Document 3
        "Hokkaido Fukuoka Kanagawa",  # Document 4
    ]
)

# Class labels: document k (counting from 1) is labelled k, so the labels
# are 1, 2, 3, 4 and every document forms its own class.
target_data = np.arange(1, len(input_text) + 1)
# Print floating-point array values with 2 digits of precision.
# This is a global NumPy print setting and stays in effect afterwards.
np.set_printoptions(precision=2)

# Convert the documents into tf-idf vectors: the vectorizer learns the
# vocabulary from the corpus and returns one row per document.
vectorizer = TfidfVectorizer()
input_data = vectorizer.fit_transform(input_text)

# Convert the result to a dense array so the whole table is printed.
print(input_data.toarray())
# Generate the decision tree model, capping the tree depth at 5.
model = tree.DecisionTreeClassifier(max_depth=5)
model = model.fit(input_data, target_data)

# Sanity check: predict the labels of the training documents themselves.
# The result is printed explicitly because a bare expression is only echoed
# by the notebook when it is the last statement of a cell; anywhere else the
# predictions would be silently discarded.
print(model.predict(input_data))
# vocabulary_ maps each term to its column index in the tf-idf matrix.
# Sorting the terms by that index yields the feature names in column order,
# which is the order export_graphviz expects.
feature_names = sorted(vectorizer.vocabulary_, key=vectorizer.vocabulary_.get)

# Write the tree as Graphviz DOT source into an in-memory text buffer.
data = StringIO()
tree.export_graphviz(model, feature_names=feature_names, out_file=data)

# Parse the DOT source, render it to PNG and show the image inline.
graph = pydotplus.graph_from_dot_data(data.getvalue())
png_image = display.Image(graph.create_png())
display.display(png_image)
The rendered tree looks like this. It is nice to be able to see the Gini impurity of each node!
Recommended Posts