# imports
from sklearn import tree
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np
import pydot
import matplotlib.pyplot as plt
from io import StringIO
import os

# target class (index in this list == integer label used for training)
target_class = ["unacc", "acc", "good", "vgood"]

# input_attributes for evaluation: (column name, allowed values) per
# dataset column, in dataset column order.
input_attributes = [
    ("buying", ["vhigh", "high", "med", "low"]),
    ("maint", ["vhigh", "high", "med", "low"]),
    ("doors", ["2", "3", "4", "5more"]),
    ("persons", ["2", "4", "more"]),
    ("lug_boot", ["small", "med", "big"]),
    ("safety", ["low", "med", "high"]),
]

# Loading data set (strings, comma-separated; last column is the class)
data = np.genfromtxt(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data',
    delimiter=',',
    dtype="U",
)
data_attributes = data[:, :-1]
data_targets = data[:, -1]


def change_str_data_to_one_hot(data, input_attributes):
    """One-hot encode the string feature matrix.

    Categories are taken from *input_attributes* (rather than inferred
    from the data) so the produced columns are guaranteed to align with
    the generated ``<name>_is_<value>`` feature names, in order.

    Returns (binary matrix of shape (n_samples, sum(len(values))),
    list of feature-name strings).
    """
    encoder = OneHotEncoder(categories=[list(values) for _, values in input_attributes])
    values_bin = encoder.fit_transform(data).toarray()
    feature_vals = []
    for key, values in input_attributes:
        for value in values:
            feature_vals.append("{}_is_{}".format(key, value))
    return values_bin, feature_vals


def convert_str_data_to_linear(data, input_attributes):
    """Encode each string feature as its index within its declared values.

    Returns (integer matrix of shape (n_samples, n_features), list of
    ``<name>_index`` feature-name strings). Indexes range from 0 to n-1.
    """
    values_lin = np.array([
        [input_attributes[col][1].index(val) for col, val in enumerate(row)]
        for row in data
    ])
    feature_vals = [name + "_index" for name, _ in input_attributes]
    return values_lin, feature_vals


# Get both one-hot and linear versions of input features
test_hot, feature_vals_one_hot = change_str_data_to_one_hot(
    data_attributes, input_attributes)
values_linear_int, feature_vals_linear_int = convert_str_data_to_linear(
    data_attributes, input_attributes)

# concatenate them (np.concatenate takes a tuple of arrays)
concat_val = np.concatenate((test_hot, values_linear_int), axis=-1)
feature_vals = feature_vals_one_hot + feature_vals_linear_int

# Outputs use indexes into target_class
integer_val = np.array([target_class.index(i) for i in data_targets])

# train a simple decision tree
maximum_depth = 4
clf = tree.DecisionTreeClassifier(max_depth=maximum_depth)
clf = clf.fit(concat_val, integer_val)


# plot tree
def plot_simple_tree(clf, target_class, tree_name):
    """Export the fitted tree under exported_trees/ as .dot and .png."""
    graph_save_path = os.path.join(
        "exported_trees", "{}".format(tree_name)
    )
    print(graph_save_path)
    # export_graphviz does not create the directory itself
    os.makedirs("exported_trees", exist_ok=True)
    tree.export_graphviz(clf, out_file="{}.dot".format(graph_save_path))
    create_dotfile = StringIO()
    tree.export_graphviz(
        clf,
        out_file=create_dotfile,
        feature_names=feature_vals,
        class_names=target_class,
        filled=True,
        rotate=True,
    )
    # graph_from_dot_data returns a list of graphs; render the first
    pydot.graph_from_dot_data(create_dotfile.getvalue())[0].write_png(
        "{}.png".format(graph_save_path))


# Plot our simple tree: #plot_simple_tree(clf, target_class, tree_name="simple_decision_tree")


# plot input feature importance
def draw_features_chart(clf, classifier_name, feature_vals):
    """Draw a horizontal bar chart of the tree's feature importances."""
    # sort importances ascending, keeping names paired with their values
    sorted_feature_importances, sorted_feature_vals = (
        zip(*sorted(zip(clf.feature_importances_, feature_vals)))
    )
    plt.figure(figsize=(16, 9))
    plt.barh(
        range(len(sorted_feature_importances)), sorted_feature_importances)
    plt.yticks(
        range(len(sorted_feature_importances)),
        ["{}: {:.3}".format(a, b)
         for a, b in zip(sorted_feature_vals, sorted_feature_importances)],
    )
    plt.title(
        "{}\n"
        "(decrease in node impurity, weighted by the "
        "probability of finding that node)".format(classifier_name))
    plt.show()


draw_features_chart(clf, "simple decision tree", feature_vals)

# Plot our simple decision tree
plot_simple_tree(clf, target_class, tree_name="simple_decision_tree")