To improve the performance of this code, I have sped up several fragments using Numba. A frustrating problem remains: the decision tree is still slow — in the code below it is nearly 70 times slower than the scikit-learn implementation (220 ms vs 3 ms). How can I modify this code to achieve better performance?

```
import time
import numpy as np
from numba import njit
from sklearn.base import BaseEstimator
from sklearn.datasets import make_hastie_10_2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
class Node:
    def __init__(self, left, right, rule):
        """
        Internal (non-leaf) node of the decision tree, holding a binary split.

        Parameters
        ----------
        left, right : Node or Leaf
            Subtrees for examples on either side of the split.
        rule : tuple of (int, float)
            ``(feature index, threshold)`` defining the split.
        """
        self.left = left
        self.right = right
        # Bug fix: `rule` is a tuple, so it must be indexed with square
        # brackets — `rule(0)` tried to *call* it and raised TypeError.
        self.feature = rule[0]
        self.threshold = rule[1]
class Leaf:
    """Terminal node of the decision tree."""

    def __init__(self, value):
        # For a classifier, `value` is an array of class probabilities;
        # for a regressor it is the mean target of the region.
        self.value = value
class DecisionTree(BaseEstimator):
    def __init__(
        self,
        classifier=True,
        max_depth=None,
        n_feats=None,
        criterion="entropy",
        seed=None,
        regularized=False,
    ):
        """
        A decision tree model for regression and classification problems.

        Parameters
        ----------
        classifier : bool
            Whether to treat target values as categorical (classifier =
            True) or continuous (classifier = False). Default is True.
        max_depth: int or None
            The depth at which to stop growing the tree. If None, grow the tree
            until all leaves are pure. Default is None.
        n_feats : int
            Specifies the number of features to sample on each split. If None,
            use all features on each split. Default is None.
        criterion : {'mse', 'entropy', 'gini'}
            The error criterion to use when calculating splits. When
            `classifier` is False, valid entries are {'mse'}. When `classifier`
            is True, valid entries are {'entropy', 'gini'}. Default is
            'entropy'.
        seed : int or None
            Seed for the random number generator. Default is None.
        regularized : bool
            If True, multiply the gain of features already used in the tree
            by `self.factor` to encourage reuse. Default is False.
        """
        if seed:
            np.random.seed(seed)
        self.depth = 0
        self.root = None
        self.n_feats = n_feats
        self.criterion = criterion
        self.classifier = classifier
        self.max_depth = max_depth if max_depth else np.inf
        self.factor = 1.2
        self.regularized = regularized
        self.feature_importances_ = None
        if not classifier and criterion in ("gini", "entropy"):
            raise ValueError(
                "{} is a valid criterion only when classifier = True.".format(criterion)
            )
        if classifier and criterion == "mse":
            raise ValueError("`mse` is a valid criterion only when classifier = False.")

    def fit(self, X, Y):
        """
        Fit a binary decision tree to a dataset.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            The training data of `N` examples, each with `M` features
        Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            An array of integer class labels for each example in `X` if
            self.classifier = True, otherwise the set of target values for
            each example in `X`.
        """
        self.n_classes = max(Y) + 1 if self.classifier else None
        # Bug fix: `shape` is a tuple attribute, not a method — use
        # square-bracket indexing (`X.shape[1]`, not `X.shape(1)`).
        self.feature_importances_ = np.zeros(X.shape[1])
        self.used_variables = np.zeros(X.shape[1])
        self.n_feats = X.shape[1] if not self.n_feats else min(self.n_feats, X.shape[1])
        self.root = self._grow(X, Y)
        # Guard against division by zero when no split produced positive gain.
        total = np.sum(self.feature_importances_)
        if total > 0:
            self.feature_importances_ = self.feature_importances_ / total

    def predict(self, X):
        """
        Use the trained decision tree to classify or predict the examples in `X`.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            The training data of `N` examples, each with `M` features

        Returns
        -------
        preds : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
            The integer class labels predicted for each example in `X` if
            self.classifier = True, otherwise the predicted target values.
        """
        # Bug fix: np.array(<generator>) builds a useless 0-d object array;
        # a list comprehension produces the intended 1-d array.
        return np.array([self._traverse(x, self.root) for x in X])

    def predict_class_probs(self, X):
        """
        Use the trained decision tree to return the class probabilities for the
        examples in `X`.

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
            The training data of `N` examples, each with `M` features

        Returns
        -------
        preds : :py:class:`ndarray <numpy.ndarray>` of shape `(N, n_classes)`
            The class probabilities predicted for each example in `X`.
        """
        assert self.classifier, "`predict_class_probs` undefined for classifier = False"
        # Bug fix: list comprehension instead of generator (see `predict`).
        return np.array([self._traverse(x, self.root, prob=True) for x in X])

    def _grow(self, X, Y, cur_depth=0):
        """Recursively grow the tree; returns a Node or a Leaf."""
        # If all labels are the same, return a leaf.
        if len(set(Y)) == 1:
            if self.classifier:
                prob = np.zeros(self.n_classes)
                # Bug fix: index with brackets — `prob[Y[0]] = 1.0`.
                prob[Y[0]] = 1.0
                return Leaf(prob)
            return Leaf(Y[0])

        # If we have reached max_depth, return a leaf.
        if cur_depth >= self.max_depth:
            v = np.mean(Y, axis=0)
            if self.classifier:
                v = np.bincount(Y, minlength=self.n_classes) / len(Y)
            return Leaf(v)

        cur_depth += 1
        self.depth = max(self.depth, cur_depth)

        N, M = X.shape
        feat_idxs = np.random.choice(M, self.n_feats, replace=False)

        # Greedily select the best split according to `criterion`.
        feat, thresh, best_gain = _segment(
            self.criterion, self.regularized, self.used_variables, self.factor,
            X, Y, feat_idxs,
        )
        self.feature_importances_[feat] += len(X) * best_gain
        self.used_variables[feat] = 1

        l = np.argwhere(X[:, feat] <= thresh).flatten()
        r = np.argwhere(X[:, feat] > thresh).flatten()

        # Grow the children that result from the split.
        left = self._grow(X[l, :], Y[l], cur_depth)
        right = self._grow(X[r, :], Y[r], cur_depth)
        return Node(left, right, (feat, thresh))

    def _traverse(self, X, node, prob=False):
        """Walk a single example `X` down the tree to its leaf value."""
        if isinstance(node, Leaf):
            if self.classifier:
                return node.value if prob else node.value.argmax()
            return node.value
        if X[node.feature] <= node.threshold:
            return self._traverse(X, node.left, prob)
        return self._traverse(X, node.right, prob)
@njit()
def _segment(criterion, regularized, used_variables, factor, X, Y, feat_idxs):
    """
    Find the optimal split rule (feature index and split threshold) for the
    data according to `criterion`.

    Returns
    -------
    split_idx : int
        Index of the best feature (-1 if no split improved the gain).
    split_thresh : float
        Threshold of the best split (NaN if no split improved the gain).
    best_gain : float
        Impurity gain of the chosen split.
    """
    best_gain = -np.inf
    # Bug fix: returning None is not numba-typable alongside int/float
    # results; use -1 / NaN sentinels instead (callers index with the result,
    # which would have crashed on None anyway).
    split_idx = -1
    split_thresh = np.nan
    for i in feat_idxs:
        # Bug fix: column selection and slicing use square brackets.
        vals = X[:, i]
        levels = np.unique(vals)
        # Candidate thresholds are the midpoints between consecutive levels.
        thresholds = (levels[:-1] + levels[1:]) / 2 if len(levels) > 1 else levels
        # Bug fix: np.array(<generator>) is unsupported under numba (and
        # builds a 0-d object array in plain NumPy); track the max in a loop.
        for t in thresholds:
            gain = _impurity_gain(criterion, Y, t, vals)
            if regularized and used_variables[i]:
                gain = gain * factor
            if gain > best_gain:
                split_idx = i
                best_gain = gain
                split_thresh = t
    return split_idx, split_thresh, best_gain
@njit()
def _impurity_gain(criterion, Y, split_thresh, feat_values):
    """
    Compute the impurity gain associated with a given split.

    IG(split) = loss(parent) - weighted_avg(loss(left_child), loss(right_child))
    """
    if criterion == "entropy":
        parent_loss = entropy(Y)
    elif criterion == "gini":
        parent_loss = gini(Y)
    elif criterion == "mse":
        parent_loss = mse(Y)
    else:
        raise Exception

    # Generate the split (square-bracket indexing fixed throughout).
    left = np.argwhere(feat_values <= split_thresh).flatten()
    right = np.argwhere(feat_values > split_thresh).flatten()
    if len(left) == 0 or len(right) == 0:
        # Return a float (not int 0) so numba infers one return type.
        return 0.0

    # Compute the weighted avg. of the loss for the children.
    n = len(Y)
    n_l, n_r = len(left), len(right)
    if criterion == "entropy":
        e_l, e_r = entropy(Y[left]), entropy(Y[right])
    elif criterion == "gini":
        e_l, e_r = gini(Y[left]), gini(Y[right])
    else:
        # Bug fix: the original raised Exception here for 'mse', which made
        # regression impossible even though the parent loss supported it.
        e_l, e_r = mse(Y[left]), mse(Y[right])
    child_loss = (n_l / n) * e_l + (n_r / n) * e_r

    # Impurity gain is the difference in loss before vs. after the split.
    ig = parent_loss - child_loss
    return ig
@njit()
def mse(y):
    """Mean squared error of predicting the region mean for every target."""
    mu = np.mean(y)
    deviations = y - mu
    return np.mean(deviations ** 2)
@njit()
def entropy(y):
    """
    Entropy (in bits) of an integer label sequence.
    """
    hist = np.bincount(y)
    ps = hist / np.sum(hist)
    # Bug fix: np.array(<generator>) builds a 0-d object array (and is
    # unsupported by numba); accumulate the sum with an explicit loop.
    h = 0.0
    for p in ps:
        if p > 0:
            h += p * np.log2(p)
    return -h
@njit()
def gini(y):
    """
    Gini impurity (local entropy) of an integer label sequence.
    """
    hist = np.bincount(y)
    N = np.sum(hist)
    # Bug fix: np.array(<generator>) builds a 0-d object array (and is
    # unsupported by numba); sum the squared frequencies with a loop.
    s = 0.0
    for count in hist:
        s += (count / N) ** 2
    return 1 - s
if __name__ == '__main__':
    n_samples = 1000
    X, y = make_hastie_10_2(n_samples=n_samples)
    # Bug fix: boolean-mask assignment needs square brackets.
    y[y < 0] = 0
    # Bug fix: np.float / np.int aliases were removed in NumPy 1.24;
    # use the Python builtins instead.
    X = X.astype(float)
    y = y.astype(int)
    test_size = 0.2
    print(X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    # First run — includes Numba JIT compilation overhead.
    ts = time.time()
    tree = DecisionTree(seed=0, criterion='gini', max_depth=3)
    tree.fit(X_train, y_train)
    print(accuracy_score(y_train, tree.predict(X_train)))
    print(accuracy_score(y_test, tree.predict(X_test)))
    te = time.time()
    print('%2.2f ms' % ((te - ts) * 1000))

    # Second run — steady-state timing with a warm JIT cache.
    ts = time.time()
    tree = DecisionTree(seed=0, criterion='gini', max_depth=3)
    tree.fit(X_train, y_train)
    print(accuracy_score(y_train, tree.predict(X_train)))
    print(accuracy_score(y_test, tree.predict(X_test)))
    te = time.time()
    print('%2.2f ms' % ((te - ts) * 1000))

    # scikit-learn baseline for comparison.
    ts = time.time()
    tree = DecisionTreeClassifier(random_state=0, max_depth=3)
    tree.fit(X_train, y_train)
    print(accuracy_score(y_train, tree.predict(X_train)))
    print(accuracy_score(y_test, tree.predict(X_test)))
    te = time.time()
    print('%2.2f ms' % ((te - ts) * 1000))
```