// Shared builder used by the scikit-learn loaders in this file: creates a
// model via treelite::Model::Create<double, double>(), lets `meta_handler`
// fill in model-level metadata, then copies each scikit-learn tree into it.
// NOTE(review): this listing omits several original lines (the embedded line
// numbers skip), so the declarations of `model`, `tree`, `node_id` and
// `new_node_id`, and the loop that drains `Q`, are not visible here.
//
// Template parameters:
//   MetaHandlerFunc - callable invoked as
//                     meta_handler(Model*, n_features, n_classes).
//   LeafHandlerFunc - callable invoked as
//                     leaf_handler(tree_id, node_id, new_node_id, value,
//                                  n_classes, tree).
// Parameters:
//   n_trees    - number of trees to convert; checked to be > 0.
//   n_features - number of features; checked to be > 0.
//   n_classes  - forwarded unchanged to both handlers.
//   node_count - not referenced in the visible lines.
//   children_left, children_right, feature, threshold, n_node_samples,
//   weighted_n_node_samples, impurity
//              - per-tree arrays, each indexed as array[tree_id][node_id].
//   value      - per-tree leaf output array; only interpreted by leaf_handler.
// Returns: presumably `model_ptr` (the return statement is not visible here).
19 template <
typename MetaHandlerFunc,
typename LeafHandlerFunc>
20 std::unique_ptr<treelite::Model> LoadSKLearnModel(
21 int n_trees,
int n_features,
int n_classes,
const int64_t* node_count,
22 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
23 const double** threshold,
const double** value,
const int64_t** n_node_samples,
24 const double** weighted_n_node_samples,
const double** impurity, MetaHandlerFunc meta_handler,
25 LeafHandlerFunc leaf_handler) {
// Reject an empty ensemble or a model without features up front.
26 TREELITE_CHECK_GT(n_trees, 0);
27 TREELITE_CHECK_GT(n_features, 0);
29 std::unique_ptr<treelite::Model> model_ptr = treelite::Model::Create<double, double>();
// Let the caller-supplied handler configure the model-level metadata.
30 meta_handler(model_ptr.get(), n_features, n_classes);
// One new tree is appended to the model per scikit-learn tree.
33 for (
int tree_id = 0; tree_id < n_trees; ++tree_id) {
34 model->
trees.emplace_back();
// Work queue of (scikit-learn node id, new node id) pairs; unpacked below.
40 std::queue<std::pair<int64_t, int>> Q;
// Sample count stored at node 0 of this tree.
42 const int64_t total_sample_cnt = n_node_samples[tree_id][0];
46 std::tie(node_id, new_node_id) = Q.front(); Q.pop();
47 const int64_t left_child_id = children_left[tree_id][node_id];
48 const int64_t right_child_id = children_right[tree_id][node_id];
49 const int64_t sample_cnt = n_node_samples[tree_id][node_id];
50 const double weighted_sample_cnt = weighted_n_node_samples[tree_id][node_id];
// A left child id of -1 is treated as a leaf: leaf_handler writes its output.
51 if (left_child_id == -1) {
52 leaf_handler(tree_id, node_id, new_node_id, value, n_classes, tree);
// Otherwise this is an internal node: read its split feature and threshold.
54 const int64_t split_index = feature[tree_id][node_id];
55 const double split_cond = threshold[tree_id][node_id];
56 const int64_t left_child_sample_cnt = n_node_samples[tree_id][left_child_id];
57 const int64_t right_child_sample_cnt = n_node_samples[tree_id][right_child_id];
// Gain: sample count times (node impurity minus the sample-weighted child
// impurities). NOTE(review): the expression continues on a line that is not
// shown in this listing (there is no terminating semicolon here).
58 const double gain = sample_cnt * (
59 impurity[tree_id][node_id]
60 - left_child_sample_cnt * impurity[tree_id][left_child_id] / sample_cnt
61 - right_child_sample_cnt * impurity[tree_id][right_child_id] / sample_cnt)
// Numerical split using Operator::kLE; the `true` argument is default_left.
65 tree.
SetNumericalSplit(new_node_id, split_index, split_cond,
true, treelite::Operator::kLE);
66 tree.
SetGain(new_node_id, gain);
// Enqueue both children, each paired with the matching child of the new node.
67 Q.push({left_child_id, tree.
LeftChild(new_node_id)});
68 Q.push({right_child_id, tree.
RightChild(new_node_id)});
// Record the weighted sample count as the node's hessian sum.
71 tree.
SetSumHess(new_node_id, weighted_sample_cnt);
// Parameter list and body fragment of a regressor loader. The line carrying
// the return type and function name is not visible in this listing; the
// parameters match the documented LoadSKLearnRandomForestRegressor signature
// -- TODO confirm against the full file.
78 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
79 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
80 const double** value,
const int64_t** n_node_samples,
const double** weighted_n_node_samples,
81 const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
82 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
85 model->
task_type = treelite::TaskType::kBinaryClfRegr;
// Leaf setup: one scalar per node, read from value[tree_id][node_id].
93 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
95 const double leaf_value = value[tree_id][node_id];
96 dest_tree.SetLeaf(new_node_id, leaf_value);
// Delegate to the shared builder, passing 1 for n_classes.
98 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left, children_right,
99 feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity, meta_handler,
// Parameter list and body fragment of the isolation-forest loader. The line
// carrying the return type and function name is not visible in this listing;
// the trailing `ratio_c` parameter matches the documented
// LoadSKLearnIsolationForest signature -- TODO confirm against the full file.
104 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
105 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
106 const double** value,
const int64_t** n_node_samples,
const double** weighted_n_node_samples,
107 const double** impurity,
const double ratio_c) {
// Model-level setup. `ratio_c` is captured by value; the statement that uses
// it is not visible here (presumably it is stored in the model parameters).
108 auto meta_handler = [ratio_c](
treelite::Model* model,
int n_features,
int n_classes) {
111 model->
task_type = treelite::TaskType::kBinaryClfRegr;
// Leaf setup: one scalar per node, read from value[tree_id][node_id].
121 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
123 const double leaf_value = value[tree_id][node_id];
124 dest_tree.SetLeaf(new_node_id, leaf_value);
// Delegate to the shared builder, passing 1 for n_classes.
126 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left, children_right,
127 feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity, meta_handler,
// Builds a model for a two-class random forest classifier. Each leaf stores
// the fraction leaf_count[1] / (leaf_count[0] + leaf_count[1]), where
// leaf_count points at the two consecutive entries of `value` for that node.
// All array parameters are forwarded unchanged to LoadSKLearnModel.
// NOTE(review): parts of both lambdas are missing from this listing.
131 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierBinary(
132 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
133 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
134 const double** threshold,
const double** value,
const int64_t** n_node_samples,
135 const double** weighted_n_node_samples,
const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
136 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
139 model->
task_type = treelite::TaskType::kBinaryClfRegr;
147 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
// Two entries per node: the node's pair starts at offset node_id * 2.
150 const double* leaf_count = &value[tree_id][node_id * 2];
// No guard against a zero denominator is visible in this listing.
152 const double fraction_positive = leaf_count[1] / (leaf_count[0] + leaf_count[1]);
153 dest_tree.SetLeaf(new_node_id, fraction_positive);
155 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
156 children_right, feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity,
157 meta_handler, leaf_handler);
// Builds a model for a multi-class random forest classifier. Each leaf stores
// a vector of n_classes values copied from `value` for that node.
// All array parameters are forwarded unchanged to LoadSKLearnModel.
// NOTE(review): parts of both lambdas are missing from this listing.
160 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierMulticlass(
161 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
162 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
163 const double** threshold,
const double** value,
const int64_t** n_node_samples,
164 const double** weighted_n_node_samples,
const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
165 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
168 model->
task_type = treelite::TaskType::kMultiClfProbDistLeaf;
177 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
// Copy the n_classes entries belonging to this node:
// [node_id * n_classes, (node_id + 1) * n_classes).
180 std::vector<double> prob_distribution(&value[tree_id][node_id * n_classes],
181 &value[tree_id][(node_id + 1) * n_classes]);
// Sum of the copied entries, captured by the per-element lambda below.
183 const double norm_factor =
184 std::accumulate(prob_distribution.begin(), prob_distribution.end(), 0.0);
// The per-element lambda body is not visible here; presumably it divides each
// entry by norm_factor so the vector sums to 1 -- TODO confirm.
185 std::for_each(prob_distribution.begin(), prob_distribution.end(), [norm_factor](
double& e) {
188 dest_tree.SetLeafVector(new_node_id, prob_distribution);
190 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
191 children_right, feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity,
192 meta_handler, leaf_handler);
// Parameter list and body fragment of the random forest classifier entry
// point. The line carrying the return type and function name is not visible
// in this listing; the body dispatches to the RandomForestClassifier helpers,
// so this is presumably LoadSKLearnRandomForestClassifier -- TODO confirm.
196 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
197 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
198 const double** threshold,
const double** value,
const int64_t** n_node_samples,
199 const double** weighted_n_node_samples,
const double** impurity) {
// At least two classes are required.
200 TREELITE_CHECK_GE(n_classes, 2);
// Exactly two classes: use the binary loader.
201 if (n_classes == 2) {
202 return LoadSKLearnRandomForestClassifierBinary(n_estimators, n_features, n_classes, node_count,
203 children_left, children_right, feature, threshold, value, n_node_samples,
204 weighted_n_node_samples, impurity);
// Remaining case: use the multi-class loader.
206 return LoadSKLearnRandomForestClassifierMulticlass(n_estimators, n_features, n_classes,
207 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
208 weighted_n_node_samples, impurity);
// Parameter list and body fragment of a regressor loader. The line carrying
// the return type and function name is not visible in this listing; the
// parameters match the documented LoadSKLearnGradientBoostingRegressor
// signature -- TODO confirm against the full file.
213 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
214 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
215 const double** value,
const int64_t** n_node_samples,
const double** weighted_n_node_samples,
216 const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
217 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
220 model->
task_type = treelite::TaskType::kBinaryClfRegr;
// Leaf setup: one scalar per node, read from value[tree_id][node_id].
228 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
230 const double leaf_value = value[tree_id][node_id];
231 dest_tree.SetLeaf(new_node_id, leaf_value);
// Delegate to the shared builder, passing 1 for n_classes.
233 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left,
234 children_right, feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity,
235 meta_handler, leaf_handler);
// Builds a model for a two-class gradient boosting classifier. Each leaf
// stores the scalar value[tree_id][node_id]; n_classes and all array
// parameters are forwarded unchanged to LoadSKLearnModel.
// NOTE(review): parts of both lambdas are missing from this listing.
238 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierBinary(
239 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
240 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
241 const double** threshold,
const double** value,
const int64_t** n_node_samples,
242 const double** weighted_n_node_samples,
const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
243 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
246 model->
task_type = treelite::TaskType::kBinaryClfRegr;
// Leaf setup: one scalar per node, read from value[tree_id][node_id].
254 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
256 const double leaf_value = value[tree_id][node_id];
257 dest_tree.SetLeaf(new_node_id, leaf_value);
259 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
260 children_right, feature, threshold, value, n_node_samples, weighted_n_node_samples, impurity,
261 meta_handler, leaf_handler);
// Builds a model for a multi-class gradient boosting classifier. Each leaf
// stores the scalar value[tree_id][node_id], and the shared builder is asked
// to convert n_estimators * n_classes trees.
// NOTE(review): parts of both lambdas are missing from this listing.
264 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierMulticlass(
265 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
266 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
267 const double** threshold,
const double** value,
const int64_t** n_node_samples,
268 const double** weighted_n_node_samples,
const double** impurity) {
// Model-level setup; only the task_type assignment is visible here.
269 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
272 model->
task_type = treelite::TaskType::kMultiClfGrovePerClass;
// Leaf setup: one scalar per node, read from value[tree_id][node_id].
280 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
282 const double leaf_value = value[tree_id][node_id];
283 dest_tree.SetLeaf(new_node_id, leaf_value);
// The tree count passed on is n_estimators * n_classes, so the per-tree
// arrays are presumably expected to hold that many entries -- TODO confirm.
285 return LoadSKLearnModel(n_estimators * n_classes, n_features, n_classes, node_count,
286 children_left, children_right, feature, threshold, value, n_node_samples,
287 weighted_n_node_samples, impurity, meta_handler, leaf_handler);
// Parameter list and body fragment of the gradient boosting classifier entry
// point. The line carrying the return type and function name is not visible
// in this listing; the body dispatches to the GradientBoostingClassifier
// helpers, so this is presumably LoadSKLearnGradientBoostingClassifier
// -- TODO confirm.
291 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
292 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
293 const double** threshold,
const double** value,
const int64_t** n_node_samples,
294 const double** weighted_n_node_samples,
const double** impurity) {
// At least two classes are required.
295 TREELITE_CHECK_GE(n_classes, 2);
// Exactly two classes: use the binary loader.
296 if (n_classes == 2) {
297 return LoadSKLearnGradientBoostingClassifierBinary(n_estimators, n_features, n_classes,
298 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
299 weighted_n_node_samples, impurity);
// Remaining case: use the multi-class loader.
301 return LoadSKLearnGradientBoostingClassifierMulticlass(n_estimators, n_features, n_classes,
302 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
303 weighted_n_node_samples, impurity);
ModelParam param
extra parameters
Collection of front-end methods to load or construct ensemble model.
void Init()
initialize the model with a single root node
bool grove_per_class
Whether we designate a subset of the trees to compute the prediction for each class.
bool average_tree_output
whether to average tree outputs
model structure for tree ensemble
unsigned int leaf_vector_size
Dimension of the output from each leaf node.
void SetSumHess(int nid, double sum_hess)
set the hessian sum of the node
in-memory representation of a decision tree
logging facility for Treelite
unsigned int num_class
The number of classes in the target label.
float global_bias
global bias of the model
TaskType task_type
Task type.
int32_t num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_...
std::vector< Tree< ThresholdType, LeafOutputType > > trees
member trees
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **weighted_n_node_samples, const double **impurity)
Load a scikit-learn random forest classifier model from a collection of arrays. Refer to https://scik...
void SetGain(int nid, double gain)
set the gain value of the node
void SetDataCount(int nid, uint64_t data_count)
set the data count of the node
float ratio_c
scaling parameter for exponential standard ratio transformation expstdratio(x) = exp2(-x / c) ...
int LeftChild(int nid) const
Getters.
TaskParam task_param
Group of parameters that are specific to the particular task type.
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **weighted_n_node_samples, const double **impurity)
Load a scikit-learn random forest regressor model from a collection of arrays. Refer to https://sciki...
int RightChild(int nid) const
index of the node's right child
void AddChilds(int nid)
add child nodes to node
OutputType output_type
The type of output from each leaf node.
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **weighted_n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting regressor model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
thin wrapper for tree ensemble model
std::unique_ptr< treelite::Model > LoadSKLearnIsolationForest(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **weighted_n_node_samples, const double **impurity, const double ratio_c)
Load a scikit-learn isolation forest model from a collection of arrays. Refer to https://scikit-learn...
void SetNumericalSplit(int nid, unsigned split_index, ThresholdType threshold, bool default_left, Operator cmp)
Setters.
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **weighted_n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting classifier model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
char pred_transform[TREELITE_MAX_PRED_TRANSFORM_LENGTH]
name of prediction transform function