18 template <
typename MetaHandlerFunc,
typename LeafHandlerFunc>
19 std::unique_ptr<treelite::Model> LoadSKLearnModel(
20 int n_trees,
int n_features,
int n_classes,
const int64_t* node_count,
21 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
22 const double** threshold,
const double** value,
const int64_t** n_node_samples,
23 const double** impurity, MetaHandlerFunc meta_handler, LeafHandlerFunc leaf_handler) {
25 CHECK_GT(n_features, 0);
27 std::unique_ptr<treelite::Model> model_ptr = treelite::Model::Create<double, double>();
28 meta_handler(model_ptr.get(), n_features, n_classes);
31 for (
int tree_id = 0; tree_id < n_trees; ++tree_id) {
32 model->
trees.emplace_back();
38 std::queue<std::pair<int64_t, int>> Q;
40 const int64_t total_sample_cnt = n_node_samples[tree_id][0];
44 std::tie(node_id, new_node_id) = Q.front(); Q.pop();
45 const int64_t left_child_id = children_left[tree_id][node_id];
46 const int64_t right_child_id = children_right[tree_id][node_id];
47 const int64_t sample_cnt = n_node_samples[tree_id][node_id];
48 if (left_child_id == -1) {
49 leaf_handler(tree_id, node_id, new_node_id, value, n_classes, tree);
51 const int64_t split_index = feature[tree_id][node_id];
52 const double split_cond = threshold[tree_id][node_id];
53 const int64_t left_child_sample_cnt = n_node_samples[tree_id][left_child_id];
54 const int64_t right_child_sample_cnt = n_node_samples[tree_id][right_child_id];
55 const double gain = sample_cnt * (
56 impurity[tree_id][node_id]
57 - left_child_sample_cnt * impurity[tree_id][left_child_id] / sample_cnt
58 - right_child_sample_cnt * impurity[tree_id][right_child_id] / sample_cnt)
62 tree.
SetNumericalSplit(new_node_id, split_index, split_cond,
true, treelite::Operator::kLE);
63 tree.
SetGain(new_node_id, gain);
64 Q.push({left_child_id, tree.
LeftChild(new_node_id)});
65 Q.push({right_child_id, tree.
RightChild(new_node_id)});
74 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
75 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
76 const double** value,
const int64_t** n_node_samples,
const double** impurity) {
77 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
80 model->
task_type = treelite::TaskType::kBinaryClfRegr;
88 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
90 const double leaf_value = value[tree_id][node_id];
91 dest_tree.SetLeaf(new_node_id, leaf_value);
93 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left, children_right,
94 feature, threshold, value, n_node_samples, impurity, meta_handler, leaf_handler);
97 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierBinary(
98 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
99 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
100 const double** threshold,
const double** value,
const int64_t** n_node_samples,
101 const double** impurity) {
102 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
105 model->
task_type = treelite::TaskType::kBinaryClfRegr;
113 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
116 const double* leaf_count = &value[tree_id][node_id * 2];
118 const double fraction_positive = leaf_count[1] / (leaf_count[0] + leaf_count[1]);
119 dest_tree.SetLeaf(new_node_id, fraction_positive);
121 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
122 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
126 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierMulticlass(
127 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
128 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
129 const double** threshold,
const double** value,
const int64_t** n_node_samples,
130 const double** impurity) {
131 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
134 model->
task_type = treelite::TaskType::kMultiClfProbDistLeaf;
143 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
146 std::vector<double> prob_distribution(&value[tree_id][node_id * n_classes],
147 &value[tree_id][(node_id + 1) * n_classes]);
149 const double norm_factor =
150 std::accumulate(prob_distribution.begin(), prob_distribution.end(), 0.0);
151 std::for_each(prob_distribution.begin(), prob_distribution.end(), [norm_factor](
double& e) {
154 dest_tree.SetLeafVector(new_node_id, prob_distribution);
156 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
157 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
162 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
163 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
164 const double** threshold,
const double** value,
const int64_t** n_node_samples,
165 const double** impurity) {
166 CHECK_GE(n_classes, 2);
167 if (n_classes == 2) {
168 return LoadSKLearnRandomForestClassifierBinary(n_estimators, n_features, n_classes, node_count,
169 children_left, children_right, feature, threshold, value, n_node_samples, impurity);
171 return LoadSKLearnRandomForestClassifierMulticlass(n_estimators, n_features, n_classes,
172 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
178 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
179 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
180 const double** value,
const int64_t** n_node_samples,
const double** impurity) {
181 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
184 model->
task_type = treelite::TaskType::kBinaryClfRegr;
192 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
194 const double leaf_value = value[tree_id][node_id];
195 dest_tree.SetLeaf(new_node_id, leaf_value);
197 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left,
198 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
202 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierBinary(
203 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
204 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
205 const double** threshold,
const double** value,
const int64_t** n_node_samples,
206 const double** impurity) {
207 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
210 model->
task_type = treelite::TaskType::kBinaryClfRegr;
218 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
220 const double leaf_value = value[tree_id][node_id];
221 dest_tree.SetLeaf(new_node_id, leaf_value);
223 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
224 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
228 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierMulticlass(
229 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
230 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
231 const double** threshold,
const double** value,
const int64_t** n_node_samples,
232 const double** impurity) {
233 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
236 model->
task_type = treelite::TaskType::kMultiClfGrovePerClass;
244 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
246 const double leaf_value = value[tree_id][node_id];
247 dest_tree.SetLeaf(new_node_id, leaf_value);
249 return LoadSKLearnModel(n_estimators * n_classes, n_features, n_classes, node_count,
250 children_left, children_right, feature, threshold, value, n_node_samples, impurity,
251 meta_handler, leaf_handler);
255 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
256 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
257 const double** threshold,
const double** value,
const int64_t** n_node_samples,
258 const double** impurity) {
259 CHECK_GE(n_classes, 2);
260 if (n_classes == 2) {
261 return LoadSKLearnGradientBoostingClassifierBinary(n_estimators, n_features, n_classes,
262 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
265 return LoadSKLearnGradientBoostingClassifierMulticlass(n_estimators, n_features, n_classes,
266 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
ModelParam param
extra parameters
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
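Load a scikit-learn random forest regressor model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.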
Collection of front-end methods to load or construct ensemble model.
void Init()
initialize the model with a single root node
bool average_tree_output
whether to average tree outputs
model structure for tree ensemble
in-memory representation of a decision tree
float global_bias
global bias of the model
TaskType task_type
Task type.
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
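Load a scikit-learn random forest classifier model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.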
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting classifier model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
std::vector< Tree< ThresholdType, LeafOutputType > > trees
member trees
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting regressor model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
void SetGain(int nid, double gain)
set the gain value of the node
TaskParameter task_param
Group of parameters that are specific to the particular task type.
void SetDataCount(int nid, uint64_t data_count)
set the data count of the node
int LeftChild(int nid) const
Getters.
unsigned int num_class
The number of classes in the target label.
int RightChild(int nid) const
index of the node's right child
bool grove_per_class
Whether we designate a subset of the trees to compute the prediction for each class.
void AddChilds(int nid)
add child nodes to node
thin wrapper for tree ensemble model
OutputType output_type
The type of output from each leaf node.
int num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_feature]-1.
unsigned int leaf_vector_size
Dimension of the output from each leaf node.
void SetNumericalSplit(int nid, unsigned split_index, ThresholdType threshold, bool default_left, Operator cmp)
Setters.
char pred_transform[TREELITE_MAX_PRED_TRANSFORM_LENGTH]
name of prediction transform function