19 template <
typename MetaHandlerFunc,
typename LeafHandlerFunc>
20 std::unique_ptr<treelite::Model> LoadSKLearnModel(
21 int n_trees,
int n_features,
int n_classes,
const int64_t* node_count,
22 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
23 const double** threshold,
const double** value,
const int64_t** n_node_samples,
24 const double** impurity, MetaHandlerFunc meta_handler, LeafHandlerFunc leaf_handler) {
25 TREELITE_CHECK_GT(n_trees, 0);
26 TREELITE_CHECK_GT(n_features, 0);
28 std::unique_ptr<treelite::Model> model_ptr = treelite::Model::Create<double, double>();
29 meta_handler(model_ptr.get(), n_features, n_classes);
32 for (
int tree_id = 0; tree_id < n_trees; ++tree_id) {
33 model->
trees.emplace_back();
39 std::queue<std::pair<int64_t, int>> Q;
41 const int64_t total_sample_cnt = n_node_samples[tree_id][0];
45 std::tie(node_id, new_node_id) = Q.front(); Q.pop();
46 const int64_t left_child_id = children_left[tree_id][node_id];
47 const int64_t right_child_id = children_right[tree_id][node_id];
48 const int64_t sample_cnt = n_node_samples[tree_id][node_id];
49 if (left_child_id == -1) {
50 leaf_handler(tree_id, node_id, new_node_id, value, n_classes, tree);
52 const int64_t split_index = feature[tree_id][node_id];
53 const double split_cond = threshold[tree_id][node_id];
54 const int64_t left_child_sample_cnt = n_node_samples[tree_id][left_child_id];
55 const int64_t right_child_sample_cnt = n_node_samples[tree_id][right_child_id];
56 const double gain = sample_cnt * (
57 impurity[tree_id][node_id]
58 - left_child_sample_cnt * impurity[tree_id][left_child_id] / sample_cnt
59 - right_child_sample_cnt * impurity[tree_id][right_child_id] / sample_cnt)
63 tree.
SetNumericalSplit(new_node_id, split_index, split_cond,
true, treelite::Operator::kLE);
64 tree.
SetGain(new_node_id, gain);
65 Q.push({left_child_id, tree.
LeftChild(new_node_id)});
66 Q.push({right_child_id, tree.
RightChild(new_node_id)});
75 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
76 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
77 const double** value,
const int64_t** n_node_samples,
const double** impurity) {
78 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
81 model->
task_type = treelite::TaskType::kBinaryClfRegr;
89 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
91 const double leaf_value = value[tree_id][node_id];
92 dest_tree.SetLeaf(new_node_id, leaf_value);
94 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left, children_right,
95 feature, threshold, value, n_node_samples, impurity, meta_handler, leaf_handler);
98 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierBinary(
99 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
100 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
101 const double** threshold,
const double** value,
const int64_t** n_node_samples,
102 const double** impurity) {
103 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
106 model->
task_type = treelite::TaskType::kBinaryClfRegr;
114 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
117 const double* leaf_count = &value[tree_id][node_id * 2];
119 const double fraction_positive = leaf_count[1] / (leaf_count[0] + leaf_count[1]);
120 dest_tree.SetLeaf(new_node_id, fraction_positive);
122 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
123 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
127 std::unique_ptr<treelite::Model> LoadSKLearnRandomForestClassifierMulticlass(
128 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
129 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
130 const double** threshold,
const double** value,
const int64_t** n_node_samples,
131 const double** impurity) {
132 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
135 model->
task_type = treelite::TaskType::kMultiClfProbDistLeaf;
144 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
147 std::vector<double> prob_distribution(&value[tree_id][node_id * n_classes],
148 &value[tree_id][(node_id + 1) * n_classes]);
150 const double norm_factor =
151 std::accumulate(prob_distribution.begin(), prob_distribution.end(), 0.0);
152 std::for_each(prob_distribution.begin(), prob_distribution.end(), [norm_factor](
double& e) {
155 dest_tree.SetLeafVector(new_node_id, prob_distribution);
157 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
158 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
163 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
164 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
165 const double** threshold,
const double** value,
const int64_t** n_node_samples,
166 const double** impurity) {
167 TREELITE_CHECK_GE(n_classes, 2);
168 if (n_classes == 2) {
169 return LoadSKLearnRandomForestClassifierBinary(n_estimators, n_features, n_classes, node_count,
170 children_left, children_right, feature, threshold, value, n_node_samples, impurity);
172 return LoadSKLearnRandomForestClassifierMulticlass(n_estimators, n_features, n_classes,
173 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
179 int n_estimators,
int n_features,
const int64_t* node_count,
const int64_t** children_left,
180 const int64_t** children_right,
const int64_t** feature,
const double** threshold,
181 const double** value,
const int64_t** n_node_samples,
const double** impurity) {
182 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
185 model->
task_type = treelite::TaskType::kBinaryClfRegr;
193 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
195 const double leaf_value = value[tree_id][node_id];
196 dest_tree.SetLeaf(new_node_id, leaf_value);
198 return LoadSKLearnModel(n_estimators, n_features, 1, node_count, children_left,
199 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
203 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierBinary(
204 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
205 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
206 const double** threshold,
const double** value,
const int64_t** n_node_samples,
207 const double** impurity) {
208 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
211 model->
task_type = treelite::TaskType::kBinaryClfRegr;
219 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
221 const double leaf_value = value[tree_id][node_id];
222 dest_tree.SetLeaf(new_node_id, leaf_value);
224 return LoadSKLearnModel(n_estimators, n_features, n_classes, node_count, children_left,
225 children_right, feature, threshold, value, n_node_samples, impurity, meta_handler,
229 std::unique_ptr<treelite::Model> LoadSKLearnGradientBoostingClassifierMulticlass(
230 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
231 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
232 const double** threshold,
const double** value,
const int64_t** n_node_samples,
233 const double** impurity) {
234 auto meta_handler = [](
treelite::Model* model,
int n_features,
int n_classes) {
237 model->
task_type = treelite::TaskType::kMultiClfGrovePerClass;
245 auto leaf_handler = [](
int tree_id, int64_t node_id,
int new_node_id,
const double** value,
247 const double leaf_value = value[tree_id][node_id];
248 dest_tree.SetLeaf(new_node_id, leaf_value);
250 return LoadSKLearnModel(n_estimators * n_classes, n_features, n_classes, node_count,
251 children_left, children_right, feature, threshold, value, n_node_samples, impurity,
252 meta_handler, leaf_handler);
256 int n_estimators,
int n_features,
int n_classes,
const int64_t* node_count,
257 const int64_t** children_left,
const int64_t** children_right,
const int64_t** feature,
258 const double** threshold,
const double** value,
const int64_t** n_node_samples,
259 const double** impurity) {
260 TREELITE_CHECK_GE(n_classes, 2);
261 if (n_classes == 2) {
262 return LoadSKLearnGradientBoostingClassifierBinary(n_estimators, n_features, n_classes,
263 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
266 return LoadSKLearnGradientBoostingClassifierMulticlass(n_estimators, n_features, n_classes,
267 node_count, children_left, children_right, feature, threshold, value, n_node_samples,
ModelParam param
extra parameters
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn random forest regressor model from a collection of arrays. Refer to https://sciki...
Collection of front-end methods to load or construct ensemble model.
void Init()
initialize the model with a single root node
bool grove_per_class
Whether we designate a subset of the trees to compute the prediction for each class.
bool average_tree_output
whether to average tree outputs
model structure for tree ensemble
unsigned int leaf_vector_size
Dimension of the output from each leaf node.
in-memory representation of a decision tree
logging facility for Treelite
unsigned int num_class
The number of classes in the target label.
float global_bias
global bias of the model
TaskType task_type
Task type.
std::unique_ptr< treelite::Model > LoadSKLearnRandomForestClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn random forest classifier model from a collection of arrays. Refer to https://scik...
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingClassifier(int n_estimators, int n_features, int n_classes, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting classifier model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
std::vector< Tree< ThresholdType, LeafOutputType > > trees
member trees
std::unique_ptr< treelite::Model > LoadSKLearnGradientBoostingRegressor(int n_estimators, int n_features, const int64_t *node_count, const int64_t **children_left, const int64_t **children_right, const int64_t **feature, const double **threshold, const double **value, const int64_t **n_node_samples, const double **impurity)
Load a scikit-learn gradient boosting regressor model from a collection of arrays. Refer to https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html to learn the meaning of the arrays in detail.
void SetGain(int nid, double gain)
set the gain value of the node
void SetDataCount(int nid, uint64_t data_count)
set the data count of the node
int LeftChild(int nid) const
Getters.
TaskParam task_param
Group of parameters that are specific to the particular task type.
int RightChild(int nid) const
index of the node's right child
void AddChilds(int nid)
add child nodes to node
OutputType output_type
The type of output from each leaf node.
thin wrapper for tree ensemble model
int num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_...
void SetNumericalSplit(int nid, unsigned split_index, ThresholdType threshold, bool default_left, Operator cmp)
Setters.
char pred_transform[TREELITE_MAX_PRED_TRANSFORM_LENGTH]
name of prediction transform function