7 #ifndef TREELITE_TREE_H_ 8 #define TREELITE_TREE_H_ 12 #include <dmlc/logging.h> 24 Node() : sindex_(0) {}
39 return sindex_ & ((1U << 31) - 1U);
43 return (sindex_ >> 31) != 0;
51 return (this->info_).leaf_value;
58 return this->leaf_vector_;
64 return !(this->leaf_vector_.empty());
68 return (this->info_).threshold;
72 return parent_ & ((1U << 31) - 1);
76 return (parent_ & (1U << 31)) != 0;
88 return left_categories_;
104 CHECK_LT(split_index, (1U << 31) - 1) <<
"split_index too big";
105 if (default_left) split_index |= (1U << 31);
107 (this->info_).threshold = threshold;
109 this->split_type_ = SplitFeatureType::kNumerical;
121 CHECK_LT(split_index, (1U << 31) - 1) <<
"split_index too big";
122 if (default_left) split_index |= (1U << 31);
125 std::sort(this->left_categories_.begin(), this->left_categories_.end());
126 this->split_type_ = SplitFeatureType::kCategorical;
136 this->split_type_ = SplitFeatureType::kNone;
147 this->split_type_ = SplitFeatureType::kNone;
156 this->parent_ = pidx;
170 std::vector<tl_float> leaf_vector_;
199 std::vector<uint8_t> left_categories_;
204 std::vector<Node> nodes;
206 inline int AllocNode() {
208 CHECK_LT(
num_nodes, std::numeric_limits<int>::max())
209 <<
"number of nodes in the tree exceed 2^31";
237 nodes[0].set_leaf(0.0f);
238 nodes[0].set_parent(-1);
245 const int cleft = this->AllocNode();
246 const int cright = this->AllocNode();
247 nodes[nid].cleft_ =
cleft;
248 nodes[nid].cright_ =
cright;
249 nodes[
cleft].set_parent(nid,
true);
250 nodes[
cright].set_parent(nid,
false);
258 std::unordered_map<unsigned, bool> tmp;
259 for (
int nid = 0; nid <
num_nodes; ++nid) {
260 const Node& node = nodes[nid];
262 if (type != SplitFeatureType::kNone) {
263 const bool flag = (type == SplitFeatureType::kCategorical);
265 if (tmp.count(split_index) == 0) {
268 CHECK_EQ(tmp[split_index], flag) <<
"Feature " << split_index
269 <<
" cannot be simultaneously be categorical and numerical.";
273 std::vector<unsigned> result;
274 for (
const auto& kv : tmp) {
276 result.push_back(kv.first);
279 std::sort(result.begin(), result.end());
326 DMLC_DECLARE_FIELD(pred_transform).set_default(
"identity")
327 .describe(
"name of prediction transform function");
328 DMLC_DECLARE_FIELD(sigmoid_alpha).set_default(1.0f)
329 .set_lower_bound(0.0f)
330 .describe(
"scaling parameter for sigmoid function");
331 DMLC_DECLARE_FIELD(global_bias).set_default(0.0f)
332 .describe(
"global bias of the model");
336 inline void InitParamAndCheck(
ModelParam* param,
337 const std::vector<std::pair<std::string, std::string>> cfg) {
338 auto unknown = param->InitAllowUnknown(cfg);
339 if (unknown.size() > 0) {
340 std::ostringstream oss;
341 for (
const auto& kv : unknown) {
342 oss << kv.first <<
", ";
344 LOG(INFO) <<
"\033[1;31mWarning: Unknown parameters found; " 345 <<
"they have been ignored\u001B[0m: " << oss.str();
369 param.Init(std::vector<std::pair<std::string, std::string>>());
378 #endif // TREELITE_TREE_H_ bool is_left_child() const
whether current node is left child
int num_output_group
number of output groups – for multi-class classification. Set to 1 for everything else ...
SplitFeatureType
feature split type
void Init()
initialize the model with a single root node
thin wrapper for tree ensemble model
float tl_float
float type to be used internally
void set_leaf(tl_float value)
set the leaf value of the node
std::vector< Tree > trees
member trees
void set_parent(int pidx, bool is_left_child=true)
set parent of the node
float sigmoid_alpha
scaling parameter for sigmoid function sigmoid(x) = 1 / (1 + exp(-alpha * x))
unsigned split_index() const
feature index of split condition
ModelParam param
extra parameters
Operator comparison_op() const
get comparison operator
in-memory representation of a decision tree
float global_bias
global bias of the model
tl_float threshold() const
bool has_leaf_vector() const
const std::vector< tl_float > & leaf_vector() const
int cright() const
index of right child
std::string pred_transform
name of prediction transform function
int cdefault() const
index of default child when feature is missing
const std::vector< uint8_t > & left_categories() const
get categories for left child node
void set_numerical_split(unsigned split_index, tl_float threshold, bool default_left, Operator cmp)
create a numerical split
bool random_forest_flag
flag for random forest; True for random forests and False for gradient boosted trees ...
void set_categorical_split(unsigned split_index, bool default_left, const std::vector< uint8_t > &left_categories)
create a categorical split
void set_leaf_vector(const std::vector< tl_float > &leaf_vector)
set the leaf vector of the node; useful for multi-class random forest classifier
int num_nodes
number of nodes
const Node & operator[](int nid) const
get node given nid (const version)
defines configuration macros of treelite
tl_float leaf_value() const
bool default_left() const
whether to go to the left child when the feature value is unknown
Node & operator[](int nid)
get node given nid
bool is_root() const
whether current node is root
void AddChilds(int nid)
add child nodes to node
int cleft() const
index of left child
std::vector< unsigned > GetCategoricalFeatures() const
get list of all categorical features that have appeared anywhere in tree
SplitFeatureType split_type() const
get feature split type
int parent() const
get parent of the node
bool is_leaf() const
whether current node is leaf node
int num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_...
Model()
disable copy; use default move
Operator
comparison operators