7 #ifndef TREELITE_TREE_H_ 8 #define TREELITE_TREE_H_ 16 #include <type_traits> 21 #define __TREELITE_STR(x) #x 22 #define _TREELITE_STR(x) __TREELITE_STR(x) 24 #define TREELITE_MAX_PRED_TRANSFORM_LENGTH 256 30 float stof(
const std::string& value,
size_t* pos);
54 inline void UseForeignBuffer(
void* prealloc_buf,
size_t size);
56 inline const T* Data()
const;
58 inline const T* End()
const;
60 inline const T& Back()
const;
61 inline size_t Size()
const;
62 inline void Reserve(
size_t newsize);
63 inline void Resize(
size_t newsize);
64 inline void Resize(
size_t newsize, T t);
66 inline void PushBack(T t);
67 inline void Extend(
const std::vector<T>& other);
68 inline T& operator[](
size_t idx);
69 inline const T& operator[](
size_t idx)
const;
70 static_assert(std::is_pod<T>::value,
"T must be POD");
129 bool missing_category_to_zero_;
140 static_assert(std::is_pod<Node>::value,
"Node must be a POD type");
141 static_assert(
sizeof(
Node) == 48,
"Node must be 48 bytes");
146 Tree& operator=(
const Tree&) =
delete;
149 inline Tree Clone()
const;
151 inline std::vector<PyBufferFrame> GetPyBuffer();
152 inline void InitFromPyBuffer(std::vector<PyBufferFrame> frames);
163 inline int AllocNode();
174 inline void AddChilds(
int nid);
180 inline std::vector<unsigned> GetCategoricalFeatures()
const;
187 inline int LeftChild(
int nid)
const;
192 inline int RightChild(
int nid)
const;
197 inline int DefaultChild(
int nid)
const;
202 inline uint32_t SplitIndex(
int nid)
const;
207 inline bool DefaultLeft(
int nid)
const;
212 inline bool IsLeaf(
int nid)
const;
217 inline tl_float LeafValue(
int nid)
const;
222 inline std::vector<tl_float> LeafVector(
int nid)
const;
227 inline bool HasLeafVector(
int nid)
const;
232 inline tl_float Threshold(
int nid)
const;
237 inline Operator ComparisonOp(
int nid)
const;
245 inline std::vector<uint32_t> LeftCategories(
int nid)
const;
255 inline bool HasDataCount(
int nid)
const;
260 inline uint64_t DataCount(
int nid)
const;
265 inline bool HasSumHess(
int nid)
const;
270 inline double SumHess(
int nid)
const;
275 inline bool HasGain(
int nid)
const;
280 inline double Gain(
int nid)
const;
286 inline bool MissingCategoryToZero(
int nid)
const;
298 inline void SetNumericalSplit(
int nid,
unsigned split_index,
tl_float threshold,
309 inline void SetCategoricalSplit(
int nid,
unsigned split_index,
bool default_left,
310 bool missing_category_to_zero,
311 const std::vector<uint32_t>& left_categories);
317 inline void SetLeaf(
int nid,
tl_float value);
323 inline void SetLeafVector(
int nid,
const std::vector<tl_float>& leaf_vector);
329 inline void SetSumHess(
int nid,
double sum_hess);
335 inline void SetDataCount(
int nid, uint64_t data_count);
341 inline void SetGain(
int nid,
double gain);
343 void ReferenceSerialize(dmlc::Stream* fo)
const;
368 char pred_transform[TREELITE_MAX_PRED_TRANSFORM_LENGTH] = {0};
// Default constructor: initializes sigmoid_alpha to 1.0f and global_bias to 0.0f,
// then sets pred_transform to the string "identity".
386 ModelParam() : sigmoid_alpha(1.0f), global_bias(0.0f) {
// Zero the whole pred_transform buffer (TREELITE_MAX_PRED_TRANSFORM_LENGTH chars).
387 std::memset(pred_transform, 0, TREELITE_MAX_PRED_TRANSFORM_LENGTH *
sizeof(
char));
// Copy "identity" into the buffer, bounded by the buffer's full size. "identity" is
// shorter than the buffer, so the stored string remains NUL-terminated.
388 std::strncpy(pred_transform,
"identity",
sizeof(pred_transform));
396 template<
typename Container>
397 inline std::vector<std::pair<std::string, std::string>>
398 InitAllowUnknown(
const Container &kwargs);
399 inline std::map<std::string, std::string> __DICT__()
const;
402 static_assert(std::is_standard_layout<ModelParam>::value,
403 "ModelParam must be in the standard layout");
405 inline void InitParamAndCheck(
ModelParam* param,
406 const std::vector<std::pair<std::string, std::string>>& cfg);
434 void ReferenceSerialize(dmlc::Stream* fo)
const;
436 inline std::vector<PyBufferFrame> GetPyBuffer();
437 inline void InitFromPyBuffer(std::vector<PyBufferFrame> frames);
438 inline Model Clone()
const;
445 #endif // TREELITE_TREE_H_ SplitFeatureType split_type_
feature split type
Implementation for tree.h.
bool gain_present_
whether gain_ field is present
int num_output_group
number of output groups – for multi-class classification. Set to 1 for everything else ...
SplitFeatureType
feature split type
uint64_t data_count_
number of data points whose traversal paths include this node. LightGBM models natively store this statistic.
thin wrapper for tree ensemble model
float tl_float
float type to be used internally
Operator cmp_
operator to use for expression of form [fval] OP [threshold]. If the expression evaluates to true...
std::vector< Tree > trees
member trees
float sigmoid_alpha
scaling parameter for sigmoid function sigmoid(x) = 1 / (1 + exp(-alpha * x))
ModelParam param
extra parameters
bool data_count_present_
whether data_count_ field is present
int32_t cleft_
pointer to left and right children
in-memory representation of a decision tree
double sum_hess_
sum of hessian values for all data points whose traversal paths include this node. This value is generally correlated positively with the data count. XGBoost models natively store this statistic.
float global_bias
global bias of the model
double gain_
change in loss that is attributed to a particular split
store either leaf value or decision threshold
bool random_forest_flag
flag for random forest; True for random forests and False for gradient boosted trees ...
int num_nodes
number of nodes
defines configuration macros of Treelite
bool sum_hess_present_
whether sum_hess_ field is present
uint32_t sindex_
feature index used for the split; the highest bit indicates the default direction for missing values ...
int num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_...
Info info_
storage for leaf value or decision threshold
Operator
comparison operators