22 int nid,
size_t* out_counts) {
29 if (data[split_index].missing == -1) {
30 Traverse_(tree, data, node.
cdefault(), out_counts);
33 if (node.
split_type() == treelite::SplitFeatureType::kNumerical) {
40 const auto fvalue = data[split_index].fvalue;
41 const uint32_t fvalue2 =
static_cast<uint32_t
>(fvalue);
43 result = (std::binary_search(left_categories.begin(),
44 left_categories.end(), fvalue));
47 Traverse_(tree, data, node.
cleft(), out_counts);
49 Traverse_(tree, data, node.
cright(), out_counts);
57 Traverse_(tree, data, 0, out_counts);
62 size_t rbegin,
size_t rend,
int nthread,
63 const size_t* count_row_ptr,
64 size_t* counts_tloc, Entry* inst) {
65 const size_t ntree = model.
trees.size();
66 CHECK_LE(rbegin, rend);
67 CHECK_LT(static_cast<int64_t>(rend), std::numeric_limits<int64_t>::max());
68 const int64_t rbegin_i =
static_cast<int64_t
>(rbegin);
69 const int64_t rend_i =
static_cast<int64_t
>(rend);
70 #pragma omp parallel for schedule(static) num_threads(nthread) 71 for (int64_t rid = rbegin_i; rid < rend_i; ++rid) {
72 const int tid = omp_get_thread_num();
73 const size_t off = dmat->
num_col * tid;
74 const size_t off2 = count_row_ptr[ntree] * tid;
75 const size_t ibegin = dmat->
row_ptr[rid];
76 const size_t iend = dmat->
row_ptr[rid + 1];
77 for (
size_t i = ibegin; i < iend; ++i) {
78 inst[off + dmat->
col_ind[i]].fvalue = dmat->
data[i];
80 for (
size_t tree_id = 0; tree_id < ntree; ++tree_id) {
81 Traverse(model.
trees[tree_id], &inst[off],
82 &counts_tloc[off2 + count_row_ptr[tree_id]]);
84 for (
size_t i = ibegin; i < iend; ++i) {
85 inst[off + dmat->
col_ind[i]].missing = -1;
96 int nthread,
int verbose) {
97 std::vector<size_t> counts;
98 std::vector<size_t> counts_tloc;
99 std::vector<size_t> count_row_ptr;
101 const size_t ntree = model.
trees.size();
102 const int max_thread = omp_get_max_threads();
103 nthread = (nthread == 0) ? max_thread : std::min(nthread, max_thread);
105 count_row_ptr.push_back(count_row_ptr.back() + tree.
num_nodes);
107 counts.resize(count_row_ptr[ntree], 0);
108 counts_tloc.resize(count_row_ptr[ntree] * nthread, 0);
110 std::vector<Entry> inst(nthread * dmat->
num_col, {-1});
111 const size_t pstep = (dmat->
num_row + 19) / 20;
113 for (
size_t rbegin = 0; rbegin < dmat->
num_row; rbegin += pstep) {
114 const size_t rend = std::min(rbegin + pstep, dmat->
num_row);
115 ComputeBranchLoop(model, dmat, rbegin, rend, nthread,
116 &count_row_ptr[0], &counts_tloc[0], &inst[0]);
118 LOG(INFO) << rend <<
" of " << dmat->
num_row <<
" rows processed";
123 for (
int tid = 0; tid < nthread; ++tid) {
124 const size_t off = count_row_ptr[ntree] * tid;
125 for (
size_t i = 0; i < count_row_ptr[ntree]; ++i) {
126 counts[i] += counts_tloc[off + i];
131 for (
size_t i = 0; i < ntree; ++i) {
132 this->counts.emplace_back(&counts[count_row_ptr[i]],
133 &counts[count_row_ptr[i + 1]]);
138 BranchAnnotator::Load(dmlc::Stream* fi) {
139 dmlc::istream is(fi);
140 auto reader = common::make_unique<dmlc::JSONReader>(&is);
141 reader->Read(&counts);
145 BranchAnnotator::Save(dmlc::Stream* fo)
const {
146 dmlc::ostream os(fo);
147 auto writer = common::make_unique<dmlc::JSONWriter>(&os);
148 writer->Write(counts);
std::vector< float > data
feature values
thin wrapper for tree ensemble model
float tl_float
float type to be used internally
std::vector< Tree > trees
member trees
unsigned split_index() const
feature index of split condition
Operator comparison_op() const
get comparison operator
in-memory representation of a decision tree
const std::vector< uint32_t > & left_categories() const
get categories for left child node
std::vector< uint32_t > col_ind
feature indices
bool CompareWithOp(tl_float lhs, Operator op, tl_float rhs)
perform comparison between two floats using a comparison operator. The comparison will be in the form...
tl_float threshold() const
int cright() const
index of right child
size_t num_row
number of rows
a simple data matrix in CSR (Compressed Sparse Row) storage
int cdefault() const
index of default child when feature is missing
int num_nodes
number of nodes
size_t num_col
number of columns
compatibility wrapper for systems that don't support OpenMP
int cleft() const
index of left child
Building blocks for semantic model of tree prediction code.
SplitFeatureType split_type() const
get feature split type
std::vector< size_t > row_ptr
pointer to row headers; length of [num_row] + 1
bool is_leaf() const
whether current node is leaf node
Operator
comparison operators