19 #include "../threading_utils/parallel_for.h" 20 #include "./pred_transform.h" 26 template <
typename ThresholdType,
typename LeafOutputType>
29 return &tree.nodes_[nid];
38 using PredTransformFuncType = std::size_t (*) (
const treelite::Model&,
const float*,
float*);
45 void Init(std::size_t size) {
47 missing_.resize(size);
48 std::fill(data_.begin(), data_.end(), std::numeric_limits<float>::quiet_NaN());
49 std::fill(missing_.begin(), missing_.end(),
true);
52 template <
typename DMatrixType>
53 void Fill(
const DMatrixType* input, std::size_t row_id) {
54 std::size_t feature_count = 0;
55 input->FillRow(row_id, &data_[0]);
56 for (std::size_t i = 0; i < data_.size(); ++i) {
57 if ( !(missing_[i] = std::isnan(data_[i])) ) {
61 has_missing_ = data_.size() != feature_count;
63 template <
typename DMatrixType>
64 void Clear(
const DMatrixType* input, std::size_t row_id) {
65 input->ClearRow(row_id, &data_[0]);
66 std::fill(missing_.begin(), missing_.end(),
true);
69 std::size_t Size()
const {
72 float GetFValue(std::size_t i)
const {
75 bool IsMissing(
size_t i)
const {
78 bool HasMissing()
const {
83 std::vector<float> data_;
84 std::vector<bool> missing_;
88 template <
typename ThresholdType>
89 inline int NextNode(
float fvalue, ThresholdType threshold,
treelite::Operator op,
int left_child) {
91 if (op == treelite::Operator::kLT) {
92 return left_child + !(fvalue < threshold);
95 if (op == treelite::Operator::kLE) {
96 return left_child + !(fvalue <= threshold);
99 case treelite::Operator::kEQ:
100 return left_child + !(fvalue == threshold);
101 case treelite::Operator::kGT:
102 return left_child + !(fvalue > threshold);
103 case treelite::Operator::kGE:
104 return left_child + !(fvalue >= threshold);
106 TREELITE_CHECK(
false) <<
"Unrecognized comparison operator " <<
static_cast<int>(op);
111 inline int NextNodeCategorical(
float fvalue,
const std::vector<std::uint32_t>& matching_categories,
112 bool categories_list_right_child,
int left_child,
int right_child) {
113 bool is_matching_category;
114 auto max_representable_int =
static_cast<float>(std::uint32_t(1) << FLT_MANT_DIG);
115 if (fvalue < 0 || std::fabs(fvalue) > max_representable_int) {
116 is_matching_category =
false;
118 const auto category_value =
static_cast<std::uint32_t
>(fvalue);
119 is_matching_category = (
120 std::find(matching_categories.begin(), matching_categories.end(), category_value)
121 != matching_categories.end());
123 if (categories_list_right_child) {
124 return is_matching_category ? right_child : left_child;
126 return is_matching_category ? left_child : right_child;
130 constexpr std::size_t kBlockOfRowsSize = 64;
132 struct BinaryClfRegrOutputLogic {
133 template <
typename ThresholdType,
typename LeafOutputType>
135 std::size_t,
int node_id,
float* sum, std::size_t) {
138 inline static void ApplyAverageFactor(
const treelite::TaskParam& task_param, std::size_t num_tree,
139 float* output, std::size_t batch_offset,
140 std::size_t block_size) {
141 const auto average_factor =
static_cast<float>(num_tree);
142 const unsigned num_class = task_param.
num_class;
143 for (std::size_t i = 0; i < block_size; ++i) {
144 for (
unsigned k = 0; k < num_class; ++k) {
145 output[(batch_offset + i) * num_class + k] /= average_factor;
151 struct MultiClfGrovePerClassOutputLogic {
152 template <
typename ThresholdType,
typename LeafOutputType>
154 std::size_t tree_id,
int node_id,
float* sum,
155 std::size_t num_class) {
156 sum[tree_id % num_class] += tree.
LeafValue(node_id);
158 inline static void ApplyAverageFactor(
const treelite::TaskParam& task_param, std::size_t num_tree,
159 float* output, std::size_t batch_offset,
160 std::size_t block_size) {
161 auto num_boosting_round = num_tree /
static_cast<std::size_t
>(task_param.
num_class);
162 const auto average_factor =
static_cast<float>(num_boosting_round);
163 const unsigned num_class = task_param.
num_class;
164 for (std::size_t i = 0; i < block_size; ++i) {
165 for (
unsigned k = 0; k < num_class; ++k) {
166 output[(batch_offset + i) * num_class + k] /= average_factor;
172 struct MultiClfProbDistLeafOutputLogic {
173 template <
typename ThresholdType,
typename LeafOutputType>
175 std::size_t tree_id,
int node_id,
float* sum,
176 std::size_t num_class) {
178 for (
unsigned int i = 0; i < num_class; ++i) {
179 sum[i] += leaf_vector[i];
182 inline static void ApplyAverageFactor(
const treelite::TaskParam& task_param, std::size_t num_tree,
183 float* output, std::size_t batch_offset,
184 std::size_t block_size) {
185 return BinaryClfRegrOutputLogic::ApplyAverageFactor(task_param, num_tree, output, batch_offset,
190 template <
typename T1,
typename T2>
191 inline T1 DivRoundUp(
const T1 a,
const T2 b) {
192 return static_cast<T1
>(std::ceil(static_cast<double>(a) / b));
195 template <
typename DMatrixType>
196 void FVecFill(
const std::size_t block_size,
const std::size_t batch_offset,
197 const DMatrixType* input,
const std::size_t fvec_offset,
int num_feature,
198 std::vector<FVec>& feats) {
199 for (std::size_t i = 0; i < block_size; ++i) {
200 FVec& vec = feats[fvec_offset + i];
201 if (vec.Size() == 0) {
202 vec.Init(static_cast<std::size_t>(num_feature));
204 const std::size_t row_id = batch_offset + i;
205 vec.Fill(input, row_id);
209 template <
typename DMatrixType>
210 void FVecDrop(
const std::size_t block_size,
const std::size_t batch_offset,
211 const DMatrixType* input,
const std::size_t fvec_offset,
int num_feature,
212 std::vector<FVec>& feats) {
213 for (std::size_t i = 0; i < block_size; ++i) {
214 const std::size_t row_id = batch_offset + i;
215 feats[fvec_offset + i].Clear(input, row_id);
219 template <
typename ThresholdType,
typename LeafOutputType,
typename DMatrixType>
221 const DMatrixType* input,
float* output) {
222 const std::size_t num_row = input->GetNumRow();
223 const auto num_class = model.
task_param.num_class;
224 std::fill(output, output + num_row * num_class, model.
param.global_bias);
227 template <
bool has_missing,
bool has_categorical,
typename OutputLogic,
typename ThresholdType,
228 typename LeafOutputType>
230 std::size_t tree_id,
const FVec& feats,
float* output,
231 std::size_t num_class) {
234 = treelite::GTILBridge::GetNode(tree, node_id);
235 while (!node->IsLeaf()) {
236 const auto split_index = node->SplitIndex();
237 if (has_missing && feats.IsMissing(split_index)) {
238 node_id = node->DefaultChild();
240 const float fvalue = feats.GetFValue(split_index);
241 if (has_categorical && node->SplitType() == treelite::SplitFeatureType::kCategorical) {
243 node->CategoriesListRightChild(), node->
LeftChild(),
246 node_id = NextNode(fvalue, node->Threshold(), node->ComparisonOp(), node->
LeftChild());
249 node = treelite::GTILBridge::GetNode(tree, node_id);
251 OutputLogic::PushOutput(tree, tree_id, node_id, output, num_class);
254 template <
bool has_categorical,
typename OutputLogic,
typename ThresholdType,
255 typename LeafOutputType>
257 std::size_t tree_id,
const FVec& feats,
float* output,
258 std::size_t num_class) {
259 if (feats.HasMissing()) {
260 PredValueByOneTreeImpl<true, has_categorical, OutputLogic>(
261 tree, tree_id, feats, output, num_class);
263 PredValueByOneTreeImpl<false, has_categorical, OutputLogic>(
264 tree, tree_id, feats, output, num_class);
268 template <
typename OutputLogic,
typename ThresholdType,
typename LeafOutputType>
270 float* output,
const std::size_t batch_offset,
271 const std::size_t num_class,
const std::vector<FVec>& feats,
272 const std::size_t fvec_offset,
const std::size_t block_size) {
274 const std::size_t num_tree = model.
trees.size();
275 for (std::size_t tree_id = 0; tree_id < num_tree; ++tree_id) {
278 if (has_categorical) {
279 for (std::size_t i = 0; i < block_size; ++i) {
280 PredValueByOneTree<true, OutputLogic>(tree, tree_id, feats[fvec_offset + i],
281 &output[(batch_offset + i) * num_class], num_class);
284 for (std::size_t i = 0; i < block_size; ++i) {
285 PredValueByOneTree<false, OutputLogic>(tree, tree_id, feats[fvec_offset + i],
286 &output[(batch_offset + i) * num_class], num_class);
292 template <std::size_t block_of_rows_size,
typename OutputLogic,
typename ThresholdType,
293 typename LeafOutputType,
typename DMatrixType>
294 void PredictBatchByBlockOfRowsKernel(
296 float* output,
const ThreadConfig& thread_config) {
297 const std::size_t num_row = input->GetNumRow();
300 std::size_t n_blocks = DivRoundUp(num_row, block_of_rows_size);
302 std::vector<FVec> feats(thread_config.nthread * block_of_rows_size);
303 auto sched = treelite::threading_utils::ParallelSchedule::Static();
304 treelite::threading_utils::ParallelFor(std::size_t(0), n_blocks, thread_config, sched,
305 [&](std::size_t block_id,
int thread_id) {
306 const std::size_t batch_offset = block_id * block_of_rows_size;
307 const std::size_t block_size =
308 std::min(num_row - batch_offset, block_of_rows_size);
309 const std::size_t fvec_offset = thread_id * block_of_rows_size;
311 FVecFill(block_size, batch_offset, input, fvec_offset, num_feature, feats);
313 PredictByAllTrees<OutputLogic>(model, output, batch_offset,
314 static_cast<std::size_t
>(task_param.num_class), feats,
315 fvec_offset, block_size);
316 FVecDrop(block_size, batch_offset, input, fvec_offset, num_feature, feats);
317 if (model.average_tree_output) {
318 OutputLogic::ApplyAverageFactor(task_param, model.GetNumTree(), output, batch_offset,
324 template <
typename OutputLogic,
typename ThresholdType,
typename LeafOutputType,
325 typename DMatrixType>
326 void PredictBatchTreeParallelKernel(
328 float* output,
const ThreadConfig& thread_config) {
329 const std::size_t num_row = input->GetNumRow();
330 const std::size_t num_tree = model.GetNumTree();
332 const auto num_class = model.
task_param.num_class;
335 feats.Init(num_feature);
336 std::vector<float> sum_tloc(num_class * thread_config.nthread);
337 auto sched = treelite::threading_utils::ParallelSchedule::Static();
338 for (std::size_t row_id = 0; row_id < num_row; ++row_id) {
339 std::fill(sum_tloc.begin(), sum_tloc.end(), 0.0f);
340 feats.Fill(input, row_id);
341 treelite::threading_utils::ParallelFor(std::size_t(0), num_tree, thread_config, sched,
342 [&](std::size_t tree_id,
int thread_id) {
345 if (has_categorical) {
346 PredValueByOneTree<true, OutputLogic>(tree, tree_id, feats,
347 &sum_tloc[thread_id * num_class], num_class);
349 PredValueByOneTree<false, OutputLogic>(tree, tree_id, feats,
350 &sum_tloc[thread_id * num_class], num_class);
353 feats.Clear(input, row_id);
354 for (std::uint32_t thread_id = 0; thread_id < thread_config.nthread; ++thread_id) {
355 for (
unsigned i = 0; i < num_class; ++i) {
356 output[row_id * num_class + i] += sum_tloc[thread_id * num_class + i];
361 for (std::size_t row_id = 0; row_id < num_row; ++row_id) {
362 OutputLogic::ApplyAverageFactor(model.
task_param, num_tree, output, row_id, 1);
367 template <
typename OutputLogic,
typename ThresholdType,
typename LeafOutputType,
368 typename DMatrixType>
369 void PredictBatchDispatch(
371 float* output,
const ThreadConfig& thread_config) {
372 if (input->GetNumRow() < kBlockOfRowsSize) {
374 PredictBatchTreeParallelKernel<OutputLogic>(model, input, output, thread_config);
377 PredictBatchByBlockOfRowsKernel<kBlockOfRowsSize, OutputLogic>(
378 model, input, output, thread_config);
382 template <
typename ThresholdType,
typename LeafOutputType,
typename DMatrixType>
384 const DMatrixType* input,
float* output,
const ThreadConfig& thread_config) {
385 InitOutPredictions(model, input, output);
388 case treelite::TaskType::kBinaryClfRegr:
389 PredictBatchDispatch<BinaryClfRegrOutputLogic>(model, input, output, thread_config);
391 case treelite::TaskType::kMultiClfGrovePerClass:
392 PredictBatchDispatch<MultiClfGrovePerClassOutputLogic>(model, input, output, thread_config);
394 case treelite::TaskType::kMultiClfProbDistLeaf:
395 PredictBatchDispatch<MultiClfProbDistLeafOutputLogic>(model, input, output, thread_config);
397 case treelite::TaskType::kMultiClfCategLeaf:
400 <<
"Unsupported task type of the tree ensemble model: " 401 <<
static_cast<int>(model.task_type);
405 template <
typename ThresholdType,
typename LeafOutputType,
typename DMatrixType>
407 const DMatrixType* input,
float* output,
408 const ThreadConfig& thread_config,
bool pred_transform) {
409 std::size_t output_size_per_row;
410 const auto num_class = model.
task_param.num_class;
411 std::size_t num_row = input->GetNumRow();
412 if (pred_transform) {
413 std::vector<float> temp(treelite::gtil::GetPredictOutputSize(&model, num_row));
414 PredTransformFuncType pred_transform_func
415 = treelite::gtil::LookupPredTransform(model.
param.pred_transform);
416 auto sched = treelite::threading_utils::ParallelSchedule::Static();
418 output_size_per_row = pred_transform_func(model, &output[0], &temp[0]);
420 treelite::threading_utils::ParallelFor(std::size_t(0), num_row, thread_config, sched,
421 [&](std::size_t row_id,
int thread_id) {
422 pred_transform_func(model, &output[row_id * num_class],
423 &temp[row_id * output_size_per_row]);
426 temp.resize(output_size_per_row * num_row);
427 std::copy(temp.begin(), temp.end(), output);
429 output_size_per_row = model.
task_param.num_class;
431 return output_size_per_row * num_row;
434 template <
typename ThresholdType,
typename LeafOutputType,
typename DMatrixType>
436 const DMatrixType* input,
float* output,
437 const ThreadConfig& thread_config,
bool pred_transform) {
438 PredictRaw(model, input, output, thread_config);
439 return PredTransform(model, input, output, thread_config, pred_transform);
448 bool pred_transform) {
450 auto thread_config = threading_utils::ConfigureThreadConfig(nthread);
455 return model->Dispatch([d1, output, thread_config, pred_transform](
const auto& model) {
456 return PredictImpl(model, d1, output, thread_config, pred_transform);
459 return model->Dispatch([d2, output, thread_config, pred_transform](
const auto& model) {
460 return PredictImpl(model, d2, output, thread_config, pred_transform);
463 TREELITE_LOG(FATAL) <<
"DMatrix with float64 data is not supported";
468 std::size_t
Predict(
const Model* model,
const float* input, std::size_t num_row,
float* output,
469 int nthread,
bool pred_transform) {
470 std::unique_ptr<DenseDMatrixImpl<float>> dmat =
471 std::make_unique<DenseDMatrixImpl<float>>(
472 std::vector<float>(input, input + num_row * model->
num_feature),
473 std::numeric_limits<float>::quiet_NaN(),
476 return Predict(model, dmat.get(), output, nthread, pred_transform);
ModelParam param
extra parameters
Represent thread configuration, to be used with parallel loops.
Group of parameters that are dependent on the choice of the task type.
std::vector< LeafOutputType > LeafVector(int nid) const
get leaf vector of the leaf node; useful for multi-class random forest classifier ...
bool HasCategoricalSplit() const
Query whether this tree contains any categorical splits.
bool average_tree_output
whether to average tree outputs
Input data structure of Treelite.
model structure for tree ensemble
in-memory representation of a decision tree
logging facility for Treelite
unsigned int num_class
The number of classes in the target label.
TaskType task_type
Task type.
int32_t num_feature
number of features used for the model. It is assumed that all feature indices are between 0 and [num_...
std::vector< Tree< ThresholdType, LeafOutputType > > trees
member trees
General Tree Inference Library (GTIL), providing a reference implementation for predicting with decis...
std::size_t Predict(const Model *model, const DMatrix *input, float *output, int nthread, bool pred_transform)
Predict with a DMatrix.
LeafOutputType LeafValue(int nid) const
get leaf value of the leaf node
std::size_t GetPredictOutputSize(const Model *model, std::size_t num_row)
Given a batch of data rows, query the necessary size of array to hold predictions for all data points...
TaskParam task_param
Group of parameters that are specific to the particular task type.
thin wrapper for tree ensemble model
int LeftChild() const
Getters.
std::vector< std::uint32_t > MatchingCategories(int nid) const
Get list of all categories belonging to the left/right child node. See the categories_list_right_chil...
Operator
comparison operators