/*!
 * \file predict.cc
 * \brief Prediction routines for GTIL (General Tree Inference Library), part of Treelite
 */
#include <treelite/gtil.h>
#include <treelite/tree.h>
#include <treelite/data.h>
#include <treelite/logging.h>
#include <algorithm>  // std::find, std::fill, std::copy, std::min
#include <limits>
#include <memory>     // std::unique_ptr, std::make_unique
#include <vector>
#include <cmath>
#include <cstdint>
#include <cstddef>
#include <cfloat>
#include "../threading_utils/parallel_for.h"
#include "./pred_transform.h"

namespace treelite {

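// GTILBridge exposes the Tree's internal node array to the prediction kernels
// below. The nodes_ member is private to Tree, so this class is presumably
// declared a friend of Tree in tree.h; GetNode() simply returns a pointer to
// the node with the given ID.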
class GTILBridge {
 public:
  template <typename ThresholdType, typename LeafOutputType>
  inline static const typename Tree<ThresholdType, LeafOutputType>::Node*
  GetNode(const Tree<ThresholdType, LeafOutputType>& tree, int nid) {
    return &tree.nodes_[nid];
  }
};

} // namespace treelite

namespace {

using PredTransformFuncType = std::size_t (*) (const treelite::Model&, const float*, float*);

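// FVec is a dense, fixed-size buffer holding the feature values of one data
// row. Missing values are encoded as NaN; missing_[] caches the per-slot NaN
// test and has_missing_ records whether any slot in the row is missing.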
class FVec {
 public:
  void Init(std::size_t size) {
    data_.resize(size);
    missing_.resize(size);
    std::fill(data_.begin(), data_.end(), std::numeric_limits<float>::quiet_NaN());
    std::fill(missing_.begin(), missing_.end(), true);
    has_missing_ = true;
  }
  template <typename DMatrixType>
  void Fill(const DMatrixType* input, std::size_t row_id) {
    std::size_t feature_count = 0;
    input->FillRow(row_id, &data_[0]);
    for (std::size_t i = 0; i < data_.size(); ++i) {
      if ( !(missing_[i] = std::isnan(data_[i])) ) {
        ++feature_count;
      }
    }
    has_missing_ = data_.size() != feature_count;
  }
  template <typename DMatrixType>
  void Clear(const DMatrixType* input, std::size_t row_id) {
    input->ClearRow(row_id, &data_[0]);
    std::fill(missing_.begin(), missing_.end(), true);
    has_missing_ = true;
  }
  std::size_t Size() const {
    return data_.size();
  }
  float GetFValue(std::size_t i) const {
    return data_[i];
  }
  bool IsMissing(size_t i) const {
    return missing_[i];
  }
  bool HasMissing() const {
    return has_missing_;
  }

 private:
  std::vector<float> data_;
  std::vector<bool> missing_;
  bool has_missing_;
};

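// Evaluate a numerical split and return the ID of the child node to visit
// next. The expression left_child + !(condition) relies on the tree layout
// placing the right child immediately after the left child (right = left + 1).
// The two most common operators (kLT for XGBoost, kLE for LightGBM / sklearn /
// cuML RF) are tested first, ahead of the switch.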
template <typename ThresholdType>
inline int NextNode(float fvalue, ThresholdType threshold, treelite::Operator op, int left_child) {
  // XGBoost
  if (op == treelite::Operator::kLT) {
    return left_child + !(fvalue < threshold);
  }
  // LightGBM, sklearn, cuML RF
  if (op == treelite::Operator::kLE) {
    return left_child + !(fvalue <= threshold);
  }
  switch (op) {
    case treelite::Operator::kEQ:
      return left_child + !(fvalue == threshold);
    case treelite::Operator::kGT:
      return left_child + !(fvalue > threshold);
    case treelite::Operator::kGE:
      return left_child + !(fvalue >= threshold);
    default:
      TREELITE_CHECK(false) << "Unrecognized comparison operator " << static_cast<int>(op);
      return -1;
  }
}

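// Evaluate a categorical split. The feature value is reinterpreted as an
// integer category; values that are negative or too large to be represented
// exactly as a float never match. Otherwise the category is tested for
// membership in matching_categories, and categories_list_right_child controls
// whether a match routes the row to the right or the left child.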
inline int NextNodeCategorical(float fvalue, const std::vector<std::uint32_t>& matching_categories,
                               bool categories_list_right_child, int left_child, int right_child) {
  bool is_matching_category;
  auto max_representable_int = static_cast<float>(std::uint32_t(1) << FLT_MANT_DIG);
  if (fvalue < 0 || std::fabs(fvalue) > max_representable_int) {
    is_matching_category = false;
  } else {
    const auto category_value = static_cast<std::uint32_t>(fvalue);
    is_matching_category = (
        std::find(matching_categories.begin(), matching_categories.end(), category_value)
        != matching_categories.end());
  }
  if (categories_list_right_child) {
    return is_matching_category ? right_child : left_child;
  } else {
    return is_matching_category ? left_child : right_child;
  }
}

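// Number of rows processed as one unit by the row-parallel kernel; batches
// smaller than this are routed to the tree-parallel kernel instead.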
constexpr std::size_t kBlockOfRowsSize = 64;

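// Output-logic policies. Each policy defines how a leaf contributes to the
// output vector (PushOutput) and how accumulated sums are divided by the
// number of trees when the model averages tree outputs (ApplyAverageFactor):
// * BinaryClfRegrOutputLogic: one output per row; leaf values are summed.
// * MultiClfGrovePerClassOutputLogic: tree i contributes to class (i % num_class).
// * MultiClfProbDistLeafOutputLogic: each leaf stores a vector with one entry
//   per class, added element-wise to the output.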
struct BinaryClfRegrOutputLogic {
  template <typename ThresholdType, typename LeafOutputType>
  inline static void PushOutput(const treelite::Tree<ThresholdType, LeafOutputType>& tree,
                                std::size_t, int node_id, float* sum, std::size_t) {
    sum[0] += tree.LeafValue(node_id);
  }
  inline static void ApplyAverageFactor(const treelite::TaskParam& task_param, std::size_t num_tree,
                                        float* output, std::size_t batch_offset,
                                        std::size_t block_size) {
    const auto average_factor = static_cast<float>(num_tree);
    const unsigned num_class = task_param.num_class;
    for (std::size_t i = 0; i < block_size; ++i) {
      for (unsigned k = 0; k < num_class; ++k) {
        output[(batch_offset + i) * num_class + k] /= average_factor;
      }
    }
  }
};

struct MultiClfGrovePerClassOutputLogic {
  template <typename ThresholdType, typename LeafOutputType>
  inline static void PushOutput(const treelite::Tree<ThresholdType, LeafOutputType>& tree,
                                std::size_t tree_id, int node_id, float* sum,
                                std::size_t num_class) {
    sum[tree_id % num_class] += tree.LeafValue(node_id);
  }
  inline static void ApplyAverageFactor(const treelite::TaskParam& task_param, std::size_t num_tree,
                                        float* output, std::size_t batch_offset,
                                        std::size_t block_size) {
    auto num_boosting_round = num_tree / static_cast<std::size_t>(task_param.num_class);
    const auto average_factor = static_cast<float>(num_boosting_round);
    const unsigned num_class = task_param.num_class;
    for (std::size_t i = 0; i < block_size; ++i) {
      for (unsigned k = 0; k < num_class; ++k) {
        output[(batch_offset + i) * num_class + k] /= average_factor;
      }
    }
  }
};

struct MultiClfProbDistLeafOutputLogic {
  template <typename ThresholdType, typename LeafOutputType>
  inline static void PushOutput(const treelite::Tree<ThresholdType, LeafOutputType>& tree,
                                std::size_t tree_id, int node_id, float* sum,
                                std::size_t num_class) {
    auto leaf_vector = tree.LeafVector(node_id);
    for (unsigned int i = 0; i < num_class; ++i) {
      sum[i] += leaf_vector[i];
    }
  }
  inline static void ApplyAverageFactor(const treelite::TaskParam& task_param, std::size_t num_tree,
                                        float* output, std::size_t batch_offset,
                                        std::size_t block_size) {
    return BinaryClfRegrOutputLogic::ApplyAverageFactor(task_param, num_tree, output, batch_offset,
                                                        block_size);
  }
};

template <typename T1, typename T2>
inline T1 DivRoundUp(const T1 a, const T2 b) {
  return static_cast<T1>(std::ceil(static_cast<double>(a) / b));
}

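// Fill (resp. clear) the per-thread FVec buffers for a block of consecutive
// rows starting at batch_offset; fvec_offset locates the calling thread's
// slice inside the shared feats vector.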
template <typename DMatrixType>
void FVecFill(const std::size_t block_size, const std::size_t batch_offset,
              const DMatrixType* input, const std::size_t fvec_offset, int num_feature,
              std::vector<FVec>& feats) {
  for (std::size_t i = 0; i < block_size; ++i) {
    FVec& vec = feats[fvec_offset + i];
    if (vec.Size() == 0) {
      vec.Init(static_cast<std::size_t>(num_feature));
    }
    const std::size_t row_id = batch_offset + i;
    vec.Fill(input, row_id);
  }
}

template <typename DMatrixType>
void FVecDrop(const std::size_t block_size, const std::size_t batch_offset,
              const DMatrixType* input, const std::size_t fvec_offset, int num_feature,
              std::vector<FVec>& feats) {
  for (std::size_t i = 0; i < block_size; ++i) {
    const std::size_t row_id = batch_offset + i;
    feats[fvec_offset + i].Clear(input, row_id);
  }
}

template <typename ThresholdType, typename LeafOutputType, typename DMatrixType>
inline void InitOutPredictions(const treelite::ModelImpl<ThresholdType, LeafOutputType>& model,
                               const DMatrixType* input, float* output) {
  const std::size_t num_row = input->GetNumRow();
  const auto num_class = model.task_param.num_class;
  std::fill(output, output + num_row * num_class, model.param.global_bias);
}

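// Run one tree over one row. Traversal starts at the root (node 0); at each
// internal node the row follows the default child when the split feature is
// missing, and otherwise the child selected by the numerical or categorical
// split test. The has_missing / has_categorical template flags let the
// compiler drop the branches a given call site cannot take.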
template <bool has_missing, bool has_categorical, typename OutputLogic, typename ThresholdType,
          typename LeafOutputType>
void PredValueByOneTreeImpl(const treelite::Tree<ThresholdType, LeafOutputType>& tree,
                            std::size_t tree_id, const FVec& feats, float* output,
                            std::size_t num_class) {
  int node_id = 0;
  const typename treelite::Tree<ThresholdType, LeafOutputType>::Node* node
      = treelite::GTILBridge::GetNode(tree, node_id);
  while (!node->IsLeaf()) {
    const auto split_index = node->SplitIndex();
    if (has_missing && feats.IsMissing(split_index)) {
      node_id = node->DefaultChild();
    } else {
      const float fvalue = feats.GetFValue(split_index);
      if (has_categorical && node->SplitType() == treelite::SplitFeatureType::kCategorical) {
        node_id = NextNodeCategorical(fvalue, tree.MatchingCategories(node_id),
                                      node->CategoriesListRightChild(), node->LeftChild(),
                                      node->RightChild());
      } else {
        node_id = NextNode(fvalue, node->Threshold(), node->ComparisonOp(), node->LeftChild());
      }
    }
    node = treelite::GTILBridge::GetNode(tree, node_id);
  }
  OutputLogic::PushOutput(tree, tree_id, node_id, output, num_class);
}

template <bool has_categorical, typename OutputLogic, typename ThresholdType,
          typename LeafOutputType>
void PredValueByOneTree(const treelite::Tree<ThresholdType, LeafOutputType>& tree,
                        std::size_t tree_id, const FVec& feats, float* output,
                        std::size_t num_class) {
  if (feats.HasMissing()) {
    PredValueByOneTreeImpl<true, has_categorical, OutputLogic>(
        tree, tree_id, feats, output, num_class);
  } else {
    PredValueByOneTreeImpl<false, has_categorical, OutputLogic>(
        tree, tree_id, feats, output, num_class);
  }
}

template <typename OutputLogic, typename ThresholdType, typename LeafOutputType>
void PredictByAllTrees(const treelite::ModelImpl<ThresholdType, LeafOutputType>& model,
                       float* output, const std::size_t batch_offset,
                       const std::size_t num_class, const std::vector<FVec>& feats,
                       const std::size_t fvec_offset, const std::size_t block_size) {
  const int num_feature = model.num_feature;
  const std::size_t num_tree = model.trees.size();
  for (std::size_t tree_id = 0; tree_id < num_tree; ++tree_id) {
    const treelite::Tree<ThresholdType, LeafOutputType>& tree = model.trees[tree_id];
    auto has_categorical = tree.HasCategoricalSplit();
    if (has_categorical) {
      for (std::size_t i = 0; i < block_size; ++i) {
        PredValueByOneTree<true, OutputLogic>(tree, tree_id, feats[fvec_offset + i],
                                              &output[(batch_offset + i) * num_class], num_class);
      }
    } else {
      for (std::size_t i = 0; i < block_size; ++i) {
        PredValueByOneTree<false, OutputLogic>(tree, tree_id, feats[fvec_offset + i],
                                               &output[(batch_offset + i) * num_class], num_class);
      }
    }
  }
}

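// Row-parallel prediction kernel. Rows are split into blocks of
// block_of_rows_size and the blocks are distributed across threads. Each
// thread fills its own slice of FVec buffers for a block, walks every tree
// over the block (keeping the block cache-resident), then clears the buffers.
// Averaging, if requested, is applied per block as soon as the block is done.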
template <std::size_t block_of_rows_size, typename OutputLogic, typename ThresholdType,
          typename LeafOutputType, typename DMatrixType>
void PredictBatchByBlockOfRowsKernel(
    const treelite::ModelImpl<ThresholdType, LeafOutputType>& model, const DMatrixType* input,
    float* output, const ThreadConfig& thread_config) {
  const std::size_t num_row = input->GetNumRow();
  const int num_feature = model.num_feature;
  const auto& task_param = model.task_param;
  std::size_t n_blocks = DivRoundUp(num_row, block_of_rows_size);

  std::vector<FVec> feats(thread_config.nthread * block_of_rows_size);
  auto sched = treelite::threading_utils::ParallelSchedule::Static();
  treelite::threading_utils::ParallelFor(std::size_t(0), n_blocks, thread_config, sched,
                                         [&](std::size_t block_id, int thread_id) {
    const std::size_t batch_offset = block_id * block_of_rows_size;
    const std::size_t block_size =
        std::min(num_row - batch_offset, block_of_rows_size);
    const std::size_t fvec_offset = thread_id * block_of_rows_size;

    FVecFill(block_size, batch_offset, input, fvec_offset, num_feature, feats);
    // process block of rows through all trees to keep cache locality
    PredictByAllTrees<OutputLogic>(model, output, batch_offset,
                                   static_cast<std::size_t>(task_param.num_class), feats,
                                   fvec_offset, block_size);
    FVecDrop(block_size, batch_offset, input, fvec_offset, num_feature, feats);
    if (model.average_tree_output) {
      OutputLogic::ApplyAverageFactor(task_param, model.GetNumTree(), output, batch_offset,
                                      block_size);
    }
  });
}

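// Tree-parallel prediction kernel, used for small batches where there are too
// few rows to parallelize over. For each row, the trees are distributed across
// threads; every thread accumulates into its own slice of sum_tloc, and the
// slices are then reduced into the output row. Averaging, if requested, is
// applied at the end, one row at a time.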
template <typename OutputLogic, typename ThresholdType, typename LeafOutputType,
          typename DMatrixType>
void PredictBatchTreeParallelKernel(
    const treelite::ModelImpl<ThresholdType, LeafOutputType>& model, const DMatrixType* input,
    float* output, const ThreadConfig& thread_config) {
  const std::size_t num_row = input->GetNumRow();
  const std::size_t num_tree = model.GetNumTree();
  const int num_feature = model.num_feature;
  const auto num_class = model.task_param.num_class;

  FVec feats;
  feats.Init(num_feature);
  std::vector<float> sum_tloc(num_class * thread_config.nthread);
  auto sched = treelite::threading_utils::ParallelSchedule::Static();
  for (std::size_t row_id = 0; row_id < num_row; ++row_id) {
    std::fill(sum_tloc.begin(), sum_tloc.end(), 0.0f);
    feats.Fill(input, row_id);
    treelite::threading_utils::ParallelFor(std::size_t(0), num_tree, thread_config, sched,
                                           [&](std::size_t tree_id, int thread_id) {
      const treelite::Tree<ThresholdType, LeafOutputType>& tree = model.trees[tree_id];
      auto has_categorical = tree.HasCategoricalSplit();
      if (has_categorical) {
        PredValueByOneTree<true, OutputLogic>(tree, tree_id, feats,
                                              &sum_tloc[thread_id * num_class], num_class);
      } else {
        PredValueByOneTree<false, OutputLogic>(tree, tree_id, feats,
                                               &sum_tloc[thread_id * num_class], num_class);
      }
    });
    feats.Clear(input, row_id);
    for (std::uint32_t thread_id = 0; thread_id < thread_config.nthread; ++thread_id) {
      for (unsigned i = 0; i < num_class; ++i) {
        output[row_id * num_class + i] += sum_tloc[thread_id * num_class + i];
      }
    }
  }
  if (model.average_tree_output) {
    for (std::size_t row_id = 0; row_id < num_row; ++row_id) {
      OutputLogic::ApplyAverageFactor(model.task_param, num_tree, output, row_id, 1);
    }
  }
}

template <typename OutputLogic, typename ThresholdType, typename LeafOutputType,
          typename DMatrixType>
void PredictBatchDispatch(
    const treelite::ModelImpl<ThresholdType, LeafOutputType>& model, const DMatrixType* input,
    float* output, const ThreadConfig& thread_config) {
  if (input->GetNumRow() < kBlockOfRowsSize) {
    // Small batch size => tree parallel method
    PredictBatchTreeParallelKernel<OutputLogic>(model, input, output, thread_config);
  } else {
    // Sufficiently large batch size => row parallel method
    PredictBatchByBlockOfRowsKernel<kBlockOfRowsSize, OutputLogic>(
        model, input, output, thread_config);
  }
}

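// Produce raw (untransformed) margin scores for the whole batch: seed every
// output slot with the model's global bias, then select the output logic that
// matches the model's task type and run the batch through the trees.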
template <typename ThresholdType, typename LeafOutputType, typename DMatrixType>
inline void PredictRaw(const treelite::ModelImpl<ThresholdType, LeafOutputType>& model,
                       const DMatrixType* input, float* output, const ThreadConfig& thread_config) {
  InitOutPredictions(model, input, output);

  switch (model.task_type) {
    case treelite::TaskType::kBinaryClfRegr:
      PredictBatchDispatch<BinaryClfRegrOutputLogic>(model, input, output, thread_config);
      break;
    case treelite::TaskType::kMultiClfGrovePerClass:
      PredictBatchDispatch<MultiClfGrovePerClassOutputLogic>(model, input, output, thread_config);
      break;
    case treelite::TaskType::kMultiClfProbDistLeaf:
      PredictBatchDispatch<MultiClfProbDistLeafOutputLogic>(model, input, output, thread_config);
      break;
    case treelite::TaskType::kMultiClfCategLeaf:
    default:
      TREELITE_LOG(FATAL)
          << "Unsupported task type of the tree ensemble model: "
          << static_cast<int>(model.task_type);
  }
}

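// Optionally apply the model's pred_transform function (e.g. sigmoid or
// softmax) to the raw scores. The first call to the transform function reports
// how many outputs it produces per row; rows are then transformed in parallel
// into a temporary buffer and copied back over the output array. The return
// value is the total number of valid entries in the output.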
template <typename ThresholdType, typename LeafOutputType, typename DMatrixType>
inline std::size_t PredTransform(const treelite::ModelImpl<ThresholdType, LeafOutputType>& model,
                                 const DMatrixType* input, float* output,
                                 const ThreadConfig& thread_config, bool pred_transform) {
  std::size_t output_size_per_row;
  const auto num_class = model.task_param.num_class;
  std::size_t num_row = input->GetNumRow();
  if (pred_transform) {
    std::vector<float> temp(treelite::gtil::GetPredictOutputSize(&model, num_row));
    PredTransformFuncType pred_transform_func
        = treelite::gtil::LookupPredTransform(model.param.pred_transform);
    auto sched = treelite::threading_utils::ParallelSchedule::Static();
    // Query the size of output per input row.
    output_size_per_row = pred_transform_func(model, &output[0], &temp[0]);
    // Now transform predictions in parallel
    treelite::threading_utils::ParallelFor(std::size_t(0), num_row, thread_config, sched,
                                           [&](std::size_t row_id, int thread_id) {
      pred_transform_func(model, &output[row_id * num_class],
                          &temp[row_id * output_size_per_row]);
    });
    // Copy transformed score back to output
    temp.resize(output_size_per_row * num_row);
    std::copy(temp.begin(), temp.end(), output);
  } else {
    output_size_per_row = model.task_param.num_class;
  }
  return output_size_per_row * num_row;
}

template <typename ThresholdType, typename LeafOutputType, typename DMatrixType>
inline std::size_t PredictImpl(const treelite::ModelImpl<ThresholdType, LeafOutputType>& model,
                               const DMatrixType* input, float* output,
                               const ThreadConfig& thread_config, bool pred_transform) {
  PredictRaw(model, input, output, thread_config);
  return PredTransform(model, input, output, thread_config, pred_transform);
}

} // anonymous namespace

namespace treelite {
namespace gtil {

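// Public entry point for prediction with a DMatrix. Only float32 matrices are
// supported; the call is dispatched on the concrete matrix type (dense or CSR)
// and on the model's threshold / leaf-output types via Model::Dispatch.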
std::size_t Predict(const Model* model, const DMatrix* input, float* output, int nthread,
                    bool pred_transform) {
  // If nthread <= 0, then use all CPU cores in the system
  auto thread_config = threading_utils::ConfigureThreadConfig(nthread);
  // Check type of DMatrix
  const auto* d1 = dynamic_cast<const DenseDMatrixImpl<float>*>(input);
  const auto* d2 = dynamic_cast<const CSRDMatrixImpl<float>*>(input);
  if (d1) {
    return model->Dispatch([d1, output, thread_config, pred_transform](const auto& model) {
      return PredictImpl(model, d1, output, thread_config, pred_transform);
    });
  } else if (d2) {
    return model->Dispatch([d2, output, thread_config, pred_transform](const auto& model) {
      return PredictImpl(model, d2, output, thread_config, pred_transform);
    });
  } else {
    TREELITE_LOG(FATAL) << "DMatrix with float64 data is not supported";
    return 0;
  }
}

std::size_t Predict(const Model* model, const float* input, std::size_t num_row, float* output,
                    int nthread, bool pred_transform) {
  std::unique_ptr<DenseDMatrixImpl<float>> dmat =
      std::make_unique<DenseDMatrixImpl<float>>(
          std::vector<float>(input, input + num_row * model->num_feature),
          std::numeric_limits<float>::quiet_NaN(),
          num_row,
          model->num_feature);
  return Predict(model, dmat.get(), output, nthread, pred_transform);
}

std::size_t GetPredictOutputSize(const Model* model, std::size_t num_row) {
  return model->task_param.num_class * num_row;
}

std::size_t GetPredictOutputSize(const Model* model, const DMatrix* input) {
  return GetPredictOutputSize(model, input->GetNumRow());
}

} // namespace gtil
} // namespace treelite
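// Usage sketch (illustration only, not part of the original file): given a
// treelite::Model* `model` obtained elsewhere (e.g. from one of the frontend
// loaders) and a row-major float32 array `data` holding `num_row` rows,
// predictions could be obtained roughly as follows:
//
//   std::vector<float> out(treelite::gtil::GetPredictOutputSize(model, num_row));
//   std::size_t out_len = treelite::gtil::Predict(
//       model, data, num_row, out.data(), /*nthread=*/-1, /*pred_transform=*/true);
//   // Only the first out_len entries of `out` are valid predictions.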