15 #include <dmlc/config.h> 16 #include <dmlc/data.h> 46 inline const char* FileFormatString(
int format) {
48 case kLibSVM:
return "libsvm";
49 case kCSV:
return "csv";
50 case kLibFM:
return "libfm";
55 struct CLIParam :
public dmlc::Parameter<CLIParam> {
87 std::vector<std::pair<std::string, std::string> >
cfg;
91 DMLC_DECLARE_FIELD(task).set_default(kCodegen)
92 .add_enum(
"codegen", kCodegen)
93 .add_enum(
"annotate", kAnnotate)
94 .add_enum(
"predict", kPredict)
95 .describe(
"Task to be performed by the CLI program.");
96 DMLC_DECLARE_FIELD(verbose).set_default(0)
97 .describe(
"Produce extra messages if >0");
98 DMLC_DECLARE_FIELD(format)
99 .add_enum(
"xgboost", kXGBModel)
100 .add_enum(
"lightgbm", kLGBModel)
101 .add_enum(
"protobuf", kProtobuf)
102 .describe(
"Model format");
103 DMLC_DECLARE_FIELD(model_in).set_default(
"NULL")
104 .describe(
"Input model path");
105 DMLC_DECLARE_FIELD(name_codegen_dir).set_default(
"codegen")
106 .describe(
"directory name for generated code files");
107 DMLC_DECLARE_FIELD(name_annotate).set_default(
"annotate.json")
108 .describe(
"Name of generated annotation file");
109 DMLC_DECLARE_FIELD(name_pred).set_default(
"pred.txt")
110 .describe(
"Name of text file to save prediction");
111 DMLC_DECLARE_FIELD(train_data_path).set_default(
"NULL")
112 .describe(
"Training data path; used for annotation");
113 DMLC_DECLARE_FIELD(test_data_path).set_default(
"NULL")
114 .describe(
"Test data path; used prediction");
115 DMLC_DECLARE_FIELD(codelib_path).set_default(
"NULL")
116 .describe(
"Path to compiled dynamic shared library (.so/.dll/.dylib); " 117 "used for prediction");
118 DMLC_DECLARE_FIELD(train_format).set_default(kLibSVM)
119 .add_enum(
"libsvm", kLibSVM)
120 .add_enum(
"csv", kCSV)
121 .add_enum(
"libfm", kLibFM)
122 .describe(
"training set data format");
123 DMLC_DECLARE_FIELD(test_format).set_default(kLibSVM)
124 .add_enum(
"libsvm", kLibSVM)
125 .add_enum(
"csv", kCSV)
126 .add_enum(
"libfm", kLibFM)
127 .describe(
"test set data format");
128 DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
129 "Number of threads to use.");
130 DMLC_DECLARE_FIELD(pred_margin).set_default(0).describe(
131 "if >0, predict margin instead of transformed probability");
134 DMLC_DECLARE_ALIAS(train_data_path, data);
135 DMLC_DECLARE_ALIAS(test_data_path, test:data);
136 DMLC_DECLARE_ALIAS(train_format, data_format);
139 inline void Configure(
const std::vector<std::pair<std::string, std::string> >& cfg) {
141 this->InitAllowUnknown(cfg);
148 CHECK(param.
model_in !=
"NULL") <<
"model_in parameter must be provided";
151 return frontend::LoadXGBoostModel(param.
model_in.c_str());
153 return frontend::LoadLightGBMModel(param.
model_in.c_str());
155 return frontend::LoadProtobufModel(param.
model_in.c_str());
157 LOG(FATAL) <<
"Unknown model format";
162 void CLICodegen(
const CLIParam& param) {
164 cparam.InitAllowUnknown(param.
cfg);
166 Model model = ParseModel(param);
167 LOG(INFO) <<
"model size = " << model.
trees.size();
170 common::filesystem::CreateDirectoryIfNotExist(param.
name_codegen_dir.c_str());
171 const std::string basename
175 auto semantic_model = compiler->Compile(model);
177 const std::string header_filename
180 std::vector<std::string> lines;
181 common::TransformPushBack(&lines, semantic_model.common_header->Compile(),
182 [] (std::string line) {
185 lines.emplace_back();
186 std::ostringstream oss;
188 std::copy(semantic_model.function_registry.begin(),
189 semantic_model.function_registry.end(),
190 std::ostream_iterator<FunctionEntry>(oss));
191 lines.push_back(oss.str());
192 common::WriteToFile(header_filename, lines);
195 std::vector<std::string> source_list;
196 std::vector<std::string> object_list;
197 if (semantic_model.units.size() == 1) {
198 const std::string filename = basename +
".c";
200 const std::string objname = basename +
".o";
201 source_list.push_back(filename);
202 object_list.push_back(objname);
203 auto lines = semantic_model.units[0].Compile(header_filename);
204 common::WriteToFile(filename_full, lines);
206 for (
size_t i = 0; i < semantic_model.units.size(); ++i) {
207 const std::string filename = basename + std::to_string(i) +
".c";
209 const std::string objname = basename + std::to_string(i) +
".o";
210 source_list.push_back(filename);
211 object_list.push_back(objname);
212 auto lines = semantic_model.units[i].Compile(header_filename);
213 common::WriteToFile(filename_full, lines);
219 const std::string library_name = basename +
".so";
220 std::ostringstream oss;
221 oss <<
"all: " << library_name << std::endl << std::endl
222 << library_name <<
": ";
223 for (
const auto& e : object_list) {
227 <<
"\tgcc -shared -O3 -o $@ $? -fPIC -std=c99 -flto -fopenmp" 228 << std::endl << std::endl;
229 for (
size_t i = 0; i < object_list.size(); ++i) {
230 oss << object_list[i] <<
": " << source_list[i] << std::endl
231 <<
"\tgcc -c -O3 -o $@ $? -fPIC -std=c99 -flto -fopenmp" << std::endl;
234 <<
"clean:" << std::endl
235 <<
"\trm -fv " << library_name <<
" ";
236 for (
const auto& e : object_list) {
245 void CLIAnnotate(
const CLIParam& param) {
246 Model model = ParseModel(param);
247 LOG(INFO) <<
"model size = " << model.
trees.size();
250 <<
"Need to specify train_data_path paramter for annotation task";
253 param.nthread, param.
verbose));
257 std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(
259 annotator.
Save(fo.get());
262 void CLIPredict(
const CLIParam& param) {
265 <<
"Need to specify codelib_path paramter for prediction task";
267 <<
"Need to specify test_data_path paramter for prediction task";
270 param.nthread, param.
verbose));
271 std::unique_ptr<CSRBatch> batch = common::make_unique<CSRBatch>();
272 batch->data = &dmat->data[0];
273 batch->col_ind = &dmat->col_ind[0];
274 batch->row_ptr = &dmat->row_ptr[0];
275 batch->num_row = dmat->num_row;
276 batch->num_col = dmat->num_col;
280 std::vector<float> result(result_size);
281 result_size = predictor.
PredictBatch(batch.get(), param.nthread,
285 std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(
287 dmlc::ostream os(fo.get());
288 for (
size_t i = 0; i < result_size; ++i) {
289 os << result[i] << std::endl;
292 os.set_stream(
nullptr);
295 int CLIRunTask(
int argc,
char* argv[]) {
297 printf(
"Usage: <config>\n");
301 std::vector<std::pair<std::string, std::string> >
cfg;
304 std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(argv[1],
"r"));
305 dmlc::istream cfgfile(fi.get());
306 dmlc::Config itr(cfgfile);
307 for (
const auto& entry : itr) {
308 cfg.push_back(std::make_pair(entry.first, entry.second));
312 for (
int i = 2; i < argc; ++i) {
313 char name[256], val[256];
314 if (sscanf(argv[i],
"%[^=]=%s", name, val) == 2) {
315 cfg.push_back(std::make_pair(std::string(name), std::string(val)));
320 param.Configure(cfg);
322 switch (param.
task) {
323 case kCodegen: CLICodegen(param);
break;
324 case kAnnotate: CLIAnnotate(param);
break;
325 case kPredict: CLIPredict(param);
break;
333 int main(
int argc,
char* argv[]) {
335 = treelite::LogCallbackRegistryStore::Get();
336 registry->Register([] (
const char* msg) {
337 std::cerr << msg << std::endl;
339 return treelite::CLIRunTask(argc, argv);
Load prediction function exported as a shared library.
int pred_margin
whether to predict margin instead of transformed probability
Collection of front-end methods to load or construct ensemble model.
thin wrapper for tree ensemble model
size_t PredictBatch(const CSRBatch *batch, int nthread, int verbose, bool pred_margin, float *out_result) const
make predictions on a batch of data rows
std::vector< std::pair< std::string, std::string > > cfg
all the configurations
std::vector< Tree > trees
member trees
size_t QueryResultSize(const CSRBatch *batch) const
Given a batch of data rows, query the necessary size of array to hold predictions for all data points...
parameters for tree compiler
void Annotate(const Model &model, const DMatrix *dmat, int nthread, int verbose)
annotate branches in a given model using frequency patterns in the training data. The annotation can ...
logging facility for treelite
std::string test_data_path
the path of test set: used for prediction
std::string model_in
model file
static DMatrix * Create(const char *filename, const char *format, int nthread, int verbose)
construct a new DMatrix from a file
Parameters for tree compiler.
Interface of compiler that translates a tree ensemble model into a semantic model.
std::string name_codegen_dir
directory name for generated code files
int verbose
verbosity flag: extra messages are produced if >0
Cross-platform wrapper for common filesystem functions.
std::string train_data_path
the path of training set: used for annotation
void Load(const char *name)
load the prediction function from dynamic shared library.
void Save(dmlc::Stream *fo) const
save branch annotation to a JSON file
std::string codelib_path
the path of compiled dynamic shared library: used for prediction
static Compiler * Create(const std::string &name, const compiler::CompilerParam &param)
create a compiler from given name
compatibility wrapper for systems that don't support OpenMP
Building blocks for semantic model of tree prediction code.
std::string name_annotate
name of generated annotation file
predictor class: wrapper for optimized prediction code
std::string name_pred
name of text file to save prediction
int test_format
test set file format
int train_format
training set file format