Treelite
elf_formatter.cc
Go to the documentation of this file.
1 
7 #include <treelite/logging.h>
8 #include <fstream>
9 #include <iterator>
10 #include <stdexcept>
11 #include <vector>
12 #include <cstdio>
13 #include <cstring>
14 #include "./elf_formatter.h"
15 
16 #ifdef __linux__
17 
18 #include <elf.h>
19 
20 namespace {
21 
// Section flag value OR-ed into the .lrodata section header's sh_flags below.
constexpr unsigned int SHF_X86_64_LARGE = 0x10000000U;
23 
// Contents of e_ident, the identification bytes that open every ELF file header.
// Only the first eight bytes are spelled out; the remaining reserved bytes (up to EI_NIDENT)
// are zero-filled by aggregate initialization.
constexpr char ident_str[EI_NIDENT] = {
  ELFMAG0,        // magic byte 0: 0x7F
  ELFMAG1,        // magic byte 1: 'E'
  ELFMAG2,        // magic byte 2: 'L'
  ELFMAG3,        // magic byte 3: 'F'
  ELFCLASS64,     // EI_CLASS: 64-bit objects
  ELFDATA2LSB,    // EI_DATA: little-endian
  EV_CURRENT,     // EI_VERSION: ELF version 1
  ELFOSABI_NONE   // EI_OSABI: System V ABI or unspecified
};
31 
// Append `count` raw bytes starting at `src` to the end of `*dest`.
//  * dest:  buffer to grow; must not be null.
//  * src:   first byte to copy. It is only dereferenced when count > 0, and it must not point
//           into *dest, because growing the vector may reallocate its storage.
//  * count: number of bytes to append. Zero is a no-op.
void AppendToBuffer(std::vector<char>* dest, const void* src, size_t count) {
  if (count == 0) {
    // Nothing to copy. Returning early keeps a null `src` (or the possibly-null data pointer of
    // an empty vector) away from the byte copy, which is not allowed to receive null pointers.
    return;
  }
  const char* const first = static_cast<const char*>(src);
  dest->insert(dest->end(), first, first + count);
}
37 
38 } // anonymous namespace
39 
40 namespace treelite {
41 namespace compiler {
42 
// Reserve room for an ELF64 file header by appending sizeof(Elf64_Ehdr) zero bytes to the
// end of the buffer. Bytes already in the buffer are left as they are.
void AllocateELFHeader(std::vector<char>* elf_buffer) {
  const size_t header_bytes = sizeof(Elf64_Ehdr);
  elf_buffer->insert(elf_buffer->end(), header_bytes, '\0');
}
46 
47 void FormatArrayAsELF(std::vector<char>* elf_buffer) {
48  const size_t array_size = elf_buffer->size() - sizeof(Elf64_Ehdr);
49 
50  /* Format compiler information string */
51  const char comment[] = "\0GCC: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0\0\0\0\0";
52  // padding added at the end so that the following section (.symtab) is 8-byte aligned.
53  const size_t comment_padding = 4; // remember how many NUL letters we added for padding
54  static_assert(sizeof(comment) == 48, ".comment section has incorrect size");
55 
56  /* Format symbol table */
57  const Elf64_Sym symtab[] = {
58  // Each symbol entry is of form {st_name, st_info, st_other, st_shndx, st_value, st_size}
59  // * st_name: Symbol name. The symbol name is given by the null-terminated string that starts
60  // at &strtab[st_name].
61  // * st_info: Symbol's type and binding attributes
62  // * st_other: Symbol visibility (we'll use STV_DEFAULT for all entries)
63  // * st_shndx: Index of the section associated with the symbol
64  // (SHN_UNDEF: no section associated. SHN_ABS: index of file entry, by convention)
65  // * st_value: Address associated with the symbol (we'll set this to 0 for all entries, since
66  // the object file is relocatable.)
67  // * st_size: Size (in bytes) of the symbol
68  { 0, ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE), STV_DEFAULT, SHN_UNDEF, 0, 0},
69  { 1, ELF64_ST_INFO(STB_LOCAL, STT_FILE), STV_DEFAULT, SHN_ABS, 0, 0},
70  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 1, 0, 0},
71  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 2, 0, 0},
72  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 3, 0, 0},
73  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 4, 0, 0},
74  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 6, 0, 0},
75  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 5, 0, 0},
76  {10, ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), STV_DEFAULT, 4, 0, array_size},
77  };
78  static_assert(sizeof(symtab) == 216, ".symtab has incorrect size");
79 
80  /* Format symbol name table */
81  const char strtab[] = "\0arrays.c\0nodes";
82  static_assert(sizeof(strtab) == 16, ".strtab has incorrect size");
83 
84  /* Format section name table */
85  const char shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0.text\0.data\0.bss\0.lrodata\0.comment\0"
86  ".note.GNU-stack\0\0";
87  // padding added at the end to ensure 4-byte alignment everywhere
88  const size_t shstrtab_padding = 2; // remember how many NUL letters we added for padding
89  static_assert(sizeof(shstrtab) == 80, ".shstrtab has incorrect size");
90 
91  /* Format ELF header */
92  Elf64_Ehdr elf_header;
93  // Compute e_shoff, section header table's offset.
94  const size_t e_shoff = sizeof(elf_header) + array_size + sizeof(comment)
95  + sizeof(symtab) + sizeof(strtab) + sizeof(shstrtab);
96 
97  std::memcpy(elf_header.e_ident, ident_str, EI_NIDENT);
98  elf_header.e_type = ET_REL; // A relocatable (object) file
99  elf_header.e_machine = EM_X86_64; // AMD64 architecture target
100  elf_header.e_version = EV_CURRENT; // ELF version 1
101  elf_header.e_entry = 0; // Set to zero because there's no entry point
102  elf_header.e_phoff = 0; // Set to zero because there's no program header table
103  elf_header.e_shoff = e_shoff; // Section header table's offset
104  elf_header.e_flags = 0; // Reserved
105  elf_header.e_ehsize = 64; // Size of ELF header (in bytes)
106  elf_header.e_phentsize = 0; // Set to zero because there's no program header table
107  elf_header.e_phnum = 0; // Set to zero because there's no program header table
108  elf_header.e_shentsize = 64; // Size of each section header (in bytes)
109  elf_header.e_shnum = 10; // Number of section headers
110  elf_header.e_shstrndx = 9; // Index (in section header table) of the section storing
111  // string representation of all section names
112  // In this case, the last section stores name of all sections
113 
114  /* Format section header table */
115  Elf64_Shdr section_header[] = {
116  // Each section header is of form {sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size,
117  // sh_link, sh_info, sh_addralign, sh_entsize}
118  // * sh_name: Section name. The section name is given by the null-terminated string that
119  // starts at &shstrtab[sh_name].
120  // * sh_type: Type of section
121  // * sh_flags: Miscellaneous attributes
122  // * sh_addr: Address of the first byte of the section (we'll set this to 0 for all
123  // sections, since the object file is relocatable.)
124  // * sh_offset: Byte offset from the beginning of the object file to the first byte in the
125  // section
126  // * sh_size: Size of section in bytes
127  // * sh_link: Interpretation of this field depends on the section type
128  // See https://www.sco.com/developers/gabi/1998-04-29/ch4.sheader.html#sh_link
129  // * sh_info: Interpretation of this field depends on the section type
130  // See https://www.sco.com/developers/gabi/1998-04-29/ch4.sheader.html#sh_link
131  // * sh_addralign: Alignment constraint for the section. This must be a power of 2. A value of
132  // 0 or 1 indicates the lack of alignment constraint.
133  // * sh_entsize: Size of each entry in a table (in bytes). This is only applicable if the
134  // section is a table of some kind (e.g. symbol table).
135  { 0, SHT_NULL, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0},
136  {27, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, 0x0, 0x0, 0, 0, 0, 1, 0},
137  {33, SHT_PROGBITS, SHF_WRITE | SHF_ALLOC, 0x0, 0x0, 0, 0, 0, 1, 0},
138  {39, SHT_NOBITS, SHF_WRITE | SHF_ALLOC, 0x0, 0x0, 0, 0, 0, 1, 0},
139  {44, SHT_PROGBITS, SHF_ALLOC | SHF_X86_64_LARGE, 0x0, 0x0, array_size, 0, 0, 32, 0},
140  {53, SHT_PROGBITS, SHF_MERGE | SHF_STRINGS, 0x0, 0x0, sizeof(comment), 0, 0, 1, 1},
141  {62, SHT_PROGBITS, 0x0, 0x0, 0x0, 0, 0, 0, 1, 0},
142  { 1, SHT_SYMTAB, 0x0, 0x0, 0x0, sizeof(symtab), 8, 8, 8, 24},
143  { 9, SHT_STRTAB, 0x0, 0x0, 0x0, sizeof(strtab), 0, 0, 1, 0},
144  {17, SHT_STRTAB, 0x0, 0x0, 0x0, sizeof(shstrtab), 0, 0, 1, 0}
145  // Sections listed: (null) .text .data .bss .lrodata .comment .note.GNU-stack .symtab
146  // .strtab .shstrtab
147  // Note that some sections are not actually present in the object (thus has size zero).
148  };
149  // Compute offsets via cumulative sums
150  section_header[1].sh_offset = 0x40;
151  for (size_t i = 2; i < sizeof(section_header) / sizeof(Elf64_Shdr); ++i) {
152  section_header[i].sh_offset = section_header[i - 1].sh_offset + section_header[i - 1].sh_size;
153  }
154  // Adjust size info so that padding is excluded
155  section_header[5].sh_size -= comment_padding;
156  section_header[6].sh_offset -= comment_padding;
157  section_header[9].sh_size -= shstrtab_padding;
158 
183  /* Write ELF header */
184  std::memcpy(elf_buffer->data(), &elf_header, sizeof(Elf64_Ehdr));
185  // elf_buffer already has a placeholder for the ELF header
186  /* .lrodata (read-only data) segment is already part of elf_buffer */
187  /* Write .comment (compiler information) segment */
188  AppendToBuffer(elf_buffer, comment, sizeof(comment));
189  /* Write .symtab (symbol table) segment */
190  AppendToBuffer(elf_buffer, symtab, sizeof(symtab));
191  /* Write .strtab (symbol name table) segment */
192  AppendToBuffer(elf_buffer, strtab, sizeof(strtab));
193  /* Write .shstrtab (section name table) segment (referred by elf_header.e_shstrndx) */
194  AppendToBuffer(elf_buffer, shstrtab, sizeof(shstrtab));
195  /* Write section headers */
196  AppendToBuffer(elf_buffer, section_header, sizeof(section_header));
197 }
198 
199 } // namespace compiler
200 } // namespace treelite
201 
202 #else // __linux__
203 
204 namespace treelite {
205 namespace compiler {
206 
207 void AllocateELFHeader(std::vector<char>* elf_buffer) {
208  TREELITE_LOG(FATAL) << "dump_array_as_elf is not supported in non-Linux OSes";
209 }
210 
211 void FormatArrayAsELF(std::vector<char>* elf_buffer) {
212  TREELITE_LOG(FATAL) << "dump_array_as_elf is not supported in non-Linux OSes";
213 }
214 
215 } // namespace compiler
216 } // namespace treelite
217 
218 #endif // __linux__
void AllocateELFHeader(std::vector< char > *elf_buffer)
Pre-allocate space in a buffer to fit an ELF header.
logging facility for Treelite
Generate a relocatable object file containing a constant, read-only array.
void FormatArrayAsELF(std::vector< char > *elf_buffer)
Format a relocatable ELF object file containing a constant, read-only array.