Treelite
elf_formatter.cc
Go to the documentation of this file.
1 
7 #include <dmlc/registry.h>
8 #include <fstream>
9 #include <iterator>
10 #include <stdexcept>
11 #include <vector>
12 #include <cstdio>
13 #include <cstring>
14 #include "./elf_formatter.h"
15 
16 #ifdef __linux__
17 
18 #include <elf.h>
19 
20 namespace {
21 
// x86-64 specific section flag (value taken from the x86-64 psABI); defined in this file
// rather than relying on <elf.h> to provide it.
constexpr unsigned int SHF_X86_64_LARGE = 0x10000000U;

// Contents of the e_ident field that opens every ELF file produced by this translation unit.
constexpr char ident_str[EI_NIDENT] = {
  ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,       // EI_MAG0..3: magic number 0x7F, 'E', 'L', 'F'
  ELFCLASS64,                               // EI_CLASS: 64-bit object
  ELFDATA2LSB,                              // EI_DATA: little-endian encoding
  EV_CURRENT,                               // EI_VERSION: ELF version 1
  ELFOSABI_NONE,                            // EI_OSABI: System V ABI or unspecified
  0x00,                                     // EI_ABIVERSION: unused
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  // EI_PAD: reserved, must be zero
};
31 
/*!
 * \brief Append a run of raw bytes to the end of a byte buffer.
 * \param dest buffer to extend; its existing contents are kept
 * \param src address of the first byte to copy; must not point into dest's own storage,
 *            because growing the buffer may reallocate it
 * \param count number of bytes to copy; when zero, dest is left untouched and src is not read
 */
void AppendToBuffer(std::vector<char>* dest, const void* src, size_t count) {
  if (count == 0) {
    return;  // nothing to copy; src (and an empty buffer's data pointer) may legitimately be null
  }
  const char* const first = static_cast<const char*>(src);
  // Range insert grows the buffer and copies in a single step, without zero-filling first
  dest->insert(dest->end(), first, first + count);
}
37 
38 } // anonymous namespace
39 
40 namespace treelite {
41 namespace compiler {
42 
43 DMLC_REGISTRY_FILE_TAG(elf_formatter);
44 
/*!
 * \brief Pre-allocate space in a buffer to fit an ELF header.
 *
 * Appends sizeof(Elf64_Ehdr) zero bytes to the end of the buffer; bytes already in the buffer
 * are kept as they are.
 * \param elf_buffer buffer to grow
 */
void AllocateELFHeader(std::vector<char>* elf_buffer) {
  const size_t header_bytes = sizeof(Elf64_Ehdr);
  elf_buffer->insert(elf_buffer->end(), header_bytes, '\0');
}
48 
49 void FormatArrayAsELF(std::vector<char>* elf_buffer) {
50  const size_t array_size = elf_buffer->size() - sizeof(Elf64_Ehdr);
51 
52  /* Format compiler information string */
53  const char comment[] = "\0GCC: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0\0\0\0\0";
54  // padding added at the end so that the following section (.symtab) is 8-byte aligned.
55  const size_t comment_padding = 4; // remember how many NUL letters we added for padding
56  static_assert(sizeof(comment) == 48, ".comment section has incorrect size");
57 
58  /* Format symbol table */
59  const Elf64_Sym symtab[] = {
60  // Each symbol entry is of form {st_name, st_info, st_other, st_shndx, st_value, st_size}
61  // * st_name: Symbol name. The symbol name is given by the null-terminated string that starts
62  // at &strtab[st_name].
63  // * st_info: Symbol's type and binding attributes
64  // * st_other: Symbol visibility (we'll use STV_DEFAULT for all entries)
65  // * st_shndx: Index of the section associated with the symbol
66  // (SHN_UNDEF: no section associated. SHN_ABS: index of file entry, by convention)
67  // * st_value: Address associated with the symbol (we'll set this to 0 for all entries, since
68  // the object file is relocatable.)
69  // * st_size: Size (in bytes) of the symbol
70  { 0, ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE), STV_DEFAULT, SHN_UNDEF, 0, 0},
71  { 1, ELF64_ST_INFO(STB_LOCAL, STT_FILE), STV_DEFAULT, SHN_ABS, 0, 0},
72  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 1, 0, 0},
73  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 2, 0, 0},
74  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 3, 0, 0},
75  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 4, 0, 0},
76  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 6, 0, 0},
77  { 0, ELF64_ST_INFO(STB_LOCAL, STT_SECTION), STV_DEFAULT, 5, 0, 0},
78  {10, ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), STV_DEFAULT, 4, 0, array_size},
79  };
80  static_assert(sizeof(symtab) == 216, ".symtab has incorrect size");
81 
82  /* Format symbol name table */
83  const char strtab[] = "\0arrays.c\0nodes";
84  static_assert(sizeof(strtab) == 16, ".strtab has incorrect size");
85 
86  /* Format section name table */
87  const char shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0.text\0.data\0.bss\0.lrodata\0.comment\0"
88  ".note.GNU-stack\0\0";
89  // padding added at the end to ensure 4-byte alignment everywhere
90  const size_t shstrtab_padding = 2; // remember how many NUL letters we added for padding
91  static_assert(sizeof(shstrtab) == 80, ".shstrtab has incorrect size");
92 
93  /* Format ELF header */
94  Elf64_Ehdr elf_header;
95  // Compute e_shoff, section header table's offset.
96  const size_t e_shoff = sizeof(elf_header) + array_size + sizeof(comment)
97  + sizeof(symtab) + sizeof(strtab) + sizeof(shstrtab);
98 
99  std::memcpy(elf_header.e_ident, ident_str, EI_NIDENT);
100  elf_header.e_type = ET_REL; // A relocatable (object) file
101  elf_header.e_machine = EM_X86_64; // AMD64 architecture target
102  elf_header.e_version = EV_CURRENT; // ELF version 1
103  elf_header.e_entry = 0; // Set to zero because there's no entry point
104  elf_header.e_phoff = 0; // Set to zero because there's no program header table
105  elf_header.e_shoff = e_shoff; // Section header table's offset
106  elf_header.e_flags = 0; // Reserved
107  elf_header.e_ehsize = 64; // Size of ELF header (in bytes)
108  elf_header.e_phentsize = 0; // Set to zero because there's no program header table
109  elf_header.e_phnum = 0; // Set to zero because there's no program header table
110  elf_header.e_shentsize = 64; // Size of each section header (in bytes)
111  elf_header.e_shnum = 10; // Number of section headers
112  elf_header.e_shstrndx = 9; // Index (in section header table) of the section storing
113  // string representation of all section names
114  // In this case, the last section stores name of all sections
115 
116  /* Format section header table */
117  Elf64_Shdr section_header[] = {
118  // Each section header is of form {sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size,
119  // sh_link, sh_info, sh_addralign, sh_entsize}
120  // * sh_name: Section name. The section name is given by the null-terminated string that
121  // starts at &shstrtab[sh_name].
122  // * sh_type: Type of section
123  // * sh_flags: Miscellaneous attributes
124  // * sh_addr: Address of the first byte of the section (we'll set this to 0 for all
125  // sections, since the object file is relocatable.)
126  // * sh_offset: Byte offset from the beginning of the object file to the first byte in the
127  // section
128  // * sh_size: Size of section in bytes
129  // * sh_link: Interpretation of this field depends on the section type
130  // See https://www.sco.com/developers/gabi/1998-04-29/ch4.sheader.html#sh_link
131  // * sh_info: Interpretation of this field depends on the section type
132  // See https://www.sco.com/developers/gabi/1998-04-29/ch4.sheader.html#sh_link
133  // * sh_addralign: Alignment constraint for the section. This must be a power of 2. A value of
134  // 0 or 1 indicates the lack of alignment constraint.
135  // * sh_entsize: Size of each entry in a table (in bytes). This is only applicable if the
136  // section is a table of some kind (e.g. symbol table).
137  { 0, SHT_NULL, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0},
138  {27, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, 0x0, 0x0, 0, 0, 0, 1, 0},
139  {33, SHT_PROGBITS, SHF_WRITE | SHF_ALLOC, 0x0, 0x0, 0, 0, 0, 1, 0},
140  {39, SHT_NOBITS, SHF_WRITE | SHF_ALLOC, 0x0, 0x0, 0, 0, 0, 1, 0},
141  {44, SHT_PROGBITS, SHF_ALLOC | SHF_X86_64_LARGE, 0x0, 0x0, array_size, 0, 0, 32, 0},
142  {53, SHT_PROGBITS, SHF_MERGE | SHF_STRINGS, 0x0, 0x0, sizeof(comment), 0, 0, 1, 1},
143  {62, SHT_PROGBITS, 0x0, 0x0, 0x0, 0, 0, 0, 1, 0},
144  { 1, SHT_SYMTAB, 0x0, 0x0, 0x0, sizeof(symtab), 8, 8, 8, 24},
145  { 9, SHT_STRTAB, 0x0, 0x0, 0x0, sizeof(strtab), 0, 0, 1, 0},
146  {17, SHT_STRTAB, 0x0, 0x0, 0x0, sizeof(shstrtab), 0, 0, 1, 0}
147  // Sections listed: (null) .text .data .bss .lrodata .comment .note.GNU-stack .symtab
148  // .strtab .shstrtab
149  // Note that some sections are not actually present in the object (thus has size zero).
150  };
151  // Compute offsets via cumulative sums
152  section_header[1].sh_offset = 0x40;
153  for (size_t i = 2; i < sizeof(section_header) / sizeof(Elf64_Shdr); ++i) {
154  section_header[i].sh_offset = section_header[i - 1].sh_offset + section_header[i - 1].sh_size;
155  }
156  // Adjust size info so that padding is excluded
157  section_header[5].sh_size -= comment_padding;
158  section_header[6].sh_offset -= comment_padding;
159  section_header[9].sh_size -= shstrtab_padding;
160 
185  /* Write ELF header */
186  std::memcpy(elf_buffer->data(), &elf_header, sizeof(Elf64_Ehdr));
187  // elf_buffer already has a placeholder for the ELF header
188  /* .lrodata (read-only data) segment is already part of elf_buffer */
189  /* Write .comment (compiler information) segment */
190  AppendToBuffer(elf_buffer, comment, sizeof(comment));
191  /* Write .symtab (symbol table) segment */
192  AppendToBuffer(elf_buffer, symtab, sizeof(symtab));
193  /* Write .strtab (symbol name table) segment */
194  AppendToBuffer(elf_buffer, strtab, sizeof(strtab));
195  /* Write .shstrtab (section name table) segment (referred by elf_header.e_shstrndx) */
196  AppendToBuffer(elf_buffer, shstrtab, sizeof(shstrtab));
197  /* Write section headers */
198  AppendToBuffer(elf_buffer, section_header, sizeof(section_header));
199 }
200 
201 } // namespace compiler
202 } // namespace treelite
203 
204 #else // __linux__
205 
206 namespace treelite {
207 namespace compiler {
208 
209 void AllocateELFHeader(std::vector<char>* elf_buffer) {
210  LOG(FATAL) << "dump_array_as_elf is not supported in non-Linux OSes";
211 }
212 
213 void FormatArrayAsELF(std::vector<char>* elf_buffer) {
214  LOG(FATAL) << "dump_array_as_elf is not supported in non-Linux OSes";
215 }
216 
217 } // namespace compiler
218 } // namespace treelite
219 
220 #endif // __linux__
void AllocateELFHeader(std::vector< char > *elf_buffer)
Pre-allocate space in a buffer to fit an ELF header.
Generate a relocatable object file containing a constant, read-only array.
void FormatArrayAsELF(std::vector< char > *elf_buffer)
Format a relocatable ELF object file containing a constant, read-only array.