Pokology - a community-driven site around GNU poke
                             
     _____
 ---'   __\_______
            ______)         Fun with ELF files
            __)             
           __)
 ---._______)
Table of Contents
_________________
1. Substitute a string with another same-size string
2. Replace main function
.. 1. With objdump assistance
3. Building "Hello, world" from scratch
1 Substitute a string with another same-size string
===================================================
  Let's assume that we only have the compiled version of the following C
  code (produced by gcc -o hello hello.c on a 64-bit machine):
  ,----
  | #include <stdio.h>
  | int
  | main()
  | {
  |   puts("Hello, Jose!");
  |   return 0;
  | }
  `----
  To replace Jose with Luca in the binary file hello, you can use GNU
  poke as follows:
  ,----
  | (poke) .set obase 16
  | (poke) .set endian little
  | (poke) .set pretty-print no
  | (poke) load elf
  | (poke) .file hello
  | (poke) var efile = Elf64_File @ 0#B
  | (poke) var rodata_arr = efile.get_sections_by_name(".rodata")
  | (poke) rodata_arr'length
  | 0x1UL
  | (poke) var rodata = rodata_arr[0]
  | (poke) efile.get_section_name(rodata.sh_name)
  | ".rodata"
  | (poke) rodata
  | Elf64_Shdr {
  |   sh_name=0xb3U#B,
  |   sh_type=0x1U,
  |   sh_flags=Elf64_SectionFlags {
  |     flags=0x2UL
  |   },
  |   sh_addr=0x2000UL#B,
  |   sh_offset=0x2000UL#B,
  |   sh_size=0x11UL#B,
  |   sh_link=0x0U,
  |   sh_info=0x0U,
  |   sh_addralign=0x4UL,
  |   sh_entsize=0x0UL#b
  | }
  | (poke) /* Dump the content of the section */
  | (poke) dump :from rodata.sh_offset :size rodata.sh_size
  | 76543210  0011 2233 4455 6677 8899 aabb ccdd eeff  0123456789ABCDEF
  | 00002000: 0100 0200 4865 6c6c 6f2c 204a 6f73 6521  ....Hello, Jose!
  | 00002010: 00                                       .
  | (poke) byte[4] @ (rodata.sh_offset + 4#B + 7#B) = ['L', 'u', 'c', 'a']
  | (poke) dump :from rodata.sh_offset :size rodata.sh_size
  | 76543210  0011 2233 4455 6677 8899 aabb ccdd eeff  0123456789ABCDEF
  | 00002000: 0100 0200 4865 6c6c 6f2c 204c 7563 6121  ....Hello, Luca!
  | 00002010: 00                                       .
  | (poke) .exit
  `----
  If you now run the ./hello program, it'll show
  ,----
  | Hello, Luca!
  `----
  instead of the original
  ,----
  | Hello, Jose!
  `----
  And obviously, other names with at most 4 characters are also
  acceptable :)
2 Replace main function
=======================
  Consider the following program:
  ,----
  | #include <stdio.h>
  | int
  | main()
  | {
  |   puts("main()");
  |   return 0;
  | }
  | int
  | main2()
  | {
  |   puts("main2()");
  |   return 0;
  | }
  `----
  We want to change the executable file (produced by gcc -o main2
  main2.c) to call the main2 function instead of main after startup.
2.1 With objdump assistance
~~~~~~~~~~~~~~~~~~~~~~~~~~~
  By looking at the disassembly of the .text section (using objdump -D -j
  .text main2), we can see these instructions at the end of the _start
  routine:
  ,----
  | [...]
  |     105a:       48 8d 0d 0f 01 00 00    lea    0x10f(%rip),%rcx # 1170 <__libc_csu_init>
  |     1061:       48 8d 3d d1 00 00 00    lea    0xd1(%rip),%rdi  # 1139 <main>  
  |     1068:       ff 15 72 2f 00 00       call   *0x2f72(%rip)    # 3fe0 <__libc_start_main@GLIBC_2.2.5>
  |     106e:       f4                      hlt
  |     106f:       90                      nop
  `----
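  The instruction lea 0xd1(%rip),%rdi loads the address of main into
  %rdi.  The address is computed relative to the address of the next
  instruction: 0x1068 + 0xd1 = 0x1139, which is main.
  So we have to change the d1 00 00 00 part of the instruction, which is
  a little-endian uint32 value stored at offset 0x1064 (right after the
  three bytes 48 8d 3d that start at 0x1061).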
  Again, by looking at the objdump disassembly of main and main2, we can
  see that main2 - main = 0x1153 - 0x1139 = 0x1a = 26.
  ,----
  | 0000000000001139 <main>:
  |     1139:       55                      push   %rbp
  |     113a:       48 89 e5                mov    %rsp,%rbp
  |     113d:       48 8d 05 c0 0e 00 00    lea    0xec0(%rip),%rax        # 2004 <_IO_stdin_used+0x4>
  |     1144:       48 89 c7                mov    %rax,%rdi
  |     1147:       e8 e4 fe ff ff          call   1030 <puts@plt>
  |     114c:       b8 00 00 00 00          mov    $0x0,%eax
  |     1151:       5d                      pop    %rbp
  |     1152:       c3                      ret
  | 
  | 0000000000001153 <main2>:
  |     1153:       55                      push   %rbp
  |     1154:       48 89 e5                mov    %rsp,%rbp
  |     1157:       48 8d 05 ab 0e 00 00    lea    0xeab(%rip),%rax        # 2009 <_IO_stdin_used+0x9>
  |     115e:       48 89 c7                mov    %rax,%rdi
  |     1161:       e8 ca fe ff ff          call   1030 <puts@plt>
  |     1166:       b8 00 00 00 00          mov    $0x0,%eax
  |     116b:       5d                      pop    %rbp
  |     116c:       c3                      ret
  |     116d:       0f 1f 00                nopl   (%rax)
  `----
  In poke, we have to update that immediate value so that it points to
  main2:
  ,----
  | (poke) .file main2
  | (poke) .set endian little
  | (poke) uint32 @ 0x1064#B = (uint32 @ 0x1064#B) + 0x1a
  | (poke) .close
  `----
  After closing the poke editor, you can run main2 and see the
  following output:
  ,----
  | main2()
  `----
3 Building "Hello, world" from scratch
======================================
  This section is inspired by this nice blog post: "You be the linker --
  building "Hello, world" from scratch, in hexadecimal"
  https://kevinboone.me/elfdemo.html?i=1
  This example is available in examples/fun_with_elf_hello_world.pk file
  in https://git.ageinghacker.net/pokology git repository.
  The idea here is to create a valid working ELF executable which runs
  on an x86_64 Linux machine.
  When you run this Poke script, it creates a fun-with-elf.exe executable
  file in the current directory. And you can run it!
  ,----
  | load elf;
  | 
  | /* Gives the bytes (little-endian) of a byte-offset of 32-bit width.  */
  | fun u32off_as_le_bytes = (offset helloworld_adr_num) byte[4]:
  |   {
  |     var helloworld_adr = byte[4] ();
  | 
  |     /* Fill `helloworld_adr` array with bytes of `helloworld_adr_num`
  |        in little-endian mode.  */
  |     with_temp_ios
  |       :endian ENDIAN_LITTLE
  |       :do lambda void:
  |         {
  |           offset @ 0#B = helloworld_adr_num;
  |           /* Disentangle the byte array from the temp IOS to make it
  |              normal unmapped array.  */
  |           helloworld_adr = unmap (byte[4] @ 0#B);
  |         }
  |       ;
  |     return helloworld_adr;
  |   }
  | 
  | set_endian(ENDIAN_LITTLE);
  | 
  | var load_adr   = 0x400000UL#B,
  |     epoint_off = 0x78UL#B,
  |     prghdr_off = 0x40UL#B,
  |     sechdr_off = 0xc0UL#B;
  | 
  | // Address of "Hello, World\n" string for the `mov` instruction
  | var str_adr_bytes = u32off_as_le_bytes (load_adr + 0xa2UL#B);
  | 
  | /* Building the ELF header.  */
  | 
  | var ehdr =
  |   Elf64_Ehdr
  |     {
  |       e_ident = Elf_Ident {
  |         // ei_mag = [0x7fUB,'E', 'L', 'F'],
  |         ei_class = 0x02UB, /* 64-bit */
  |         ei_data = 0x01UB,  /* little-endian */
  |         ei_version = 0x01UB,
  |         // ei_osabi = 0x0UB,
  |         // ei_abiversion = 0x0UB,
  |         // ei_pad = [0x0UB,0x0UB,0x0UB,0x0UB,0x0UB,0x0UB],
  |         // ei_nident = 0x0UB#B
  |       },
  |       e_type = ET_EXEC,
  |       e_machine = 0x3eUH, /* amd64 architecture */
  |       e_version = 0x1U,
  |       e_entry = load_adr + epoint_off,
  |       e_phoff = prghdr_off,
  |       e_shoff = sechdr_off,
  |       // e_flags = 0x0U,
  |       e_ehsize = Elf64_Ehdr {}'size,
  |       e_phentsize = Elf64_Phdr {}'size,
  |       e_phnum = 0x1UH,
  |       e_shentsize = Elf64_Shdr {}'size,
  |       e_shnum = 0x3UH,
  |       e_shstrndx = 0x2UH
  |     };
  | 
  | /* Constructing the program header.  */
  | 
  | var phdr =
  |   Elf64_Phdr
  |     {
  |       p_type   = PT_LOAD,
  |       p_flags  = Elf_SegmentFlags { flags = PF_X | PF_R },
  |       // p_offset = 0UL#B, // offset within file
  |       p_vaddr  = load_adr, // in virtual memory
  |       p_paddr  = load_adr, // in physical memory
  |       p_filesz = 0xB0UL#B,
  |       p_memsz  = 0xB0UL#B,
  |       p_align  = 0x200000UL#B, // alignment boundary for sections
  |     };
  | 
  | /* "Compiling" the program
  |    (x86-64 machine code)  */
  | 
  | var program_text = [
  |   //  mov 0x01, %rax ; sys_write
  |   0x48UB, 0xc7UB, 0xc0UB, 0x01UB, 0x00UB, 0x00UB, 0x00UB,
  | 
  |   //  mov 0x01, %rdi ; file descriptor (stdout)
  |   0x48UB, 0xc7UB, 0xc7UB, 0x01UB, 0x00UB, 0x00UB, 0x00UB,
  | 
  |   //  mov $helloworld_adr, %rsi ; location of string
  |   0x48UB, 0xc7UB, 0xc6UB,
  |   str_adr_bytes[0], str_adr_bytes[1], str_adr_bytes[2], str_adr_bytes[3],
  | 
  |   //  mov 0x0d,%rdx ; size of string, 13 bytes
  |   0x48UB, 0xc7UB, 0xc2UB, 0x0dUB, 0x00UB, 0x00UB, 0x00UB,
  | 
  |   //  syscall
  |   0x0fUB, 0x05UB,
  | 
  |   //  mov 0x3c,%rax ; exit program
  |   0x48UB, 0xc7UB, 0xc0UB, 0x3cUB, 0x00UB, 0x00UB, 0x00UB,
  | 
  |   //  xor %rdi,%rdi ; exit code, 0
  |   0x48UB, 0x31UB, 0xffUB,
  | 
  |   //  syscall
  |   0x0fUB, 0x05UB,
  | 
  |   //  Text "Hello, world\n\0" -- total 14 bytes including the null
  |   'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '\n', '\0',
  | ];
  | 
  | /* Constructing the string table */
  | 
  | var string_table = [
  |   ".shstrtab",
  |   ".text",
  | ];
  | 
  | /* Constructing the section header table  */
  | 
  | var sections = [
  |   Elf64_Shdr {},
  |   Elf64_Shdr
  |     {
  |       sh_name   = string_table[0]'size, // offset to the name of the section
  |       sh_type   = SHT_PROGBITS,         // program data
  |       sh_flags  = Elf64_SectionFlags { flags = SHF_ALLOC | SHF_EXECINSTR },
  |       sh_addr   = load_adr + epoint_off,
  |       sh_offset = epoint_off,
  |       sh_size   = Elf64_Shdr {}'size,
  |       // sh_link   = 0U,
  |       // sh_info   = 0U,
  |       sh_addralign = 1UL,
  |       // sh_entsize   = 0UL#B,
  |     },
  |   Elf64_Shdr
  |     {
  |       sh_name  = 0UL#B,
  |       sh_type  = SHT_STRTAB,
  |       sh_flags = Elf64_SectionFlags {},
  |       // sh_addr  = 0,
  |       sh_offset = 0xB0UL#B,
  |       sh_size   = 0x10UL#B,
  |       // sh_link   = 0U,
  |       // sh_info   = 0U,
  |       sh_addralign = 1UL,
  |       // sh_entsize   = 0UL#B,
  |     },
  | ];
  | 
  | /* Putting it all together */
  | 
  | var fd = open ("*elf*");
  | var off = 0UL#B;
  | 
  | Elf64_Ehdr @ off = ehdr;
  | off += ehdr'size;
  | 
  | Elf64_Phdr @ off = phdr;
  | off += phdr'size;
  | 
  | byte[] @ off = program_text;
  | off += program_text'size;
  | 
  | string[] @ off = string_table;
  | off += string_table'size;
  | 
  | Elf64_Shdr[] @ off = sections;
  | off += sections'size;
  | 
  | save :ios fd
  |      :file "fun-with-elf.exe"
  |      :from 0#B
  |      :size off
  |      :append 0
  |      ;
  | 
  | close (fd);
  `----