Linux Shellcode "Hello, World!"

user1408643 picture user1408643 · Mar 24, 2013 · Viewed 32.3k times · Source

I have the following working NASM code:

global _start

section .text

_start:
    mov eax, 0x4
    mov ebx, 0x1
    mov ecx, message
    mov edx, 0xF
    int 0x80

    mov eax, 0x1
    mov ebx, 0x0
    int 0x80

section .data
    message: db "Hello, World!", 0dh, 0ah

which prints "Hello, World!\n" to the screen. I also have the following C wrapper which contains the previous NASM object code:

char code[] =
"\xb8\x04\x00\x00\x00"
"\xbb\x01\x00\x00\x00"
"\xb9\x00\x00\x00\x00"
"\xba\x0f\x00\x00\x00"
"\xcd\x80\xb8\x01\x00"
"\x00\x00\xbb\x00\x00"
"\x00\x00\xcd\x80";

int main(void)
{
    (*(void(*)())code)();
}

However when I run the code, it seems like the assembler code isn't executed, but the program exits fine. Any ideas?

Thanks

Answer

user1129665 picture user1129665 · Mar 29, 2013

When you inject this shellcode, you don't know what is at message:

mov ecx, message

in the injected process, it can be anything but it will not be "Hello world!\r\n" since it is in the data section while you are dumping only the text section. You can see that your shellcode doesn't have "Hello world!\r\n":

"\xb8\x04\x00\x00\x00"
"\xbb\x01\x00\x00\x00"
"\xb9\x00\x00\x00\x00"
"\xba\x0f\x00\x00\x00"
"\xcd\x80\xb8\x01\x00"
"\x00\x00\xbb\x00\x00"
"\x00\x00\xcd\x80";

This is common problem in shellcode development, the way to work around it is this way:

global _start

section .text

_start:
    jmp MESSAGE      ; 1) lets jump to MESSAGE

GOBACK:
    mov eax, 0x4
    mov ebx, 0x1
    pop ecx          ; 3) we are poping into `ecx`, now we have the
                     ; address of "Hello, World!\r\n" 
    mov edx, 0xF
    int 0x80

    mov eax, 0x1
    mov ebx, 0x0
    int 0x80

MESSAGE:
    call GOBACK       ; 2) we are going back, since we used `call`, that means
                      ; the return address, which is in this case the address 
                      ; of "Hello, World!\r\n", is pushed into the stack.
    db "Hello, World!", 0dh, 0ah

section .data

Now dump the text section:

$ nasm -f elf shellcode.asm
$ ld shellcode.o -o shellcode
$ ./shellcode 
Hello, World!
$ objdump -d shellcode

shellcode:     file format elf32-i386


Disassembly of section .text:

08048060 <_start>:
 8048060:   e9 1e 00 00 00   jmp    8048083 <MESSAGE>

08048065 <GOBACK>:
 8048065:   b8 04 00 00 00   mov    $0x4,%eax
 804806a:   bb 01 00 00 00   mov    $0x1,%ebx
 804806f:   59               pop    %ecx
 8048070:   ba 0f 00 00 00   mov    $0xf,%edx
 8048075:   cd 80            int    $0x80
 8048077:   b8 01 00 00 00   mov    $0x1,%eax
 804807c:   bb 00 00 00 00   mov    $0x0,%ebx
 8048081:   cd 80            int    $0x80

08048083 <MESSAGE>:
 8048083:   e8 dd ff ff ff   call   8048065 <GOBACK>
 8048088:   48               dec    %eax                    <-+
 8048089:   65               gs                               |
 804808a:   6c               insb   (%dx),%es:(%edi)          |
 804808b:   6c               insb   (%dx),%es:(%edi)          |
 804808c:   6f               outsl  %ds:(%esi),(%dx)          |
 804808d:   2c 20            sub    $0x20,%al                 |
 804808f:   57               push   %edi                      |
 8048090:   6f               outsl  %ds:(%esi),(%dx)          |
 8048091:   72 6c            jb     80480ff <MESSAGE+0x7c>    |
 8048093:   64               fs                               |
 8048094:   21               .byte 0x21                       |
 8048095:   0d               .byte 0xd                        |
 8048096:   0a               .byte 0xa                      <-+

$

The lines I marked are our "Hello, World!\r\n" string:

$ printf "\x48\x65\x6c\x6c\x6f\x2c\x20\x57\x6f\x72\x6c\x64\x21\x0d\x0a"
Hello, World!

$ 

So our C wrapper will be:

char code[] = 

    "\xe9\x1e\x00\x00\x00"  //          jmp    (relative) <MESSAGE>
    "\xb8\x04\x00\x00\x00"  //          mov    $0x4,%eax
    "\xbb\x01\x00\x00\x00"  //          mov    $0x1,%ebx
    "\x59"                  //          pop    %ecx
    "\xba\x0f\x00\x00\x00"  //          mov    $0xf,%edx
    "\xcd\x80"              //          int    $0x80
    "\xb8\x01\x00\x00\x00"  //          mov    $0x1,%eax
    "\xbb\x00\x00\x00\x00"  //          mov    $0x0,%ebx
    "\xcd\x80"              //          int    $0x80
    "\xe8\xdd\xff\xff\xff"  //          call   (relative) <GOBACK>
    "Hello wolrd!\r\n";     // OR       "\x48\x65\x6c\x6c\x6f\x2c\x20\x57"
                            //          "\x6f\x72\x6c\x64\x21\x0d\x0a"


int main(int argc, char **argv)
{
    (*(void(*)())code)();

    return 0;
}

Lets test it, using -z execstack to enable read-implies-exec (process-wide, despite "stack" in the name) so we can executed code in the .data or .rodata sections:

$ gcc -m32 test.c -z execstack -o test
$ ./test 
Hello wolrd!

It works. (-m32 is necessary, too, on 64-bit systems. The int $0x80 32-bit ABI doesn't work with 64-bit addresses like .rodata in a PIE executable. Also, the machine code was assembled for 32-bit. It happens that the same sequence of bytes would decode to equivalent instructions in 64-bit mode but that's not always the case.)

Modern GNU ld puts .rodata in a separate segment from .text, so it can be non-executable. It used to be sufficient to use const char code[] to put executable code in a page of read-only data. At least for shellcode that doesn't want to modify itself.