0x0000000000400553 <main+59>: mov -0x4(%rbp),%eax
0x0000000000400556 <main+62>: cltq
0x0000000000400558 <main+64>: shl $0x3,%rax
0x000000000040055c <main+68>: mov %rax,%rdx
In fact my program is as simple as:
5 int main(int argc, char *argv[]) {
6 int i = 0;
7 while(environ[i]) {
8 printf("%s\n", environ[i++]);
9 }
10 return 0;
11 }
But the assembly output is pretty long:
Dump of assembler code for function main:
0x0000000000400518 <main+0>: push %rbp
0x0000000000400519 <main+1>: mov %rsp,%rbp
0x000000000040051c <main+4>: sub $0x20,%rsp
0x0000000000400520 <main+8>: mov %edi,-0x14(%rbp)
0x0000000000400523 <main+11>: mov %rsi,-0x20(%rbp)
0x0000000000400527 <main+15>: movl $0x0,-0x4(%rbp)
0x000000000040052e <main+22>: jmp 0x400553 <main+59>
0x0000000000400530 <main+24>: mov -0x4(%rbp),%eax
0x0000000000400533 <main+27>: cltq
0x0000000000400535 <main+29>: shl $0x3,%rax
0x0000000000400539 <main+33>: mov %rax,%rdx
0x000000000040053c <main+36>: mov 0x2003e5(%rip),%rax # 0x600928 <environ@@GLIBC_2.2.5>
0x0000000000400543 <main+43>: lea (%rdx,%rax,1),%rax
0x0000000000400547 <main+47>: mov (%rax),%rdi
0x000000000040054a <main+50>: addl $0x1,-0x4(%rbp)
0x000000000040054e <main+54>: callq 0x400418 <puts@plt>
0x0000000000400553 <main+59>: mov -0x4(%rbp),%eax
0x0000000000400556 <main+62>: cltq
0x0000000000400558 <main+64>: shl $0x3,%rax
0x000000000040055c <main+68>: mov %rax,%rdx
0x000000000040055f <main+71>: mov 0x2003c2(%rip),%rax # 0x600928 <environ@@GLIBC_2.2.5>
0x0000000000400566 <main+78>: lea (%rdx,%rax,1),%rax
0x000000000040056a <main+82>: mov (%rax),%rax
0x000000000040056d <main+85>: test %rax,%rax
0x0000000000400570 <main+88>: jne 0x400530 <main+24>
0x0000000000400572 <main+90>: mov $0x0,%eax
0x0000000000400577 <main+95>: leaveq
0x0000000000400578 <main+96>: retq
End of assembler dump.
What I don't understand is this block:
0x000000000040052e <main+22>: jmp 0x400553 <main+59>
0x0000000000400530 <main+24>: mov -0x4(%rbp),%eax
0x0000000000400533 <main+27>: cltq
0x0000000000400535 <main+29>: shl $0x3,%rax
0x0000000000400539 <main+33>: mov %rax,%rdx
0x000000000040053c <main+36>: mov 0x2003e5(%rip),%rax # 0x600928 <environ@@GLIBC_2.2.5>
0x0000000000400543 <main+43>: lea (%rdx,%rax,1),%rax
0x0000000000400547 <main+47>: mov (%rax),%rdi
0x000000000040054a <main+50>: addl $0x1,-0x4(%rbp)
0x000000000040054e <main+54>: callq 0x400418 <puts@plt>
0x0000000000400553 <main+59>: mov -0x4(%rbp),%eax
0x0000000000400556 <main+62>: cltq
0x0000000000400558 <main+64>: shl $0x3,%rax
0x000000000040055c <main+68>: mov %rax,%rdx
0x000000000040055f <main+71>: mov 0x2003c2(%rip),%rax # 0x600928 <environ@@GLIBC_2.2.5>
0x0000000000400566 <main+78>: lea (%rdx,%rax,1),%rax
0x000000000040056a <main+82>: mov (%rax),%rax
0x000000000040056d <main+85>: test %rax,%rax
0x0000000000400570 <main+88>: jne 0x400530 <main+24>
Mnemonic
cltq is the GAS mnemonic for Intel's cdqe, as documented at: https://sourceware.org/binutils/docs/as/i386_002dMnemonics.html
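The mnemonics are:
- Convert Long To Quad (cltq): AT&T-style
- Convert Doubleword to Quadword Extended (cdqe): Intel
Terminology:
- quad (aka quad-word) == 8 bytes
- long (AT&T) == double-word (Intel) == 4 bytes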
This is one of the few instructions whose GAS name is very different from the Intel version. The GNU assembler (as)
accepts either mnemonic, but Intel-syntax assemblers like NASM may only accept the Intel names.
Effect
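It sign-extends the 4 bytes in EAX into the 8 bytes of RAX, which in 2's complement means that:
- for negative numbers, the bits of the upper 4 bytes are all set to 1
- for non-negative numbers, the bits of the upper 4 bytes are all set to 0
In C, that usually represents a cast from signed int to long.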
Example:
mov $0x0123456700000001, %rax # eax=1, high bytes of rax=garbage
cltq
# %rax == $0000 0000 0000 0001
mov $-1, %eax # %rax = 0000 0000 FFFF FFFF
cltq
# %rax == $FFFF FFFF FFFF FFFF == qword $-1
This instruction is only available in 64-bit mode.
Also consider the following instructions:
- CWDE (AT&T CWTL), CBW (AT&T CBTW): smaller versions of CDQE, also present in 32-bit
- CQO family, which sign extends RAX into RDX:RAX
- MOVSX family, which both sign extends and moves: see "What does the movsbl instruction do?"
Minimal runnable examples on GitHub with assertions:
C example
GCC 4.9.3 emits it:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv) {
int i = strtol(argv[1], (char **)NULL, 16);;
long int l = i;
printf("%lx\n", l);
}
Compile and disassemble:
gcc -ggdb3 -std=c99 -O0 a.c
objdump -S a.out
contains:
int main(int argc, char **argv) {
...
long int l = i;
400545: 8b 45 fc mov -0x4(%rbp),%eax
400548: 48 98 cltq
40054a: 48 89 45 f0 mov %rax,-0x10(%rbp)
and the behavior is:
$ ./a.out 0x80000000
ffffffff80000000
$ ./a.out 0x40000000
40000000