restrict, part II = x86_64 Thursday 26th July 2007

Just to make some use of my cross compiler, here’s how the x86_64 assembly stacks up for the restrict example which I posted before.

Without restrict:

# fibincr2 compiled without restrict (AT&T operand order: op src, dst).
# (%rdi) is *a and (%rsi) is *b; eax and edx are the only scratch registers.
# The right-hand column gives each result in terms of the initial values
# A = *a and B = *b, and holds only when a and b do not alias.
# Note that *b is re-read after the store to *a, and *a is re-read after the
# store to *b, instead of reusing the values already held in registers.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
    movl    (%rdi), %eax  # eax = *a    A
    addl    (%rsi), %eax  # eax += *b   A + B
    movl    %eax, (%rdi)  # *a = eax    A + B
    movl    (%rsi), %edx  # edx = *b    B
    subl    %eax, %edx    # edx -= eax  -A
    movl    %edx, (%rsi)  # *b = edx    -A
    movl    (%rdi), %eax  # eax = *a    A + B
    subl    %edx, %eax    # eax -= edx  2A + B
    movl    %eax, (%rdi)  # *a = eax    2A + B
    addl    %eax, (%rsi)  # *b += eax   A + B
    ret

6 mov, 2 sub (2 register only), 2 add

With restrict:

# fibincr2 compiled with restrict (AT&T operand order: op src, dst).
# (%rdi) is *a and (%rsi) is *b; eax and edx are the only scratch registers.
# The right-hand column gives each result in terms of the initial values
# A = *a and B = *b.
# *a is loaded once at the top and stored once at the end; the intermediate
# values are kept in eax/edx rather than being written back and re-read.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
    movl    (%rdi), %eax  # eax = *a    A
    movl    %eax, %edx    # edx = eax   A
    addl    (%rsi), %edx  # edx += *b   A + B
    negl    %eax          # eax = -eax  -A
    movl    %eax, (%rsi)  # *b = eax    -A
    subl    %eax, %edx    # edx -= eax  2A + B
    addl    %edx, (%rsi)  # *b += edx   A + B
    movl    %edx, (%rdi)  # *a = edx    2A + B
    ret

4 mov (1 register only), 1 sub (1 register only), 2 add, 1 neg (1 register only)

Alternate algorithm:

# fibincr2, alternate algorithm (AT&T operand order: op src, dst).
# (%rdi) is *a and (%rsi) is *b, following the naming used for the other
# fibincr2 listings; eax and edx are the only scratch registers.
# The right-hand column gives each result in terms of the initial values
# A = *a and B = *b.
# Net effect: *a = 2A + B, *b = A + B.
fibincr2:
    movl    (%rdi), %edx         # edx = *a         A
    leal    (%rdx,%rdx), %eax    # eax = edx + edx  2A
    addl    (%rsi), %eax         # eax += *b        2A + B
    addl    %edx, (%rsi)         # *b += edx        A + B
    movl    %eax, (%rdi)         # *a = eax         2A + B
    ret

2 mov, 2 add, 1 lea.

So the instruction counts stay exactly the same for each algorithm, but because the x86_64 calling convention passes arguments in registers, there is no additional overhead from stack-access instructions.

Comments are closed.