Code:
/*
 * saxpy -- for every index i in [start, end):  dst[i] = a * src[i] + dst[i]
 *
 * Compiler-generated code (GAS / AT&T syntax, System V AMD64 register usage).
 * The C-level parameter names below are descriptive only; the original C
 * prototype is not visible here -- TODO confirm against the C source.
 *
 * In:    rdi  = dst   (float array, read and written)
 *        rsi  = src   (float array, read only)
 *        edx  = start (signed 32-bit index)
 *        ecx  = end   (signed 32-bit index, exclusive)
 *        xmm0 = a     (scalar float in the low lane)
 * Out:   nothing is returned (rax is only used as scratch)
 * Clobb: rax, rdx, rsi, rdi, r8-r11, xmm1-xmm4, flags
 *        rbx and rbp are used as scratch and saved/restored on the stack.
 *
 * Paths:
 *   .L9       scalar loop, taken when the range has 5 elements or fewer,
 *             when &dst[start] is not 16-byte aligned, or when the two
 *             start addresses are within 16 bytes of each other.
 *   .L7       4-floats-per-iteration SSE loop (aligned store to dst,
 *             split 8+8 byte loads from both arrays).
 *   .L6/.L8   scalar tail for the elements left over after .L7.
 */
        .p2align 4,,15
        .globl  saxpy
        .type   saxpy, @function
saxpy:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        cmpl    %ecx, %edx              # start vs end; pushq below leaves flags intact
        pushq   %rbx
        .cfi_def_cfa_offset 24
        jge     .L11                    # start >= end (signed): empty range
        .cfi_offset 3, -24
        .cfi_offset 6, -16
        movl    %ecx, %ebx
        movslq  %edx,%r10
        subl    %edx, %ebx              # ebx = end - start (element count)
        salq    $2, %r10                # r10 = start * 4 (byte offset)
        cmpl    $5, %ebx
        leaq    (%rdi,%r10), %r8        # r8  = &dst[start]
        leaq    (%rsi,%r10), %r10       # r10 = &src[start]
        jbe     .L9                     # count <= 5: scalar loop only
        testb   $15, %r8b
        jne     .L9                     # dst start not 16-byte aligned: scalar loop
        leaq    16(%r10), %rax
        cmpq    %rax, %r8
        jbe     .L17                    # dst start <= src start + 16: test other side
.L12:
        movl    %ebx, %r11d
        shrl    $2, %r11d               # r11d = number of 4-float vector iterations
        leal    0(,%r11,4), %ebp        # ebp  = elements covered by the vector loop
        testl   %ebp, %ebp
        je      .L6                     # no full vector: go straight to the tail
        movaps  %xmm0, %xmm1
        xorl    %eax, %eax              # rax  = byte offset into both arrays
        xorps   %xmm3, %xmm3            # xmm3 = 0, seed for the partial loads
        xorl    %r9d, %r9d              # r9d  = vector iterations done
        shufps  $0, %xmm1, %xmm1        # broadcast a to all four lanes
        movaps  %xmm1, %xmm4            # xmm4 = {a, a, a, a}
        .p2align 4,,10
        .p2align 3
.L7:
        movaps  %xmm3, %xmm1
        addl    $1, %r9d
        movaps  %xmm3, %xmm2
        movlps  (%r10,%rax), %xmm1      # xmm1 = src[0..3], loaded as two 8-byte halves
        movlps  (%r8,%rax), %xmm2       # xmm2 = dst[0..3], loaded as two 8-byte halves
        movhps  8(%r10,%rax), %xmm1
        movhps  8(%r8,%rax), %xmm2
        mulps   %xmm4, %xmm1            # a * src
        addps   %xmm2, %xmm1            # a * src + dst
        movaps  %xmm1, (%r8,%rax)       # aligned store; alignment was tested on entry
        addq    $16, %rax
        cmpl    %r11d, %r9d
        jb      .L7
        cmpl    %ebp, %ebx              # did the vector loop cover every element?
        leal    (%rbp,%rdx), %edx       # edx = first index not yet done; lea keeps flags
        je      .L11                    # yes: nothing left for the tail
.L6:
        movslq  %edx,%rax
        salq    $2, %rax
        addq    %rax, %rsi              # rsi = &src[edx]
        addq    %rax, %rdi              # rdi = &dst[edx]
        .p2align 4,,10
        .p2align 3
.L8:
        movss   (%rsi), %xmm1
        addl    $1, %edx
        mulss   %xmm0, %xmm1
        addq    $4, %rsi
        addss   (%rdi), %xmm1
        movss   %xmm1, (%rdi)
        addq    $4, %rdi
        cmpl    %edx, %ecx
        jg      .L8                     # loop while end > index
.L11:
        popq    %rbx
        popq    %rbp
        ret
        .p2align 4,,10
        .p2align 3
.L17:
        leaq    16(%r8), %rax
        cmpq    %rax, %r10
        ja      .L12                    # src start > dst start + 16: vector path is used
        .p2align 4,,10
        .p2align 3
.L9:
        movss   (%r10), %xmm1
        addl    $1, %edx
        mulss   %xmm0, %xmm1
        addq    $4, %r10
        addss   (%r8), %xmm1
        movss   %xmm1, (%r8)
        addq    $4, %r8
        cmpl    %edx, %ecx
        jg      .L9                     # loop while end > index
        jmp     .L11
        .cfi_endproc
En parlant de flemmasse, il va peut-être falloir que je me bouge pour installer gcc 4.5.1.