Code:
#-----------------------------------------------------------------------
# main
# Syntax: AT&T / GAS.  ABI: System V AMD64 (args in rdi, rsi, ...).
# Out:    eax = 0
# Stack:  frame pointer in rbp; rsp is rounded down to a 128-byte
#         boundary and 128 bytes are reserved, so rsp is 16-byte
#         aligned at every call below.
# Flow:   __intel_new_proc_init(3) -> set MXCSR bits 0x8040 ->
#         __kmpc_begin(&loc.1, 0) -> __kmpc_end(&loc.12) -> return 0.
# Static data is addressed RIP-relative (as saxpy_c already does for
# its descriptor table) so the code does not depend on 32-bit absolute
# relocations and can be linked position-independent.
#-----------------------------------------------------------------------
L__routine_start_main_0:
main:
        pushq   %rbp                                        #19.1
        movq    %rsp, %rbp                                  #19.1
        andq    $-128, %rsp                                 #19.1  align frame to 128 bytes
        subq    $128, %rsp                                  #19.1  scratch area; (%rsp) holds MXCSR below
        pushq   $3                                          #19.1  rdi = 3 via push/pop (balanced, rsp unchanged)
        popq    %rdi                                        #19.1
        call    __intel_new_proc_init                       #19.1
        stmxcsr (%rsp)                                      #19.1  read current MXCSR into the scratch slot
        lea     .2.5_2_kmpc_loc_struct_pack.1(%rip), %rdi   #19.1  arg0 = location descriptor for line 19
        xorl    %esi, %esi                                  #19.1  arg1 = 0
        orl     $0x8040, (%rsp)                             #19.1  set MXCSR bit 15 (flush-to-zero) and bit 6 (denormals-are-zero)
        xorl    %eax, %eax                                  #19.1  al = 0 (presumably: no vector-register args)
        ldmxcsr (%rsp)                                      #19.1  install the updated MXCSR
        call    __kmpc_begin                                #19.1
        lea     .2.5_2_kmpc_loc_struct_pack.12(%rip), %rdi  #20.10 arg0 = location descriptor for line 20
        xorl    %eax, %eax                                  #20.10
        call    __kmpc_end                                  #20.10
        xorl    %eax, %eax                                  #20.10 return value 0
        movq    %rbp, %rsp                                  #20.10 drop the aligned frame
        popq    %rbp                                        #20.10
        ret                                                 #20.10
# Location descriptor whose address main passes to __kmpc_begin.
# Emitted layout: four 32-bit words, one 64-bit pointer, then the
# 21-byte text ";unknown;main;19;19;;" with no terminating NUL.
# NOTE(review): .2.5_2__kmpc_loc_pack.0 is not defined in this excerpt;
# it presumably labels the text bytes that follow the pointer - confirm
# against the full listing.
.2.5_2_kmpc_loc_struct_pack.1:
        .long   0, 2, 0, 0
        .quad   .2.5_2__kmpc_loc_pack.0
        .ascii  ";unknown;main;19;19;;"
# Location descriptor whose address main passes to __kmpc_end.
# Emitted layout: four 32-bit words, one 64-bit pointer, then the
# 21-byte text ";unknown;main;20;20;;" with no terminating NUL.
# NOTE(review): .2.5_2__kmpc_loc_pack.11 is not defined in this excerpt;
# it presumably labels the text bytes that follow the pointer - confirm
# against the full listing.
.2.5_2_kmpc_loc_struct_pack.12:
        .long   0, 2, 0, 0
        .quad   .2.5_2__kmpc_loc_pack.11
        .ascii  ";unknown;main;20;20;;"
#-----------------------------------------------------------------------
# saxpy_c(float*, float, float*, int)
# Syntax: AT&T / GAS.  ABI: System V AMD64.
# In:     rdi  = p  (first float*,  read and written)
#         xmm0 = a  (scalar multiplier)
#         rsi  = q  (second float*, read only)
#         edx  = n  (element count)
# Effect: p[i] = p[i] * a + q[i] for i = 0 .. n-1, either through the
#         offload runtime or, if that is unavailable, on the host.
# Clobb:  rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm5, flags
# Stack:  360-byte frame (rsp is 16-byte aligned after the subq):
#           0(%rsp)    saved target handle across the memcpy call
#           8(%rsp)    p            336(%rsp)  q
#           344(%rsp)  a            352(%rsp)  n
#           16(%rsp)   64-byte copy of .2.6_2__offload_var_desc2_p.52
#           80(%rsp)   256-byte copy of .2.6_2__offload_var_desc1_p.47
# Static data is addressed RIP-relative throughout (the descriptor-2
# table is indexed through a base held in r11) so no 32-bit absolute
# relocations are required.
# NOTE(review): n == 0 returns early, but a negative n is sign-extended
# and then reaches the scalar loop, which compares unsigned; whether that
# matches the source loop cannot be determined from this listing - confirm.
#-----------------------------------------------------------------------
L__routine_start__Z7saxpy_cPffS_i_1:
saxpy_c(float*, float, float*, int):
        subq    $360, %rsp                                  #9.1
        xorl    %ecx, %ecx                                  #10.3  arg3 = 0
        movq    %rdi, 8(%rsp)                               #9.1   spill p
        movl    $2, %edi                                    #10.3  arg0 = 2
        movq    %rsi, 336(%rsp)                             #9.1   spill q
        movl    $-1, %esi                                   #10.3  arg1 = -1
        movl    %edx, 352(%rsp)                             #9.1   spill n
        movl    $1, %edx                                    #10.3  arg2 = 1
        lea     __sd_2inst_string.1(%rip), %r8              #10.3  arg4 = source file name string
        movl    $10, %r9d                                   #10.3  arg5 = 10 (presumably the source line)
        xorl    %eax, %eax                                  #10.3
        movss   %xmm0, 344(%rsp)                            #9.1   spill a
        call    __offload_target_acquire                    #10.3
        testq   %rax, %rax                                  #10.3
        je      ..B2.5                                      #10.3  null handle: run on the host

        # Build the stack copies of both descriptor tables.
        movl    $256, %edx                                  #10.3  4 descriptors x 64 bytes
        lea     80(%rsp), %rdi                              #10.3
        movq    %rax, (%rsp)                                #10.3  keep the handle across the call
        lea     .2.6_2__offload_var_desc1_p.47(%rip), %rsi  #10.3
        call    _intel_fast_memcpy                          #10.3
        movq    (%rsp), %rax                                #
        lea     16(%rsp), %r9                               #10.3  r9 = destination of the desc2 copy
        pushq   $64                                         #10.3  r10 = 64 via push/pop (balanced)
        popq    %r10                                        #10.3
        lea     .2.6_2__offload_var_desc2_p.52(%rip), %r11  #10.3  r11 = table base (dead until reloaded below)
..B2.33:                                                    # copy 32 bytes per pass, from the top down
        movq    -8(%r11,%r10), %rdx                         #10.3
        movq    -16(%r11,%r10), %rcx                        #10.3
        movq    -24(%r11,%r10), %rsi                        #10.3
        movq    -32(%r11,%r10), %r8                         #10.3
        movq    %rdx, -8(%r9,%r10)                          #10.3
        movq    %rcx, -16(%r9,%r10)                         #10.3
        movq    %rsi, -24(%r9,%r10)                         #10.3
        movq    %r8, -32(%r9,%r10)                          #10.3
        subq    $32, %r10                                   #10.3
        jne     ..B2.33                                     #10.3

        # Patch the qword at offset 56 of each 64-byte descriptor with the
        # address of the corresponding spilled argument, then call the runtime.
        lea     __sd_2inst_string.2(%rip), %rsi             #10.3  arg1 = offload entry name
        lea     80(%rsp), %r8                               #10.3  arg4 = descriptor table copy
        movq    %rax, %rdi                                  #10.3  arg0 = target handle
        lea     8(%rsp), %r10                               #10.3
        movq    %r10, 56(%r8)                               #10.3  desc[0] -> &p   (136(%rsp))
        lea     336(%rsp), %r11                             #10.3
        movq    %r11, 200(%rsp)                             #10.3  desc[1] -> &q
        lea     352(%rsp), %rdx                             #10.3
        movq    %rdx, 264(%rsp)                             #10.3  desc[2] -> &n
        lea     344(%rsp), %rcx                             #10.3
        movq    %rcx, 328(%rsp)                             #10.3  desc[3] -> &a
        xorl    %r10d, %r10d                                #10.3
        pushq   %rsp                                        #10.3  4 stack args in total (32 bytes, keeps alignment)
        pushq   %r10                                        #10.3
        pushq   %r10                                        #10.3
        xorl    %edx, %edx                                  #10.3  arg2 = 0
        pushq   $4                                          #10.3  rcx = 4 via push/pop: arg3 = descriptor count
        popq    %rcx                                        #10.3
        xorl    %eax, %eax                                  #10.3
        pushq   %r10                                        #10.3
        call    __offload_offload                           #10.3  r9 (arg5) still points at the desc2 copy
        addq    $32, %rsp                                   #10.3  pop the 4 stack args
        testl   %eax, %eax                                  #10.3
        jne     ..B2.26                                     #10.3  nonzero: done, skip the host loop

        # ---- Host fallback -------------------------------------------
        # rcx = n, rdi = p, rsi = q, xmm0 = a from here on.
..B2.5:
        movslq  352(%rsp), %rcx                             #11.23
        testq   %rcx, %rcx                                  #11.23
        je      ..B2.26                                     #11.23 n == 0: nothing to do
        movq    8(%rsp), %rdi                               #13.16
        cmpq    $8, %rcx                                    #11.3
        movss   344(%rsp), %xmm0                            #13.12 (loads below leave the cmpq flags intact)
        movq    336(%rsp), %rsi                             #13.23
        jl      ..B2.27                                     #11.3  fewer than 8 elements: scalar only
        movq    %rdi, %rdx                                  #11.3
        andq    $15, %rdx                                   #11.3  rdx = p mod 16
        testl   %edx, %edx                                  #11.3
        je      ..B2.10                                     #11.3  p already 16-byte aligned: peel = 0
        testb   $3, %dl                                     #11.3
        jne     ..B2.27                                     #11.3  p not 4-byte aligned: scalar only
        negl    %edx                                        #11.3
        addl    $16, %edx                                   #11.3
        shrl    $2, %edx                                    #11.3  edx = (16 - p mod 16) / 4 = peel count
..B2.10:
        movl    %edx, %eax                                  #11.3  rax = peel count
        lea     8(%rax), %r8                                #11.3
        cmpq    %r8, %rcx                                   #11.3
        jl      ..B2.27                                     #11.3  n < peel + 8: scalar only
        movl    %ecx, %r9d                                  #11.3
        movl    %r9d, %r8d                                  #11.3
        subl    %edx, %r8d                                  #11.3
        andl    $7, %r8d                                    #11.3
        subl    %r8d, %r9d                                  #11.3  r9 = n - ((n - peel) & 7) = end of vector part
        xorl    %r8d, %r8d                                  #11.3  r8 = peel-loop index
        movslq  %r9d, %r9                                   #11.3
        testq   %rax, %rax                                  #11.3
        jbe     ..B2.15                                     #11.3  peel == 0: skip the peel loop
..B2.13:                                                    # scalar peel until p + 4*i is 16-byte aligned
        movss   (%rdi,%r8,4), %xmm1                         #13.16
        mulss   %xmm0, %xmm1                                #13.16
        addss   (%rsi,%r8,4), %xmm1                         #13.23
        movss   %xmm1, (%rdi,%r8,4)                         #13.5
        incq    %r8                                         #11.3
        cmpq    %rax, %r8                                   #11.3
        jb      ..B2.13                                     #11.3
..B2.15:
        movl    %edx, %edx                                  #13.23 zero-extend peel count
        lea     (%rsi,%rdx,4), %r8                          #13.23 r8 = &q[peel]
        testq   $15, %r8                                    #11.3
        je      ..B2.19                                     #11.3  q aligned here too: use the aligned-operand loop
        movaps  %xmm0, %xmm1                                #11.3
        shufps  $0, %xmm1, %xmm1                            #11.3  xmm1 = {a, a, a, a}
..B2.17:                                                    # 8 floats per pass; q read with unaligned loads
        movaps  (%rdi,%rax,4), %xmm3                        #13.16
        movaps  16(%rdi,%rax,4), %xmm5                      #13.16
        mulps   %xmm1, %xmm3                                #13.16
        mulps   %xmm1, %xmm5                                #13.16
        movups  (%rsi,%rax,4), %xmm2                        #13.23
        movups  16(%rsi,%rax,4), %xmm4                      #13.23
        addps   %xmm2, %xmm3                                #13.23
        addps   %xmm4, %xmm5                                #13.23
        movaps  %xmm3, (%rdi,%rax,4)                        #13.5
        movaps  %xmm5, 16(%rdi,%rax,4)                      #13.5
        addq    $8, %rax                                    #11.3
        cmpq    %r9, %rax                                   #11.3
        jb      ..B2.17                                     #11.3
        jmp     ..B2.22                                     #11.3
..B2.19:
        movaps  %xmm0, %xmm1                                #11.3
        shufps  $0, %xmm1, %xmm1                            #11.3  xmm1 = {a, a, a, a}
..B2.20:                                                    # 8 floats per pass; q used as aligned memory operand
        movaps  (%rdi,%rax,4), %xmm2                        #13.16
        movaps  16(%rdi,%rax,4), %xmm3                      #13.16
        mulps   %xmm1, %xmm2                                #13.16
        mulps   %xmm1, %xmm3                                #13.16
        addps   (%rsi,%rax,4), %xmm2                        #13.23
        addps   16(%rsi,%rax,4), %xmm3                      #13.23
        movaps  %xmm2, (%rdi,%rax,4)                        #13.5
        movaps  %xmm3, 16(%rdi,%rax,4)                      #13.5
        addq    $8, %rax                                    #11.3
        cmpq    %r9, %rax                                   #11.3
        jb      ..B2.20                                     #11.3
..B2.22:                                                    # r9 = first element not yet processed
        cmpq    %rcx, %r9                                   #11.3
        jae     ..B2.26                                     #11.3  no remainder
..B2.24:                                                    # scalar remainder loop
        movss   (%rdi,%r9,4), %xmm1                         #13.16
        mulss   %xmm0, %xmm1                                #13.16
        addss   (%rsi,%r9,4), %xmm1                         #13.23
        movss   %xmm1, (%rdi,%r9,4)                         #13.5
        incq    %r9                                         #11.3
        cmpq    %rcx, %r9                                   #11.3
        jb      ..B2.24                                     #11.3
..B2.26:
        addq    $360, %rsp                                  #15.1
        ret                                                 #15.1
..B2.27:                                                    # infrequent: process everything in the scalar loop
        xorl    %r9d, %r9d                                  #11.3  start the remainder loop at element 0
        jmp     ..B2.22                                     #11.3
# Template for four 64-byte variable descriptors (256 bytes in total).
# saxpy_c copies the whole table to its stack frame and overwrites the
# last qword (offset 56) of each record with the address of one spilled
# argument before calling __offload_offload.
# Record layout as emitted: 4 bytes, 3 x 32-bit words, 6 x 64-bit words.
# NOTE(review): the meaning of the individual fields (e.g. the leading
# 34 / 17 bytes and the 4 / 8 word) is defined by the offload runtime and
# is not visible in this listing.
.2.6_2__offload_var_desc1_p.47:
        # record 0 (pointer slot patched with the address of the saved rdi argument)
        .byte   34, 1, 1, 1
        .long   4, 0, 0
        .quad   0, 4, 1, 0, 0, 0
        # record 1 (pointer slot patched with the address of the saved rsi argument)
        .byte   34, 3, 1, 1
        .long   4, 0, 0
        .quad   0, 4, 1, 0, 0, 0
        # record 2 (pointer slot patched with the address of the saved edx argument)
        .byte   17, 1, 1, 1
        .long   8, 0, 0
        .quad   0, 4, 1, 0, 0, 0
        # record 3 (pointer slot patched with the address of the saved xmm0 argument)
        .byte   17, 1, 1, 1
        .long   8, 0, 0
        .quad   0, 4, 1, 0, 0, 0
# Second descriptor table: four 16-byte entries, each a pointer followed
# by a zero qword. saxpy_c copies these 64 bytes to 16(%rsp) and passes
# that copy to __offload_offload.
# NOTE(review): __sd_2inst_string.3 .. .6 are not defined in this excerpt;
# they presumably name the short strings stored after __sd_2inst_string.1
# - confirm against the full listing.
.2.6_2__offload_var_desc2_p.52:
        .quad   __sd_2inst_string.3, 0
        .quad   __sd_2inst_string.4, 0
        .quad   __sd_2inst_string.5, 0
        .quad   __sd_2inst_string.6, 0
# NUL-terminated source path passed to __offload_target_acquire by
# saxpy_c, followed by four one-character NUL-terminated strings.
# NOTE(review): the short strings carry no labels in this excerpt; they
# are presumably the targets of __sd_2inst_string.3 .. .6 - confirm.
__sd_2inst_string.1:
        .asciz  "/tmp/gcc-explorer-compiler112104-1078-hw1m29/example.cpp"
        .asciz  "x"
        .asciz  "y"
        .asciz  "n"
        .asciz  "a"
# NUL-terminated entry name passed to __offload_offload by saxpy_c.
# The identical string is emitted a second time directly after it.
# NOTE(review): the second copy has no label in this excerpt; it
# presumably belongs to a separate symbol in the full listing - confirm.
__sd_2inst_string.2:
        .asciz  "__offload_entry_example_cpp_10_Z7saxpy_cPffS_i"
        .asciz  "__offload_entry_example_cpp_10_Z7saxpy_cPffS_i"