Melzzzzz
6/29/2014 3:01:00 PM
On Sun, 29 Jun 2014 10:47:51 -0700
Ramine <ramine@1.1> wrote:
>
> Hello,
>
> Look, for example, at the assembler source code of
> SparseCompRow.c that you will find inside the
> scimark2 source code benchmark. Look carefully
> at the following assembler code, and you will notice
> that it is auto-vectorizing. I am just using
> -O1 level optimization and the -S flag to generate
> the assembler code. Here it is:
>
>
> ===
>
>
> .file "SparseCompRow.c"
> .text
> .globl SparseCompRow_num_flops
> .def SparseCompRow_num_flops; .scl 2; .type 32; .endef
> .seh_proc SparseCompRow_num_flops
> SparseCompRow_num_flops:
> .seh_endprologue
> movl %edx, %eax
> sarl $31, %edx
> idivl %ecx
> imull %eax, %ecx
> cvtsi2sd %ecx, %xmm0
> addsd %xmm0, %xmm0
> cvtsi2sd %r8d, %xmm1
> mulsd %xmm1, %xmm0
> ret
> .seh_endproc
> .globl SparseCompRow_matmult
> .def SparseCompRow_matmult; .scl 2; .type 32; .endef
> .seh_proc SparseCompRow_matmult
> SparseCompRow_matmult:
> pushq %r13
> .seh_pushreg %r13
> pushq %r12
> .seh_pushreg %r12
> pushq %rbp
> .seh_pushreg %rbp
> pushq %rdi
> .seh_pushreg %rdi
> pushq %rsi
> .seh_pushreg %rsi
> pushq %rbx
> .seh_pushreg %rbx
> .seh_endprologue
> movl %ecx, %edi
> movq %rdx, %rbp
> movq %r8, %rdx
> movq %r9, %rsi
> movq 88(%rsp), %r9
> movq 96(%rsp), %rcx
> movl 104(%rsp), %r13d
> movl $0, %r12d
> xorpd %xmm2, %xmm2
> movapd %xmm2, %xmm3
> testl %r13d, %r13d
> jg .L15
> jmp .L2
> .L12:
> movl (%rsi,%rbx,4), %eax
> movl 4(%rsi,%rbx,4), %r8d
> cmpl %r8d, %eax
> jge .L10
> movapd %xmm2, %xmm0
> .L6:
> movslq %eax, %r10
> movslq (%r9,%r10,4), %r11
> movsd (%rcx,%r11,8), %xmm1
> mulsd (%rdx,%r10,8), %xmm1
> addsd %xmm1, %xmm0
> addl $1, %eax
> cmpl %r8d, %eax
> jne .L6
> jmp .L5
> .L10:
> movapd %xmm3, %xmm0
> .L5:
> movsd %xmm0, 0(%rbp,%rbx,8)
> addq $1, %rbx
> cmpl %ebx, %edi
> jg .L12
> .L8:
> addl $1, %r12d
> cmpl %r13d, %r12d
> jne .L15
> jmp .L2
> .L15:
> movl $0, %ebx
> testl %edi, %edi
> jg .L12
> .p2align 4,,4
> jmp .L8
> .L2:
> popq %rbx
> popq %rsi
> popq %rdi
> popq %rbp
> popq %r12
> popq %r13
> ret
> .seh_endproc
>
> ==
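No, this is just scalar code. It does not use mulpd/addpd, but rather
mulsd/addsd.
Those are scalar instructions that merely operate on the SIMD (SSE)
registers: the "sd" suffix means "scalar double" (one element per
instruction), whereas vectorized code would use the "pd" (packed double)
forms.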
--
Click OK to continue...