.text .globl aesni_multi_cbc_encrypt .type aesni_multi_cbc_encrypt,@function .align 32 aesni_multi_cbc_encrypt: .cfi_startproc cmpl $2,%edx jb .Lenc_non_avx movl OPENSSL_ia32cap_P+4(%rip),%ecx testl $268435456,%ecx jnz _avx_cbc_enc_shortcut jmp .Lenc_non_avx .align 16 .Lenc_non_avx: movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx .cfi_offset %rbx,-16 pushq %rbp .cfi_offset %rbp,-24 pushq %r12 .cfi_offset %r12,-32 pushq %r13 .cfi_offset %r13,-40 pushq %r14 .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 subq $48,%rsp andq $-64,%rsp movq %rax,16(%rsp) .cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 .Lenc4x_body: movdqu (%rsi),%xmm12 leaq 120(%rsi),%rsi leaq 80(%rdi),%rdi .Lenc4x_loop_grande: movl %edx,24(%rsp) xorl %edx,%edx movl -64(%rdi),%ecx movq -80(%rdi),%r8 cmpl %edx,%ecx movq -72(%rdi),%r12 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -56(%rdi),%xmm2 movl %ecx,32(%rsp) cmovleq %rsp,%r8 movl -24(%rdi),%ecx movq -40(%rdi),%r9 cmpl %edx,%ecx movq -32(%rdi),%r13 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -16(%rdi),%xmm3 movl %ecx,36(%rsp) cmovleq %rsp,%r9 movl 16(%rdi),%ecx movq 0(%rdi),%r10 cmpl %edx,%ecx movq 8(%rdi),%r14 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 24(%rdi),%xmm4 movl %ecx,40(%rsp) cmovleq %rsp,%r10 movl 56(%rdi),%ecx movq 40(%rdi),%r11 cmpl %edx,%ecx movq 48(%rdi),%r15 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 64(%rdi),%xmm5 movl %ecx,44(%rsp) cmovleq %rsp,%r11 testl %edx,%edx jz .Lenc4x_done movups 16-120(%rsi),%xmm1 pxor %xmm12,%xmm2 movups 32-120(%rsi),%xmm0 pxor %xmm12,%xmm3 movl 240-120(%rsi),%eax pxor %xmm12,%xmm4 movdqu (%r8),%xmm6 pxor %xmm12,%xmm5 movdqu (%r9),%xmm7 pxor %xmm6,%xmm2 movdqu (%r10),%xmm8 pxor %xmm7,%xmm3 movdqu (%r11),%xmm9 pxor %xmm8,%xmm4 pxor %xmm9,%xmm5 movdqa 32(%rsp),%xmm10 xorq %rbx,%rbx jmp .Loop_enc4x .align 32 .Loop_enc4x: addq $16,%rbx leaq 16(%rsp),%rbp movl $1,%ecx subq %rbx,%rbp .byte 102,15,56,220,209 prefetcht0 31(%r8,%rbx,1) prefetcht0 31(%r9,%rbx,1) .byte 102,15,56,220,217 prefetcht0 31(%r10,%rbx,1) prefetcht0 31(%r10,%rbx,1) .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 48-120(%rsi),%xmm1 cmpl 32(%rsp),%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 cmovgeq %rbp,%r8 cmovgq %rbp,%r12 .byte 102,15,56,220,232 movups -56(%rsi),%xmm0 cmpl 36(%rsp),%ecx .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 cmovgeq %rbp,%r9 cmovgq %rbp,%r13 .byte 102,15,56,220,233 movups -40(%rsi),%xmm1 cmpl 40(%rsp),%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 cmovgeq %rbp,%r10 cmovgq %rbp,%r14 .byte 102,15,56,220,232 movups -24(%rsi),%xmm0 cmpl 44(%rsp),%ecx .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 cmovgeq %rbp,%r11 cmovgq %rbp,%r15 .byte 102,15,56,220,233 movups -8(%rsi),%xmm1 movdqa %xmm10,%xmm11 .byte 102,15,56,220,208 prefetcht0 15(%r12,%rbx,1) prefetcht0 15(%r13,%rbx,1) .byte 102,15,56,220,216 prefetcht0 15(%r14,%rbx,1) prefetcht0 15(%r15,%rbx,1) .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 128-120(%rsi),%xmm0 pxor %xmm12,%xmm12 .byte 102,15,56,220,209 pcmpgtd %xmm12,%xmm11 movdqu -120(%rsi),%xmm12 .byte 102,15,56,220,217 paddd %xmm11,%xmm10 movdqa %xmm10,32(%rsp) .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 144-120(%rsi),%xmm1 cmpl $11,%eax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 160-120(%rsi),%xmm0 jb .Lenc4x_tail .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 176-120(%rsi),%xmm1 .byte 
102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 192-120(%rsi),%xmm0 je .Lenc4x_tail .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 208-120(%rsi),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 224-120(%rsi),%xmm0 jmp .Lenc4x_tail .align 32 .Lenc4x_tail: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movdqu (%r8,%rbx,1),%xmm6 movdqu 16-120(%rsi),%xmm1 .byte 102,15,56,221,208 movdqu (%r9,%rbx,1),%xmm7 pxor %xmm12,%xmm6 .byte 102,15,56,221,216 movdqu (%r10,%rbx,1),%xmm8 pxor %xmm12,%xmm7 .byte 102,15,56,221,224 movdqu (%r11,%rbx,1),%xmm9 pxor %xmm12,%xmm8 .byte 102,15,56,221,232 movdqu 32-120(%rsi),%xmm0 pxor %xmm12,%xmm9 movups %xmm2,-16(%r12,%rbx,1) pxor %xmm6,%xmm2 movups %xmm3,-16(%r13,%rbx,1) pxor %xmm7,%xmm3 movups %xmm4,-16(%r14,%rbx,1) pxor %xmm8,%xmm4 movups %xmm5,-16(%r15,%rbx,1) pxor %xmm9,%xmm5 decl %edx jnz .Loop_enc4x movq 16(%rsp),%rax .cfi_def_cfa %rax,8 movl 24(%rsp),%edx leaq 160(%rdi),%rdi decl %edx jnz .Lenc4x_loop_grande .Lenc4x_done: movq -48(%rax),%r15 .cfi_restore %r15 movq -40(%rax),%r14 .cfi_restore %r14 movq -32(%rax),%r13 .cfi_restore %r13 movq -24(%rax),%r12 .cfi_restore %r12 movq -16(%rax),%rbp .cfi_restore %rbp movq -8(%rax),%rbx .cfi_restore %rbx leaq (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc4x_epilogue: .byte 0xf3,0xc3 .cfi_endproc .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt .globl aesni_multi_cbc_decrypt .type aesni_multi_cbc_decrypt,@function .align 32 aesni_multi_cbc_decrypt: .cfi_startproc cmpl $2,%edx jb .Ldec_non_avx movl OPENSSL_ia32cap_P+4(%rip),%ecx testl $268435456,%ecx jnz _avx_cbc_dec_shortcut jmp .Ldec_non_avx .align 16 .Ldec_non_avx: movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx .cfi_offset %rbx,-16 pushq %rbp .cfi_offset %rbp,-24 pushq %r12 .cfi_offset %r12,-32 pushq %r13 .cfi_offset %r13,-40 pushq %r14 .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 subq $48,%rsp andq $-64,%rsp movq %rax,16(%rsp) .cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 .Ldec4x_body: movdqu (%rsi),%xmm12 leaq 120(%rsi),%rsi leaq 80(%rdi),%rdi .Ldec4x_loop_grande: movl %edx,24(%rsp) xorl %edx,%edx movl -64(%rdi),%ecx movq -80(%rdi),%r8 cmpl %edx,%ecx movq -72(%rdi),%r12 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -56(%rdi),%xmm6 movl %ecx,32(%rsp) cmovleq %rsp,%r8 movl -24(%rdi),%ecx movq -40(%rdi),%r9 cmpl %edx,%ecx movq -32(%rdi),%r13 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -16(%rdi),%xmm7 movl %ecx,36(%rsp) cmovleq %rsp,%r9 movl 16(%rdi),%ecx movq 0(%rdi),%r10 cmpl %edx,%ecx movq 8(%rdi),%r14 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 24(%rdi),%xmm8 movl %ecx,40(%rsp) cmovleq %rsp,%r10 movl 56(%rdi),%ecx movq 40(%rdi),%r11 cmpl %edx,%ecx movq 48(%rdi),%r15 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 64(%rdi),%xmm9 movl %ecx,44(%rsp) cmovleq %rsp,%r11 testl %edx,%edx jz .Ldec4x_done movups 16-120(%rsi),%xmm1 movups 32-120(%rsi),%xmm0 movl 240-120(%rsi),%eax movdqu (%r8),%xmm2 movdqu (%r9),%xmm3 pxor %xmm12,%xmm2 movdqu (%r10),%xmm4 pxor %xmm12,%xmm3 movdqu (%r11),%xmm5 pxor %xmm12,%xmm4 pxor %xmm12,%xmm5 movdqa 32(%rsp),%xmm10 xorq %rbx,%rbx jmp .Loop_dec4x .align 32 .Loop_dec4x: addq $16,%rbx leaq 16(%rsp),%rbp movl $1,%ecx subq %rbx,%rbp .byte 102,15,56,222,209 prefetcht0 31(%r8,%rbx,1) prefetcht0 31(%r9,%rbx,1) .byte 102,15,56,222,217 prefetcht0 31(%r10,%rbx,1) prefetcht0 31(%r11,%rbx,1) .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 
48-120(%rsi),%xmm1 cmpl 32(%rsp),%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 cmovgeq %rbp,%r8 cmovgq %rbp,%r12 .byte 102,15,56,222,232 movups -56(%rsi),%xmm0 cmpl 36(%rsp),%ecx .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 cmovgeq %rbp,%r9 cmovgq %rbp,%r13 .byte 102,15,56,222,233 movups -40(%rsi),%xmm1 cmpl 40(%rsp),%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 cmovgeq %rbp,%r10 cmovgq %rbp,%r14 .byte 102,15,56,222,232 movups -24(%rsi),%xmm0 cmpl 44(%rsp),%ecx .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 cmovgeq %rbp,%r11 cmovgq %rbp,%r15 .byte 102,15,56,222,233 movups -8(%rsi),%xmm1 movdqa %xmm10,%xmm11 .byte 102,15,56,222,208 prefetcht0 15(%r12,%rbx,1) prefetcht0 15(%r13,%rbx,1) .byte 102,15,56,222,216 prefetcht0 15(%r14,%rbx,1) prefetcht0 15(%r15,%rbx,1) .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 128-120(%rsi),%xmm0 pxor %xmm12,%xmm12 .byte 102,15,56,222,209 pcmpgtd %xmm12,%xmm11 movdqu -120(%rsi),%xmm12 .byte 102,15,56,222,217 paddd %xmm11,%xmm10 movdqa %xmm10,32(%rsp) .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 144-120(%rsi),%xmm1 cmpl $11,%eax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 160-120(%rsi),%xmm0 jb .Ldec4x_tail .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 176-120(%rsi),%xmm1 .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 192-120(%rsi),%xmm0 je .Ldec4x_tail .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 208-120(%rsi),%xmm1 .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 224-120(%rsi),%xmm0 jmp .Ldec4x_tail .align 32 .Ldec4x_tail: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 pxor %xmm0,%xmm6 pxor %xmm0,%xmm7 .byte 102,15,56,222,233 movdqu 16-120(%rsi),%xmm1 pxor %xmm0,%xmm8 pxor %xmm0,%xmm9 movdqu 32-120(%rsi),%xmm0 .byte 102,15,56,223,214 .byte 102,15,56,223,223 movdqu -16(%r8,%rbx,1),%xmm6 movdqu -16(%r9,%rbx,1),%xmm7 .byte 102,65,15,56,223,224 .byte 102,65,15,56,223,233 movdqu -16(%r10,%rbx,1),%xmm8 movdqu -16(%r11,%rbx,1),%xmm9 movups %xmm2,-16(%r12,%rbx,1) movdqu (%r8,%rbx,1),%xmm2 movups %xmm3,-16(%r13,%rbx,1) movdqu (%r9,%rbx,1),%xmm3 pxor %xmm12,%xmm2 movups %xmm4,-16(%r14,%rbx,1) movdqu (%r10,%rbx,1),%xmm4 pxor %xmm12,%xmm3 movups %xmm5,-16(%r15,%rbx,1) movdqu (%r11,%rbx,1),%xmm5 pxor %xmm12,%xmm4 pxor %xmm12,%xmm5 decl %edx jnz .Loop_dec4x movq 16(%rsp),%rax .cfi_def_cfa %rax,8 movl 24(%rsp),%edx leaq 160(%rdi),%rdi decl %edx jnz .Ldec4x_loop_grande .Ldec4x_done: movq -48(%rax),%r15 .cfi_restore %r15 movq -40(%rax),%r14 .cfi_restore %r14 movq -32(%rax),%r13 .cfi_restore %r13 movq -24(%rax),%r12 .cfi_restore %r12 movq -16(%rax),%rbp .cfi_restore %rbp movq -8(%rax),%rbx .cfi_restore %rbx leaq (%rax),%rsp .cfi_def_cfa_register %rsp .Ldec4x_epilogue: .byte 0xf3,0xc3 .cfi_endproc .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt .type aesni_multi_cbc_encrypt_avx,@function .align 32 aesni_multi_cbc_encrypt_avx: .cfi_startproc _avx_cbc_enc_shortcut: movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx .cfi_offset %rbx,-16 pushq %rbp .cfi_offset %rbp,-24 pushq %r12 .cfi_offset %r12,-32 pushq %r13 .cfi_offset %r13,-40 pushq %r14 .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 subq $192,%rsp andq $-128,%rsp movq 
%rax,16(%rsp) .cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 .Lenc8x_body: vzeroupper vmovdqu (%rsi),%xmm15 leaq 120(%rsi),%rsi leaq 160(%rdi),%rdi shrl $1,%edx .Lenc8x_loop_grande: xorl %edx,%edx movl -144(%rdi),%ecx movq -160(%rdi),%r8 cmpl %edx,%ecx movq -152(%rdi),%rbx cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -136(%rdi),%xmm2 movl %ecx,32(%rsp) cmovleq %rsp,%r8 subq %r8,%rbx movq %rbx,64(%rsp) movl -104(%rdi),%ecx movq -120(%rdi),%r9 cmpl %edx,%ecx movq -112(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -96(%rdi),%xmm3 movl %ecx,36(%rsp) cmovleq %rsp,%r9 subq %r9,%rbp movq %rbp,72(%rsp) movl -64(%rdi),%ecx movq -80(%rdi),%r10 cmpl %edx,%ecx movq -72(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -56(%rdi),%xmm4 movl %ecx,40(%rsp) cmovleq %rsp,%r10 subq %r10,%rbp movq %rbp,80(%rsp) movl -24(%rdi),%ecx movq -40(%rdi),%r11 cmpl %edx,%ecx movq -32(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -16(%rdi),%xmm5 movl %ecx,44(%rsp) cmovleq %rsp,%r11 subq %r11,%rbp movq %rbp,88(%rsp) movl 16(%rdi),%ecx movq 0(%rdi),%r12 cmpl %edx,%ecx movq 8(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 24(%rdi),%xmm6 movl %ecx,48(%rsp) cmovleq %rsp,%r12 subq %r12,%rbp movq %rbp,96(%rsp) movl 56(%rdi),%ecx movq 40(%rdi),%r13 cmpl %edx,%ecx movq 48(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 64(%rdi),%xmm7 movl %ecx,52(%rsp) cmovleq %rsp,%r13 subq %r13,%rbp movq %rbp,104(%rsp) movl 96(%rdi),%ecx movq 80(%rdi),%r14 cmpl %edx,%ecx movq 88(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 104(%rdi),%xmm8 movl %ecx,56(%rsp) cmovleq %rsp,%r14 subq %r14,%rbp movq %rbp,112(%rsp) movl 136(%rdi),%ecx movq 120(%rdi),%r15 cmpl %edx,%ecx movq 128(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 144(%rdi),%xmm9 movl %ecx,60(%rsp) cmovleq %rsp,%r15 subq %r15,%rbp movq %rbp,120(%rsp) testl %edx,%edx jz .Lenc8x_done vmovups 16-120(%rsi),%xmm1 vmovups 32-120(%rsi),%xmm0 movl 240-120(%rsi),%eax vpxor (%r8),%xmm15,%xmm10 leaq 128(%rsp),%rbp vpxor (%r9),%xmm15,%xmm11 vpxor (%r10),%xmm15,%xmm12 vpxor (%r11),%xmm15,%xmm13 vpxor %xmm10,%xmm2,%xmm2 vpxor (%r12),%xmm15,%xmm10 vpxor %xmm11,%xmm3,%xmm3 vpxor (%r13),%xmm15,%xmm11 vpxor %xmm12,%xmm4,%xmm4 vpxor (%r14),%xmm15,%xmm12 vpxor %xmm13,%xmm5,%xmm5 vpxor (%r15),%xmm15,%xmm13 vpxor %xmm10,%xmm6,%xmm6 movl $1,%ecx vpxor %xmm11,%xmm7,%xmm7 vpxor %xmm12,%xmm8,%xmm8 vpxor %xmm13,%xmm9,%xmm9 jmp .Loop_enc8x .align 32 .Loop_enc8x: vaesenc %xmm1,%xmm2,%xmm2 cmpl 32+0(%rsp),%ecx vaesenc %xmm1,%xmm3,%xmm3 prefetcht0 31(%r8) vaesenc %xmm1,%xmm4,%xmm4 vaesenc %xmm1,%xmm5,%xmm5 leaq (%r8,%rbx,1),%rbx cmovgeq %rsp,%r8 vaesenc %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm1,%xmm7,%xmm7 subq %r8,%rbx vaesenc %xmm1,%xmm8,%xmm8 vpxor 16(%r8),%xmm15,%xmm10 movq %rbx,64+0(%rsp) vaesenc %xmm1,%xmm9,%xmm9 vmovups -72(%rsi),%xmm1 leaq 16(%r8,%rbx,1),%r8 vmovdqu %xmm10,0(%rbp) vaesenc %xmm0,%xmm2,%xmm2 cmpl 32+4(%rsp),%ecx movq 64+8(%rsp),%rbx vaesenc %xmm0,%xmm3,%xmm3 prefetcht0 31(%r9) vaesenc %xmm0,%xmm4,%xmm4 vaesenc %xmm0,%xmm5,%xmm5 leaq (%r9,%rbx,1),%rbx cmovgeq %rsp,%r9 vaesenc %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm0,%xmm7,%xmm7 subq %r9,%rbx vaesenc %xmm0,%xmm8,%xmm8 vpxor 16(%r9),%xmm15,%xmm11 movq %rbx,64+8(%rsp) vaesenc %xmm0,%xmm9,%xmm9 vmovups -56(%rsi),%xmm0 leaq 16(%r9,%rbx,1),%r9 vmovdqu %xmm11,16(%rbp) vaesenc %xmm1,%xmm2,%xmm2 cmpl 32+8(%rsp),%ecx movq 64+16(%rsp),%rbx vaesenc %xmm1,%xmm3,%xmm3 prefetcht0 31(%r10) vaesenc %xmm1,%xmm4,%xmm4 prefetcht0 15(%r8) vaesenc %xmm1,%xmm5,%xmm5 leaq (%r10,%rbx,1),%rbx cmovgeq %rsp,%r10 vaesenc 
%xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm1,%xmm7,%xmm7 subq %r10,%rbx vaesenc %xmm1,%xmm8,%xmm8 vpxor 16(%r10),%xmm15,%xmm12 movq %rbx,64+16(%rsp) vaesenc %xmm1,%xmm9,%xmm9 vmovups -40(%rsi),%xmm1 leaq 16(%r10,%rbx,1),%r10 vmovdqu %xmm12,32(%rbp) vaesenc %xmm0,%xmm2,%xmm2 cmpl 32+12(%rsp),%ecx movq 64+24(%rsp),%rbx vaesenc %xmm0,%xmm3,%xmm3 prefetcht0 31(%r11) vaesenc %xmm0,%xmm4,%xmm4 prefetcht0 15(%r9) vaesenc %xmm0,%xmm5,%xmm5 leaq (%r11,%rbx,1),%rbx cmovgeq %rsp,%r11 vaesenc %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm0,%xmm7,%xmm7 subq %r11,%rbx vaesenc %xmm0,%xmm8,%xmm8 vpxor 16(%r11),%xmm15,%xmm13 movq %rbx,64+24(%rsp) vaesenc %xmm0,%xmm9,%xmm9 vmovups -24(%rsi),%xmm0 leaq 16(%r11,%rbx,1),%r11 vmovdqu %xmm13,48(%rbp) vaesenc %xmm1,%xmm2,%xmm2 cmpl 32+16(%rsp),%ecx movq 64+32(%rsp),%rbx vaesenc %xmm1,%xmm3,%xmm3 prefetcht0 31(%r12) vaesenc %xmm1,%xmm4,%xmm4 prefetcht0 15(%r10) vaesenc %xmm1,%xmm5,%xmm5 leaq (%r12,%rbx,1),%rbx cmovgeq %rsp,%r12 vaesenc %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm1,%xmm7,%xmm7 subq %r12,%rbx vaesenc %xmm1,%xmm8,%xmm8 vpxor 16(%r12),%xmm15,%xmm10 movq %rbx,64+32(%rsp) vaesenc %xmm1,%xmm9,%xmm9 vmovups -8(%rsi),%xmm1 leaq 16(%r12,%rbx,1),%r12 vaesenc %xmm0,%xmm2,%xmm2 cmpl 32+20(%rsp),%ecx movq 64+40(%rsp),%rbx vaesenc %xmm0,%xmm3,%xmm3 prefetcht0 31(%r13) vaesenc %xmm0,%xmm4,%xmm4 prefetcht0 15(%r11) vaesenc %xmm0,%xmm5,%xmm5 leaq (%rbx,%r13,1),%rbx cmovgeq %rsp,%r13 vaesenc %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm0,%xmm7,%xmm7 subq %r13,%rbx vaesenc %xmm0,%xmm8,%xmm8 vpxor 16(%r13),%xmm15,%xmm11 movq %rbx,64+40(%rsp) vaesenc %xmm0,%xmm9,%xmm9 vmovups 8(%rsi),%xmm0 leaq 16(%r13,%rbx,1),%r13 vaesenc %xmm1,%xmm2,%xmm2 cmpl 32+24(%rsp),%ecx movq 64+48(%rsp),%rbx vaesenc %xmm1,%xmm3,%xmm3 prefetcht0 31(%r14) vaesenc %xmm1,%xmm4,%xmm4 prefetcht0 15(%r12) vaesenc %xmm1,%xmm5,%xmm5 leaq (%r14,%rbx,1),%rbx cmovgeq %rsp,%r14 vaesenc %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm1,%xmm7,%xmm7 subq %r14,%rbx vaesenc %xmm1,%xmm8,%xmm8 vpxor 16(%r14),%xmm15,%xmm12 movq %rbx,64+48(%rsp) vaesenc %xmm1,%xmm9,%xmm9 vmovups 24(%rsi),%xmm1 leaq 16(%r14,%rbx,1),%r14 vaesenc %xmm0,%xmm2,%xmm2 cmpl 32+28(%rsp),%ecx movq 64+56(%rsp),%rbx vaesenc %xmm0,%xmm3,%xmm3 prefetcht0 31(%r15) vaesenc %xmm0,%xmm4,%xmm4 prefetcht0 15(%r13) vaesenc %xmm0,%xmm5,%xmm5 leaq (%r15,%rbx,1),%rbx cmovgeq %rsp,%r15 vaesenc %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesenc %xmm0,%xmm7,%xmm7 subq %r15,%rbx vaesenc %xmm0,%xmm8,%xmm8 vpxor 16(%r15),%xmm15,%xmm13 movq %rbx,64+56(%rsp) vaesenc %xmm0,%xmm9,%xmm9 vmovups 40(%rsi),%xmm0 leaq 16(%r15,%rbx,1),%r15 vmovdqu 32(%rsp),%xmm14 prefetcht0 15(%r14) prefetcht0 15(%r15) cmpl $11,%eax jb .Lenc8x_tail vaesenc %xmm1,%xmm2,%xmm2 vaesenc %xmm1,%xmm3,%xmm3 vaesenc %xmm1,%xmm4,%xmm4 vaesenc %xmm1,%xmm5,%xmm5 vaesenc %xmm1,%xmm6,%xmm6 vaesenc %xmm1,%xmm7,%xmm7 vaesenc %xmm1,%xmm8,%xmm8 vaesenc %xmm1,%xmm9,%xmm9 vmovups 176-120(%rsi),%xmm1 vaesenc %xmm0,%xmm2,%xmm2 vaesenc %xmm0,%xmm3,%xmm3 vaesenc %xmm0,%xmm4,%xmm4 vaesenc %xmm0,%xmm5,%xmm5 vaesenc %xmm0,%xmm6,%xmm6 vaesenc %xmm0,%xmm7,%xmm7 vaesenc %xmm0,%xmm8,%xmm8 vaesenc %xmm0,%xmm9,%xmm9 vmovups 192-120(%rsi),%xmm0 je .Lenc8x_tail vaesenc %xmm1,%xmm2,%xmm2 vaesenc %xmm1,%xmm3,%xmm3 vaesenc %xmm1,%xmm4,%xmm4 vaesenc %xmm1,%xmm5,%xmm5 vaesenc %xmm1,%xmm6,%xmm6 vaesenc %xmm1,%xmm7,%xmm7 vaesenc %xmm1,%xmm8,%xmm8 vaesenc %xmm1,%xmm9,%xmm9 vmovups 208-120(%rsi),%xmm1 vaesenc %xmm0,%xmm2,%xmm2 vaesenc %xmm0,%xmm3,%xmm3 vaesenc %xmm0,%xmm4,%xmm4 vaesenc %xmm0,%xmm5,%xmm5 vaesenc %xmm0,%xmm6,%xmm6 
vaesenc %xmm0,%xmm7,%xmm7 vaesenc %xmm0,%xmm8,%xmm8 vaesenc %xmm0,%xmm9,%xmm9 vmovups 224-120(%rsi),%xmm0 .Lenc8x_tail: vaesenc %xmm1,%xmm2,%xmm2 vpxor %xmm15,%xmm15,%xmm15 vaesenc %xmm1,%xmm3,%xmm3 vaesenc %xmm1,%xmm4,%xmm4 vpcmpgtd %xmm15,%xmm14,%xmm15 vaesenc %xmm1,%xmm5,%xmm5 vaesenc %xmm1,%xmm6,%xmm6 vpaddd %xmm14,%xmm15,%xmm15 vmovdqu 48(%rsp),%xmm14 vaesenc %xmm1,%xmm7,%xmm7 movq 64(%rsp),%rbx vaesenc %xmm1,%xmm8,%xmm8 vaesenc %xmm1,%xmm9,%xmm9 vmovups 16-120(%rsi),%xmm1 vaesenclast %xmm0,%xmm2,%xmm2 vmovdqa %xmm15,32(%rsp) vpxor %xmm15,%xmm15,%xmm15 vaesenclast %xmm0,%xmm3,%xmm3 vaesenclast %xmm0,%xmm4,%xmm4 vpcmpgtd %xmm15,%xmm14,%xmm15 vaesenclast %xmm0,%xmm5,%xmm5 vaesenclast %xmm0,%xmm6,%xmm6 vpaddd %xmm15,%xmm14,%xmm14 vmovdqu -120(%rsi),%xmm15 vaesenclast %xmm0,%xmm7,%xmm7 vaesenclast %xmm0,%xmm8,%xmm8 vmovdqa %xmm14,48(%rsp) vaesenclast %xmm0,%xmm9,%xmm9 vmovups 32-120(%rsi),%xmm0 vmovups %xmm2,-16(%r8) subq %rbx,%r8 vpxor 0(%rbp),%xmm2,%xmm2 vmovups %xmm3,-16(%r9) subq 72(%rsp),%r9 vpxor 16(%rbp),%xmm3,%xmm3 vmovups %xmm4,-16(%r10) subq 80(%rsp),%r10 vpxor 32(%rbp),%xmm4,%xmm4 vmovups %xmm5,-16(%r11) subq 88(%rsp),%r11 vpxor 48(%rbp),%xmm5,%xmm5 vmovups %xmm6,-16(%r12) subq 96(%rsp),%r12 vpxor %xmm10,%xmm6,%xmm6 vmovups %xmm7,-16(%r13) subq 104(%rsp),%r13 vpxor %xmm11,%xmm7,%xmm7 vmovups %xmm8,-16(%r14) subq 112(%rsp),%r14 vpxor %xmm12,%xmm8,%xmm8 vmovups %xmm9,-16(%r15) subq 120(%rsp),%r15 vpxor %xmm13,%xmm9,%xmm9 decl %edx jnz .Loop_enc8x movq 16(%rsp),%rax .cfi_def_cfa %rax,8 .Lenc8x_done: vzeroupper movq -48(%rax),%r15 .cfi_restore %r15 movq -40(%rax),%r14 .cfi_restore %r14 movq -32(%rax),%r13 .cfi_restore %r13 movq -24(%rax),%r12 .cfi_restore %r12 movq -16(%rax),%rbp .cfi_restore %rbp movq -8(%rax),%rbx .cfi_restore %rbx leaq (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc8x_epilogue: .byte 0xf3,0xc3 .cfi_endproc .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx .type aesni_multi_cbc_decrypt_avx,@function .align 32 aesni_multi_cbc_decrypt_avx: .cfi_startproc _avx_cbc_dec_shortcut: movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx .cfi_offset %rbx,-16 pushq %rbp .cfi_offset %rbp,-24 pushq %r12 .cfi_offset %r12,-32 pushq %r13 .cfi_offset %r13,-40 pushq %r14 .cfi_offset %r14,-48 pushq %r15 .cfi_offset %r15,-56 subq $256,%rsp andq $-256,%rsp subq $192,%rsp movq %rax,16(%rsp) .cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 .Ldec8x_body: vzeroupper vmovdqu (%rsi),%xmm15 leaq 120(%rsi),%rsi leaq 160(%rdi),%rdi shrl $1,%edx .Ldec8x_loop_grande: xorl %edx,%edx movl -144(%rdi),%ecx movq -160(%rdi),%r8 cmpl %edx,%ecx movq -152(%rdi),%rbx cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -136(%rdi),%xmm2 movl %ecx,32(%rsp) cmovleq %rsp,%r8 subq %r8,%rbx movq %rbx,64(%rsp) vmovdqu %xmm2,192(%rsp) movl -104(%rdi),%ecx movq -120(%rdi),%r9 cmpl %edx,%ecx movq -112(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -96(%rdi),%xmm3 movl %ecx,36(%rsp) cmovleq %rsp,%r9 subq %r9,%rbp movq %rbp,72(%rsp) vmovdqu %xmm3,208(%rsp) movl -64(%rdi),%ecx movq -80(%rdi),%r10 cmpl %edx,%ecx movq -72(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -56(%rdi),%xmm4 movl %ecx,40(%rsp) cmovleq %rsp,%r10 subq %r10,%rbp movq %rbp,80(%rsp) vmovdqu %xmm4,224(%rsp) movl -24(%rdi),%ecx movq -40(%rdi),%r11 cmpl %edx,%ecx movq -32(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu -16(%rdi),%xmm5 movl %ecx,44(%rsp) cmovleq %rsp,%r11 subq %r11,%rbp movq %rbp,88(%rsp) vmovdqu %xmm5,240(%rsp) movl 16(%rdi),%ecx movq 0(%rdi),%r12 cmpl %edx,%ecx movq 8(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 
24(%rdi),%xmm6 movl %ecx,48(%rsp) cmovleq %rsp,%r12 subq %r12,%rbp movq %rbp,96(%rsp) vmovdqu %xmm6,256(%rsp) movl 56(%rdi),%ecx movq 40(%rdi),%r13 cmpl %edx,%ecx movq 48(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 64(%rdi),%xmm7 movl %ecx,52(%rsp) cmovleq %rsp,%r13 subq %r13,%rbp movq %rbp,104(%rsp) vmovdqu %xmm7,272(%rsp) movl 96(%rdi),%ecx movq 80(%rdi),%r14 cmpl %edx,%ecx movq 88(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 104(%rdi),%xmm8 movl %ecx,56(%rsp) cmovleq %rsp,%r14 subq %r14,%rbp movq %rbp,112(%rsp) vmovdqu %xmm8,288(%rsp) movl 136(%rdi),%ecx movq 120(%rdi),%r15 cmpl %edx,%ecx movq 128(%rdi),%rbp cmovgl %ecx,%edx testl %ecx,%ecx vmovdqu 144(%rdi),%xmm9 movl %ecx,60(%rsp) cmovleq %rsp,%r15 subq %r15,%rbp movq %rbp,120(%rsp) vmovdqu %xmm9,304(%rsp) testl %edx,%edx jz .Ldec8x_done vmovups 16-120(%rsi),%xmm1 vmovups 32-120(%rsi),%xmm0 movl 240-120(%rsi),%eax leaq 192+128(%rsp),%rbp vmovdqu (%r8),%xmm2 vmovdqu (%r9),%xmm3 vmovdqu (%r10),%xmm4 vmovdqu (%r11),%xmm5 vmovdqu (%r12),%xmm6 vmovdqu (%r13),%xmm7 vmovdqu (%r14),%xmm8 vmovdqu (%r15),%xmm9 vmovdqu %xmm2,0(%rbp) vpxor %xmm15,%xmm2,%xmm2 vmovdqu %xmm3,16(%rbp) vpxor %xmm15,%xmm3,%xmm3 vmovdqu %xmm4,32(%rbp) vpxor %xmm15,%xmm4,%xmm4 vmovdqu %xmm5,48(%rbp) vpxor %xmm15,%xmm5,%xmm5 vmovdqu %xmm6,64(%rbp) vpxor %xmm15,%xmm6,%xmm6 vmovdqu %xmm7,80(%rbp) vpxor %xmm15,%xmm7,%xmm7 vmovdqu %xmm8,96(%rbp) vpxor %xmm15,%xmm8,%xmm8 vmovdqu %xmm9,112(%rbp) vpxor %xmm15,%xmm9,%xmm9 xorq $0x80,%rbp movl $1,%ecx jmp .Loop_dec8x .align 32 .Loop_dec8x: vaesdec %xmm1,%xmm2,%xmm2 cmpl 32+0(%rsp),%ecx vaesdec %xmm1,%xmm3,%xmm3 prefetcht0 31(%r8) vaesdec %xmm1,%xmm4,%xmm4 vaesdec %xmm1,%xmm5,%xmm5 leaq (%r8,%rbx,1),%rbx cmovgeq %rsp,%r8 vaesdec %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm1,%xmm7,%xmm7 subq %r8,%rbx vaesdec %xmm1,%xmm8,%xmm8 vmovdqu 16(%r8),%xmm10 movq %rbx,64+0(%rsp) vaesdec %xmm1,%xmm9,%xmm9 vmovups -72(%rsi),%xmm1 leaq 16(%r8,%rbx,1),%r8 vmovdqu %xmm10,128(%rsp) vaesdec %xmm0,%xmm2,%xmm2 cmpl 32+4(%rsp),%ecx movq 64+8(%rsp),%rbx vaesdec %xmm0,%xmm3,%xmm3 prefetcht0 31(%r9) vaesdec %xmm0,%xmm4,%xmm4 vaesdec %xmm0,%xmm5,%xmm5 leaq (%r9,%rbx,1),%rbx cmovgeq %rsp,%r9 vaesdec %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm0,%xmm7,%xmm7 subq %r9,%rbx vaesdec %xmm0,%xmm8,%xmm8 vmovdqu 16(%r9),%xmm11 movq %rbx,64+8(%rsp) vaesdec %xmm0,%xmm9,%xmm9 vmovups -56(%rsi),%xmm0 leaq 16(%r9,%rbx,1),%r9 vmovdqu %xmm11,144(%rsp) vaesdec %xmm1,%xmm2,%xmm2 cmpl 32+8(%rsp),%ecx movq 64+16(%rsp),%rbx vaesdec %xmm1,%xmm3,%xmm3 prefetcht0 31(%r10) vaesdec %xmm1,%xmm4,%xmm4 prefetcht0 15(%r8) vaesdec %xmm1,%xmm5,%xmm5 leaq (%r10,%rbx,1),%rbx cmovgeq %rsp,%r10 vaesdec %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm1,%xmm7,%xmm7 subq %r10,%rbx vaesdec %xmm1,%xmm8,%xmm8 vmovdqu 16(%r10),%xmm12 movq %rbx,64+16(%rsp) vaesdec %xmm1,%xmm9,%xmm9 vmovups -40(%rsi),%xmm1 leaq 16(%r10,%rbx,1),%r10 vmovdqu %xmm12,160(%rsp) vaesdec %xmm0,%xmm2,%xmm2 cmpl 32+12(%rsp),%ecx movq 64+24(%rsp),%rbx vaesdec %xmm0,%xmm3,%xmm3 prefetcht0 31(%r11) vaesdec %xmm0,%xmm4,%xmm4 prefetcht0 15(%r9) vaesdec %xmm0,%xmm5,%xmm5 leaq (%r11,%rbx,1),%rbx cmovgeq %rsp,%r11 vaesdec %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm0,%xmm7,%xmm7 subq %r11,%rbx vaesdec %xmm0,%xmm8,%xmm8 vmovdqu 16(%r11),%xmm13 movq %rbx,64+24(%rsp) vaesdec %xmm0,%xmm9,%xmm9 vmovups -24(%rsi),%xmm0 leaq 16(%r11,%rbx,1),%r11 vmovdqu %xmm13,176(%rsp) vaesdec %xmm1,%xmm2,%xmm2 cmpl 32+16(%rsp),%ecx movq 64+32(%rsp),%rbx vaesdec %xmm1,%xmm3,%xmm3 prefetcht0 31(%r12) vaesdec %xmm1,%xmm4,%xmm4 
prefetcht0 15(%r10) vaesdec %xmm1,%xmm5,%xmm5 leaq (%r12,%rbx,1),%rbx cmovgeq %rsp,%r12 vaesdec %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm1,%xmm7,%xmm7 subq %r12,%rbx vaesdec %xmm1,%xmm8,%xmm8 vmovdqu 16(%r12),%xmm10 movq %rbx,64+32(%rsp) vaesdec %xmm1,%xmm9,%xmm9 vmovups -8(%rsi),%xmm1 leaq 16(%r12,%rbx,1),%r12 vaesdec %xmm0,%xmm2,%xmm2 cmpl 32+20(%rsp),%ecx movq 64+40(%rsp),%rbx vaesdec %xmm0,%xmm3,%xmm3 prefetcht0 31(%r13) vaesdec %xmm0,%xmm4,%xmm4 prefetcht0 15(%r11) vaesdec %xmm0,%xmm5,%xmm5 leaq (%rbx,%r13,1),%rbx cmovgeq %rsp,%r13 vaesdec %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm0,%xmm7,%xmm7 subq %r13,%rbx vaesdec %xmm0,%xmm8,%xmm8 vmovdqu 16(%r13),%xmm11 movq %rbx,64+40(%rsp) vaesdec %xmm0,%xmm9,%xmm9 vmovups 8(%rsi),%xmm0 leaq 16(%r13,%rbx,1),%r13 vaesdec %xmm1,%xmm2,%xmm2 cmpl 32+24(%rsp),%ecx movq 64+48(%rsp),%rbx vaesdec %xmm1,%xmm3,%xmm3 prefetcht0 31(%r14) vaesdec %xmm1,%xmm4,%xmm4 prefetcht0 15(%r12) vaesdec %xmm1,%xmm5,%xmm5 leaq (%r14,%rbx,1),%rbx cmovgeq %rsp,%r14 vaesdec %xmm1,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm1,%xmm7,%xmm7 subq %r14,%rbx vaesdec %xmm1,%xmm8,%xmm8 vmovdqu 16(%r14),%xmm12 movq %rbx,64+48(%rsp) vaesdec %xmm1,%xmm9,%xmm9 vmovups 24(%rsi),%xmm1 leaq 16(%r14,%rbx,1),%r14 vaesdec %xmm0,%xmm2,%xmm2 cmpl 32+28(%rsp),%ecx movq 64+56(%rsp),%rbx vaesdec %xmm0,%xmm3,%xmm3 prefetcht0 31(%r15) vaesdec %xmm0,%xmm4,%xmm4 prefetcht0 15(%r13) vaesdec %xmm0,%xmm5,%xmm5 leaq (%r15,%rbx,1),%rbx cmovgeq %rsp,%r15 vaesdec %xmm0,%xmm6,%xmm6 cmovgq %rsp,%rbx vaesdec %xmm0,%xmm7,%xmm7 subq %r15,%rbx vaesdec %xmm0,%xmm8,%xmm8 vmovdqu 16(%r15),%xmm13 movq %rbx,64+56(%rsp) vaesdec %xmm0,%xmm9,%xmm9 vmovups 40(%rsi),%xmm0 leaq 16(%r15,%rbx,1),%r15 vmovdqu 32(%rsp),%xmm14 prefetcht0 15(%r14) prefetcht0 15(%r15) cmpl $11,%eax jb .Ldec8x_tail vaesdec %xmm1,%xmm2,%xmm2 vaesdec %xmm1,%xmm3,%xmm3 vaesdec %xmm1,%xmm4,%xmm4 vaesdec %xmm1,%xmm5,%xmm5 vaesdec %xmm1,%xmm6,%xmm6 vaesdec %xmm1,%xmm7,%xmm7 vaesdec %xmm1,%xmm8,%xmm8 vaesdec %xmm1,%xmm9,%xmm9 vmovups 176-120(%rsi),%xmm1 vaesdec %xmm0,%xmm2,%xmm2 vaesdec %xmm0,%xmm3,%xmm3 vaesdec %xmm0,%xmm4,%xmm4 vaesdec %xmm0,%xmm5,%xmm5 vaesdec %xmm0,%xmm6,%xmm6 vaesdec %xmm0,%xmm7,%xmm7 vaesdec %xmm0,%xmm8,%xmm8 vaesdec %xmm0,%xmm9,%xmm9 vmovups 192-120(%rsi),%xmm0 je .Ldec8x_tail vaesdec %xmm1,%xmm2,%xmm2 vaesdec %xmm1,%xmm3,%xmm3 vaesdec %xmm1,%xmm4,%xmm4 vaesdec %xmm1,%xmm5,%xmm5 vaesdec %xmm1,%xmm6,%xmm6 vaesdec %xmm1,%xmm7,%xmm7 vaesdec %xmm1,%xmm8,%xmm8 vaesdec %xmm1,%xmm9,%xmm9 vmovups 208-120(%rsi),%xmm1 vaesdec %xmm0,%xmm2,%xmm2 vaesdec %xmm0,%xmm3,%xmm3 vaesdec %xmm0,%xmm4,%xmm4 vaesdec %xmm0,%xmm5,%xmm5 vaesdec %xmm0,%xmm6,%xmm6 vaesdec %xmm0,%xmm7,%xmm7 vaesdec %xmm0,%xmm8,%xmm8 vaesdec %xmm0,%xmm9,%xmm9 vmovups 224-120(%rsi),%xmm0 .Ldec8x_tail: vaesdec %xmm1,%xmm2,%xmm2 vpxor %xmm15,%xmm15,%xmm15 vaesdec %xmm1,%xmm3,%xmm3 vaesdec %xmm1,%xmm4,%xmm4 vpcmpgtd %xmm15,%xmm14,%xmm15 vaesdec %xmm1,%xmm5,%xmm5 vaesdec %xmm1,%xmm6,%xmm6 vpaddd %xmm14,%xmm15,%xmm15 vmovdqu 48(%rsp),%xmm14 vaesdec %xmm1,%xmm7,%xmm7 movq 64(%rsp),%rbx vaesdec %xmm1,%xmm8,%xmm8 vaesdec %xmm1,%xmm9,%xmm9 vmovups 16-120(%rsi),%xmm1 vaesdeclast %xmm0,%xmm2,%xmm2 vmovdqa %xmm15,32(%rsp) vpxor %xmm15,%xmm15,%xmm15 vaesdeclast %xmm0,%xmm3,%xmm3 vpxor 0(%rbp),%xmm2,%xmm2 vaesdeclast %xmm0,%xmm4,%xmm4 vpxor 16(%rbp),%xmm3,%xmm3 vpcmpgtd %xmm15,%xmm14,%xmm15 vaesdeclast %xmm0,%xmm5,%xmm5 vpxor 32(%rbp),%xmm4,%xmm4 vaesdeclast %xmm0,%xmm6,%xmm6 vpxor 48(%rbp),%xmm5,%xmm5 vpaddd %xmm15,%xmm14,%xmm14 vmovdqu -120(%rsi),%xmm15 vaesdeclast %xmm0,%xmm7,%xmm7 
vpxor 64(%rbp),%xmm6,%xmm6 vaesdeclast %xmm0,%xmm8,%xmm8 vpxor 80(%rbp),%xmm7,%xmm7 vmovdqa %xmm14,48(%rsp) vaesdeclast %xmm0,%xmm9,%xmm9 vpxor 96(%rbp),%xmm8,%xmm8 vmovups 32-120(%rsi),%xmm0 vmovups %xmm2,-16(%r8) subq %rbx,%r8 vmovdqu 128+0(%rsp),%xmm2 vpxor 112(%rbp),%xmm9,%xmm9 vmovups %xmm3,-16(%r9) subq 72(%rsp),%r9 vmovdqu %xmm2,0(%rbp) vpxor %xmm15,%xmm2,%xmm2 vmovdqu 128+16(%rsp),%xmm3 vmovups %xmm4,-16(%r10) subq 80(%rsp),%r10 vmovdqu %xmm3,16(%rbp) vpxor %xmm15,%xmm3,%xmm3 vmovdqu 128+32(%rsp),%xmm4 vmovups %xmm5,-16(%r11) subq 88(%rsp),%r11 vmovdqu %xmm4,32(%rbp) vpxor %xmm15,%xmm4,%xmm4 vmovdqu 128+48(%rsp),%xmm5 vmovups %xmm6,-16(%r12) subq 96(%rsp),%r12 vmovdqu %xmm5,48(%rbp) vpxor %xmm15,%xmm5,%xmm5 vmovdqu %xmm10,64(%rbp) vpxor %xmm10,%xmm15,%xmm6 vmovups %xmm7,-16(%r13) subq 104(%rsp),%r13 vmovdqu %xmm11,80(%rbp) vpxor %xmm11,%xmm15,%xmm7 vmovups %xmm8,-16(%r14) subq 112(%rsp),%r14 vmovdqu %xmm12,96(%rbp) vpxor %xmm12,%xmm15,%xmm8 vmovups %xmm9,-16(%r15) subq 120(%rsp),%r15 vmovdqu %xmm13,112(%rbp) vpxor %xmm13,%xmm15,%xmm9 xorq $128,%rbp decl %edx jnz .Loop_dec8x movq 16(%rsp),%rax .cfi_def_cfa %rax,8 .Ldec8x_done: vzeroupper movq -48(%rax),%r15 .cfi_restore %r15 movq -40(%rax),%r14 .cfi_restore %r14 movq -32(%rax),%r13 .cfi_restore %r13 movq -24(%rax),%r12 .cfi_restore %r12 movq -16(%rax),%rbp .cfi_restore %rbp movq -8(%rax),%rbx .cfi_restore %rbx leaq (%rax),%rsp .cfi_def_cfa_register %rsp .Ldec8x_epilogue: .byte 0xf3,0xc3 .cfi_endproc .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx .section ".note.gnu.property", "a" .p2align 3 .long 1f - 0f .long 4f - 1f .long 5 0: # "GNU" encoded with .byte, since .asciz isn't supported # on Solaris. .byte 0x47 .byte 0x4e .byte 0x55 .byte 0 1: .p2align 3 .long 0xc0000002 .long 3f - 2f 2: .long 3 3: .p2align 3 4:
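# The entry points above follow the SysV AMD64 calling convention:
#   %rdi -> array of per-stream descriptors (40 bytes each; see sketch below)
#   %rsi -> expanded AES key schedule (the round count is read from offset 240
#           of the key schedule)
#   %edx -> count argument; when it is at least 2 and the AVX feature bit
#           (bit 28 of OPENSSL_ia32cap_P+4) is set, control transfers to the
#           8-lane _avx_cbc_enc_shortcut/_avx_cbc_dec_shortcut code, otherwise
#           the 4-lane SSE paths are used.
#
# A minimal C-side sketch of the per-stream descriptor, inferred from the
# operand offsets used above (input pointer at +0, output pointer at +8,
# block count at +16, 16-byte IV at +24, 40-byte stride between streams).
# The struct tag and field names are illustrative and are not defined by
# this file:
#
#   typedef struct {
#       const unsigned char *inp;     /* +0:  input (plaintext/ciphertext)  */
#       unsigned char       *out;     /* +8:  output                        */
#       int                  blocks;  /* +16: number of 16-byte blocks      */
#       unsigned long long   iv[2];   /* +24: per-stream IV (8-byte aligned,
#                                        so 4 bytes of padding precede it)  */
#   } CIPH_DESC;                      /* sizeof == 40 */
#
#   /* assumed prototypes for the globals exported above */
#   void aesni_multi_cbc_encrypt(CIPH_DESC *streams, const void *key, int num);
#   void aesni_multi_cbc_decrypt(CIPH_DESC *streams, const void *key, int num);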