[llvm] [x32] Fix BLAKE3 assembly (PR #149617)

Harald van Dijk via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 19 03:53:49 PDT 2025


https://github.com/hvdijk updated https://github.com/llvm/llvm-project/pull/149617

>From db270fb27d9d2461f9f80ee29c165839f4d09103 Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald at gigawatt.nl>
Date: Sat, 19 Jul 2025 00:47:46 +0100
Subject: [PATCH] [x32] Fix BLAKE3 assembly

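The x86-64 assembly implementations of BLAKE3 are used in both 64-bit
and 32-bit pointer mode (_ILP32), but only worked in 64-bit pointer
mode. This PR makes them work in 32-bit pointer mode as well: under
_ILP32, esi and edx are zero-extended into rsi and rdx on entry, and
the pointer array at rdi is read as 4-byte entries (with the matching
halved stride) instead of 8-byte entries.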
---
 .../Support/BLAKE3/blake3_avx2_x86-64_unix.S  | 43 +++++++++
 .../BLAKE3/blake3_avx512_x86-64_unix.S        | 91 +++++++++++++++++++
 .../Support/BLAKE3/blake3_sse2_x86-64_unix.S  | 28 ++++++
 .../Support/BLAKE3/blake3_sse41_x86-64_unix.S | 28 ++++++
 4 files changed, 190 insertions(+)

diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
index e98893c7ef8b8..f285fe119f4c1 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -45,6 +45,10 @@ blake3_hash_many_avx2:
         mov     rbp, rsp
         sub     rsp, 680
         and     rsp, 0xFFFFFFFFFFFFFFC0
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         neg     r9d
         vmovd   xmm0, r9d
         vpbroadcastd ymm0, xmm0
@@ -77,6 +81,7 @@ blake3_hash_many_avx2:
         vpbroadcastd ymm5, dword ptr [rcx+0x14]
         vpbroadcastd ymm6, dword ptr [rcx+0x18]
         vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -85,6 +90,16 @@ blake3_hash_many_avx2:
         mov     r13, qword ptr [rdi+0x28]
         mov     r14, qword ptr [rdi+0x30]
         mov     r15, qword ptr [rdi+0x38]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x10]
+        mov     r13d, dword ptr [rdi+0x14]
+        mov     r14d, dword ptr [rdi+0x18]
+        mov     r15d, dword ptr [rdi+0x1c]
+#endif
         movzx   eax, byte ptr [rbp+0x38]
         movzx   ebx, byte ptr [rbp+0x40]
         or      eax, ebx
@@ -1305,7 +1320,11 @@ blake3_hash_many_avx2:
         vmovdqa ymm0, ymmword ptr [rsp+0x260]
         vpsubd  ymm2, ymm0, ymm2
         vmovdqa ymmword ptr [rsp+0x260], ymm2
+#ifndef _ILP32
         add     rdi, 64
+#else
+        add     rdi, 32
+#endif
         add     rbx, 256
         mov     qword ptr [rbp+0x50], rbx
         sub     rsi, 8
@@ -1346,10 +1365,17 @@ blake3_hash_many_avx2:
         vpblendd ymm15, ymm15, ymm12, 0x44
         vmovdqa ymmword ptr [rsp], ymm14
         vmovdqa ymmword ptr [rsp+0x20], ymm15
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1557,7 +1583,11 @@ blake3_hash_many_avx2:
         vmovaps xmmword ptr [rsp+0x240], xmm0
         vmovaps xmmword ptr [rsp+0x260], xmm2
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
 3:
         test    rsi, 0x2
@@ -1573,8 +1603,13 @@ blake3_hash_many_avx2:
         vinserti128 ymm13, ymm13, xmm14, 0x01
         vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
         vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1683,7 +1718,11 @@ blake3_hash_many_avx2:
         vmovaps ymmword ptr [rsp+0x240], ymm0
         vmovaps ymmword ptr [rsp+0x260], ymm2
         add     rbx, 64
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         sub     rsi, 2
 3:
         test    rsi, 0x1
@@ -1695,7 +1734,11 @@ blake3_hash_many_avx2:
         vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vmovdqa xmm14, xmmword ptr [ROT16+rip]
         vmovdqa xmm15, xmmword ptr [ROT8+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
index b4b14946de10e..709c4752d4084 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -59,6 +59,10 @@ blake3_hash_many_avx512:
         sub     rsp, 144
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         kmovw   k1, r9d
         vmovd   xmm0, r8d
         vpbroadcastd ymm0, xmm0
@@ -107,6 +111,7 @@ blake3_hash_many_avx512:
         cmp     rdx, qword ptr [rsp+0x80]
         cmove   eax, ebx
         mov     dword ptr [rsp+0x88], eax
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -115,6 +120,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x48]
         mov     r14, qword ptr [rdi+0x50]
         mov     r15, qword ptr [rdi+0x58]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x20]
+        mov     r13d, dword ptr [rdi+0x24]
+        mov     r14d, dword ptr [rdi+0x28]
+        mov     r15d, dword ptr [rdi+0x2c]
+#endif
         vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
         vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
         vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
@@ -127,6 +142,7 @@ blake3_hash_many_avx512:
         vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
         vpunpcklqdq zmm10, zmm18, zmm19
         vpunpckhqdq zmm11, zmm18, zmm19
+#ifndef _ILP32
         mov     r8, qword ptr [rdi+0x20]
         mov     r9, qword ptr [rdi+0x28]
         mov     r10, qword ptr [rdi+0x30]
@@ -135,6 +151,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x68]
         mov     r14, qword ptr [rdi+0x70]
         mov     r15, qword ptr [rdi+0x78]
+#else
+        mov     r8d, dword ptr [rdi+0x10]
+        mov     r9d, dword ptr [rdi+0x14]
+        mov     r10d, dword ptr [rdi+0x18]
+        mov     r11d, dword ptr [rdi+0x1c]
+        mov     r12d, dword ptr [rdi+0x30]
+        mov     r13d, dword ptr [rdi+0x34]
+        mov     r14d, dword ptr [rdi+0x38]
+        mov     r15d, dword ptr [rdi+0x3c]
+#endif
         vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
         vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
         vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
@@ -169,6 +195,7 @@ blake3_hash_many_avx512:
         vmovdqa32 zmm23, zmm19
         vpermt2d zmm19, zmm27, zmm8
         vpermt2d zmm23, zmm31, zmm8
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -177,6 +204,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x48]
         mov     r14, qword ptr [rdi+0x50]
         mov     r15, qword ptr [rdi+0x58]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x20]
+        mov     r13d, dword ptr [rdi+0x24]
+        mov     r14d, dword ptr [rdi+0x28]
+        mov     r15d, dword ptr [rdi+0x2c]
+#endif
         vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
         vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
         vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
@@ -197,6 +234,7 @@ blake3_hash_many_avx512:
         prefetcht0 [r14+rdx+0x80]
         prefetcht0 [r11+rdx+0x80]
         prefetcht0 [r15+rdx+0x80]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi+0x20]
         mov     r9, qword ptr [rdi+0x28]
         mov     r10, qword ptr [rdi+0x30]
@@ -205,6 +243,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x68]
         mov     r14, qword ptr [rdi+0x70]
         mov     r15, qword ptr [rdi+0x78]
+#else
+        mov     r8d, dword ptr [rdi+0x10]
+        mov     r9d, dword ptr [rdi+0x14]
+        mov     r10d, dword ptr [rdi+0x18]
+        mov     r11d, dword ptr [rdi+0x1c]
+        mov     r12d, dword ptr [rdi+0x30]
+        mov     r13d, dword ptr [rdi+0x34]
+        mov     r14d, dword ptr [rdi+0x38]
+        mov     r15d, dword ptr [rdi+0x3c]
+#endif
         vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
         vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
         vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
@@ -1095,7 +1143,11 @@ blake3_hash_many_avx512:
         vpaddd  zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
         vmovdqa32 zmmword ptr [rsp], zmm2
         vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
+#ifndef _ILP32
         add     rdi, 128
+#else
+        add     rdi, 64
+#endif
         add     rbx, 512
         mov     qword ptr [rbp+0x50], rbx
         sub     rsi, 16
@@ -1125,6 +1177,7 @@ blake3_hash_many_avx512:
         vpbroadcastd ymm5, dword ptr [rcx+0x14]
         vpbroadcastd ymm6, dword ptr [rcx+0x18]
         vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -1133,6 +1186,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x28]
         mov     r14, qword ptr [rdi+0x30]
         mov     r15, qword ptr [rdi+0x38]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x10]
+        mov     r13d, dword ptr [rdi+0x14]
+        mov     r14d, dword ptr [rdi+0x18]
+        mov     r15d, dword ptr [rdi+0x1c]
+#endif
         movzx   eax, byte ptr [rbp+0x38]
         movzx   ebx, byte ptr [rbp+0x40]
         or      eax, ebx
@@ -2055,7 +2118,11 @@ blake3_hash_many_avx512:
         vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
         add     rbx, 256
         mov     qword ptr [rbp+0x50], rbx
+#ifndef _ILP32
         add     rdi, 64
+#else
+        add     rdi, 32
+#endif
         sub     rsi, 8
 3:
         mov     rbx, qword ptr [rbp+0x50]
@@ -2078,10 +2145,17 @@ blake3_hash_many_avx512:
         kmovw   k2, eax
         vpblendmd zmm13 {k2}, zmm13, zmm12
         vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         mov     eax, 43690
         kmovw   k3, eax
         mov     eax, 34952
@@ -2195,7 +2269,11 @@ blake3_hash_many_avx512:
         vmovdqa xmmword ptr [rsp], xmm0
         vmovdqa xmmword ptr [rsp+0x40], xmm2
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
 3:
         test    esi, 0x2
@@ -2209,8 +2287,13 @@ blake3_hash_many_avx512:
         vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
         vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vinserti128 ymm13, ymm13, xmm14, 0x01
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -2308,7 +2391,11 @@ blake3_hash_many_avx512:
         vmovdqa xmmword ptr [rsp], xmm0
         vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
         add     rbx, 64
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         sub     rsi, 2
 3:
         test    esi, 0x1
@@ -2319,7 +2406,11 @@ blake3_hash_many_avx512:
         vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
         vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
index d69a1706fefe7..85434df927cdd 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -54,6 +54,10 @@ blake3_hash_many_sse2:
         sub     rsp, 360
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9d
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         movd    xmm0, r9d
         pshufd  xmm0, xmm0, 0x00
         movdqa  xmmword ptr [rsp+0x130], xmm0
@@ -91,10 +95,17 @@ blake3_hash_many_sse2:
         pshufd  xmm5, xmm7, 0x55
         pshufd  xmm6, xmm7, 0xAA
         pshufd  xmm7, xmm7, 0xFF
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1648,7 +1659,11 @@ blake3_hash_many_sse2:
         psubd   xmm1, xmm0
         movdqa  xmmword ptr [rsp+0x120], xmm1
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
         cmp     rsi, 4
         jnc     2b
@@ -1679,8 +1694,13 @@ blake3_hash_many_sse2:
         movd    xmm13, dword ptr [rsp+0x124]
         punpckldq xmm14, xmm13
         movaps  xmmword ptr [rsp+0x10], xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1909,7 +1929,11 @@ blake3_hash_many_sse2:
         mov    r11d, dword ptr [rsp+0x120+8*rax]
         mov dword ptr [rsp+0x110], r10d
         mov dword ptr [rsp+0x120], r11d
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         add     rbx, 64
         sub     rsi, 2
 3:
@@ -1920,7 +1944,11 @@ blake3_hash_many_sse2:
         movd    xmm13, dword ptr [rsp+0x110]
         movd    xmm14, dword ptr [rsp+0x120]
         punpckldq xmm13, xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
index c5b103af61c4f..403773421587c 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -54,6 +54,10 @@ blake3_hash_many_sse41:
         sub     rsp, 360
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9d
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         movd    xmm0, r9d
         pshufd  xmm0, xmm0, 0x00
         movdqa  xmmword ptr [rsp+0x130], xmm0
@@ -91,10 +95,17 @@ blake3_hash_many_sse41:
         pshufd  xmm5, xmm7, 0x55
         pshufd  xmm6, xmm7, 0xAA
         pshufd  xmm7, xmm7, 0xFF
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1452,7 +1463,11 @@ blake3_hash_many_sse41:
         psubd   xmm1, xmm0
         movdqa  xmmword ptr [rsp+0x120], xmm1
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
         cmp     rsi, 4
         jnc     2b
@@ -1483,8 +1498,13 @@ blake3_hash_many_sse41:
         pinsrd  xmm14, dword ptr [rsp+0x124], 1
         pinsrd  xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         movaps  xmmword ptr [rsp+0x10], xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1686,7 +1706,11 @@ blake3_hash_many_sse41:
         blendvps xmm2, xmm4, xmm0
         movdqa  xmmword ptr [rsp+0x110], xmm1
         movdqa  xmmword ptr [rsp+0x120], xmm2
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         add     rbx, 64
         sub     rsi, 2
 3:
@@ -1699,7 +1723,11 @@ blake3_hash_many_sse41:
         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         movaps  xmm14, xmmword ptr [ROT8+rip]
         movaps  xmm15, xmmword ptr [ROT16+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx



More information about the llvm-commits mailing list