[llvm] Use 256 bit register when AVX2 or higher is available. (PR #91721)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 03:08:53 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (shamithoke)
Changes:
Based on my internal tests, using a 256-bit register is faster when one is available.
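
The lowering this patch touches relies on the identity that a full scalar bit reversal equals a per-byte bit reversal (a single `vgf2p8affineqb` under GFNI) followed by a byte swap; the patch keeps that structure and only widens the vector the affine transform runs on. A self-contained sketch of the identity, using GCC/Clang builtins (the helper names are illustrative, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

// Naive reference: reverse all 64 bits.
static uint64_t bitreverse64(uint64_t x) {
  uint64_t r = 0;
  for (int i = 0; i < 64; ++i)
    r |= ((x >> i) & 1) << (63 - i);
  return r;
}

// Reverse the bits within each byte -- what the vector BITREVERSE on
// v16i8/v32i8 computes, and what GFNI selects to one vgf2p8affineqb.
static uint64_t reverse_bits_per_byte(uint64_t x) {
  uint64_t r = 0;
  for (int b = 0; b < 8; ++b) {
    uint8_t byte = (x >> (8 * b)) & 0xFF;
    uint8_t rev = 0;
    for (int i = 0; i < 8; ++i)
      rev |= ((byte >> i) & 1) << (7 - i);
    r |= (uint64_t)rev << (8 * b);
  }
  return r;
}

int main() {
  uint64_t x = 0x0123456789ABCDEFULL;
  // Full bit reversal == per-byte bit reversal + byte swap, which is why
  // LowerBITREVERSE emits a trailing BSWAP for i16/i32/i64.
  assert(bitreverse64(x) == __builtin_bswap64(reverse_bits_per_byte(x)));
  return 0;
}
```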
---
Full diff: https://github.com/llvm/llvm-project/pull/91721.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+6-3)
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+23-16)
- (modified) llvm/test/CodeGen/X86/vector-bitreverse.ll (+111-29)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a811ce43422ec..bbacc146abe98 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31363,10 +31363,13 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
assert(
(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
"Only tested for i8/i16/i32/i64");
- MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
+ unsigned int VecLen = Subtarget.hasAVX2() ? 256 : 128;
+ MVT CharVecVT = Subtarget.hasAVX2() ? MVT::v32i8 : MVT::v16i8;
+
+ MVT VecVT = MVT::getVectorVT(VT, VecLen / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
- Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, Res));
+ Res = DAG.getNode(ISD::BITREVERSE, DL, CharVecVT,
+ DAG.getBitcast(CharVecVT, Res));
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 4f2654843728f..207878408e57e 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -173,9 +173,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; GFNI-LABEL: test_bitreverse_i64:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovq %rdi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovq %xmm0, %rax
; GFNI-NEXT: bswapq %rax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
@@ -238,9 +239,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; GFNI-LABEL: test_bitreverse_i32:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: bswapl %eax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
@@ -306,10 +308,11 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
; GFNI-LABEL: test_bitreverse_i24:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: bswapl %eax
; GFNI-NEXT: shrl $8, %eax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i24 @llvm.bitreverse.i24(i24 %a)
ret i24 %b
@@ -375,10 +378,11 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; GFNI-LABEL: test_bitreverse_i16:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: rolw $8, %ax
; GFNI-NEXT: # kill: def $ax killed $ax killed $eax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
@@ -433,9 +437,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
; GFNI-LABEL: test_bitreverse_i8:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: # kill: def $al killed $al killed $eax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
@@ -492,10 +497,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
; GFNI-LABEL: test_bitreverse_i4:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; GFNI-NEXT: vmovd %xmm0, %eax
; GFNI-NEXT: shrb $4, %al
; GFNI-NEXT: # kill: def $al killed $al killed $eax
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%b = call i4 @llvm.bitreverse.i4(i4 %a)
ret i4 %b
@@ -1340,48 +1346,48 @@ define i528 @large_promotion(i528 %A) nounwind {
; GFNI-NEXT: pushq %r14
; GFNI-NEXT: pushq %rbx
; GFNI-NEXT: movq %rdi, %rax
-; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9241421688590303745,9241421688590303745]
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %r10
; GFNI-NEXT: bswapq %r10
; GFNI-NEXT: vmovq %r9, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %rdi
; GFNI-NEXT: bswapq %rdi
; GFNI-NEXT: vmovq %r8, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %r8
; GFNI-NEXT: bswapq %r8
; GFNI-NEXT: movq %r8, %r9
; GFNI-NEXT: shldq $16, %rdi, %r9
; GFNI-NEXT: shldq $16, %r10, %rdi
; GFNI-NEXT: vmovq %rcx, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %rcx
; GFNI-NEXT: bswapq %rcx
; GFNI-NEXT: shrdq $48, %rcx, %r8
; GFNI-NEXT: vmovq %rdx, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %rdx
; GFNI-NEXT: bswapq %rdx
; GFNI-NEXT: shrdq $48, %rdx, %rcx
; GFNI-NEXT: vmovq %rsi, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %rsi
; GFNI-NEXT: bswapq %rsi
; GFNI-NEXT: shrdq $48, %rsi, %rdx
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %r11
; GFNI-NEXT: bswapq %r11
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm1
; GFNI-NEXT: vmovq %xmm1, %rbx
; GFNI-NEXT: bswapq %rbx
; GFNI-NEXT: shrdq $48, %rbx, %r11
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, %ymm0, %ymm1, %ymm0
; GFNI-NEXT: vmovq %xmm0, %r14
; GFNI-NEXT: bswapq %r14
; GFNI-NEXT: shrdq $48, %r14, %rbx
@@ -1398,6 +1404,7 @@ define i528 @large_promotion(i528 %A) nounwind {
; GFNI-NEXT: movw %si, 64(%rax)
; GFNI-NEXT: popq %rbx
; GFNI-NEXT: popq %r14
+; GFNI-NEXT: vzeroupper
; GFNI-NEXT: retq
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 90cc3d5fdde82..e950955dff80a 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -67,13 +67,33 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNISSE-NEXT: # kill: def $al killed $al killed $eax
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: test_bitreverse_i8:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vmovd %edi, %xmm0
-; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: vmovd %xmm0, %eax
-; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax
-; GFNIAVX-NEXT: retq
+; GFNIAVX1-LABEL: test_bitreverse_i8:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovd %edi, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT: vmovd %xmm0, %eax
+; GFNIAVX1-NEXT: # kill: def $al killed $al killed $eax
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_i8:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vmovd %edi, %xmm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vmovd %xmm0, %eax
+; GFNIAVX2-NEXT: # kill: def $al killed $al killed $eax
+; GFNIAVX2-NEXT: vzeroupper
+; GFNIAVX2-NEXT: retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_i8:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vmovd %edi, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vmovd %xmm0, %eax
+; GFNIAVX512-NEXT: # kill: def $al killed $al killed $eax
+; GFNIAVX512-NEXT: vzeroupper
+; GFNIAVX512-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
}
@@ -142,14 +162,36 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: test_bitreverse_i16:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vmovd %edi, %xmm0
-; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: vmovd %xmm0, %eax
-; GFNIAVX-NEXT: rolw $8, %ax
-; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax
-; GFNIAVX-NEXT: retq
+; GFNIAVX1-LABEL: test_bitreverse_i16:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovd %edi, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT: vmovd %xmm0, %eax
+; GFNIAVX1-NEXT: rolw $8, %ax
+; GFNIAVX1-NEXT: # kill: def $ax killed $ax killed $eax
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_i16:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vmovd %edi, %xmm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vmovd %xmm0, %eax
+; GFNIAVX2-NEXT: rolw $8, %ax
+; GFNIAVX2-NEXT: # kill: def $ax killed $ax killed $eax
+; GFNIAVX2-NEXT: vzeroupper
+; GFNIAVX2-NEXT: retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_i16:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vmovd %edi, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vmovd %xmm0, %eax
+; GFNIAVX512-NEXT: rolw $8, %ax
+; GFNIAVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; GFNIAVX512-NEXT: vzeroupper
+; GFNIAVX512-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
}
@@ -214,13 +256,33 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; GFNISSE-NEXT: bswapl %eax
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: test_bitreverse_i32:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vmovd %edi, %xmm0
-; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: vmovd %xmm0, %eax
-; GFNIAVX-NEXT: bswapl %eax
-; GFNIAVX-NEXT: retq
+; GFNIAVX1-LABEL: test_bitreverse_i32:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovd %edi, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT: vmovd %xmm0, %eax
+; GFNIAVX1-NEXT: bswapl %eax
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_i32:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vmovd %edi, %xmm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vmovd %xmm0, %eax
+; GFNIAVX2-NEXT: bswapl %eax
+; GFNIAVX2-NEXT: vzeroupper
+; GFNIAVX2-NEXT: retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_i32:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vmovd %edi, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vmovd %xmm0, %eax
+; GFNIAVX512-NEXT: bswapl %eax
+; GFNIAVX512-NEXT: vzeroupper
+; GFNIAVX512-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
}
@@ -289,13 +351,33 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; GFNISSE-NEXT: bswapq %rax
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: test_bitreverse_i64:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vmovq %rdi, %xmm0
-; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: vmovq %xmm0, %rax
-; GFNIAVX-NEXT: bswapq %rax
-; GFNIAVX-NEXT: retq
+; GFNIAVX1-LABEL: test_bitreverse_i64:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovq %rdi, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT: vmovq %xmm0, %rax
+; GFNIAVX1-NEXT: bswapq %rax
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_i64:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vmovq %rdi, %xmm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vmovq %xmm0, %rax
+; GFNIAVX2-NEXT: bswapq %rax
+; GFNIAVX2-NEXT: vzeroupper
+; GFNIAVX2-NEXT: retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_i64:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vmovq %rdi, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vmovq %xmm0, %rax
+; GFNIAVX512-NEXT: bswapq %rax
+; GFNIAVX512-NEXT: vzeroupper
+; GFNIAVX512-NEXT: retq
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
}
``````````
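
For readers of the test diffs above: the broadcast constant `9241421688590303745` is `0x8040201008040201`, the GF(2) affine matrix whose bytes are the single-bit values `0x01` through `0x80`; under `vgf2p8affineqb` it maps bit `7-i` of every source byte to bit `i`, i.e. it reverses the bits within each byte. A scalar model of the per-byte transform with `imm8 = 0`, written from the ISA pseudocode rather than taken from this patch (the function name is illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of one byte of vgf2p8affineqb with imm8 = 0: result bit i
// is the parity of (matrix byte 7-i AND the source byte).
static uint8_t gf2p8affine_byte(uint64_t matrix, uint8_t src) {
  uint8_t out = 0;
  for (int i = 0; i < 8; ++i) {
    uint8_t row = (matrix >> (8 * (7 - i))) & 0xFF;
    out |= (__builtin_popcount(row & src) & 1) << i;
  }
  return out;
}

int main() {
  const uint64_t BitReverseMatrix = 0x8040201008040201ULL; // 9241421688590303745
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t rev = 0;
    for (int i = 0; i < 8; ++i) // naive per-byte bit reversal
      rev |= ((v >> i) & 1u) << (7 - i);
    assert(gf2p8affine_byte(BitReverseMatrix, (uint8_t)v) == rev);
  }
  return 0;
}
```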
https://github.com/llvm/llvm-project/pull/91721