[llvm] Support for i8/i16 for bitreverse using GFNI. (PR #88625)

Sat Apr 13 08:20:44 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (shamithoke)

<details>
<summary>Changes</summary>

In continuation to the PR - https://github.com/llvm/llvm-project/pull/81764, this change extends the GFNI support to i8 and i16. 

---
Full diff: https://github.com/llvm/llvm-project/pull/88625.diff


3 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+11-2) 
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+13-44) 
- (modified) llvm/test/CodeGen/X86/vector-bitreverse.ll (+16-62) 


``````````diff

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b7cb4b7dafeb69..bf8d7fc175fe64 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,6 +1287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     if (Subtarget.hasGFNI()) {
+      setOperationAction(ISD::BITREVERSE,       MVT::i8, Custom);
+      setOperationAction(ISD::BITREVERSE,       MVT::i16, Custom);
       setOperationAction(ISD::BITREVERSE,       MVT::i32, Custom);
       setOperationAction(ISD::BITREVERSE,       MVT::i64, Custom);
     }
@@ -1501,6 +1503,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::TRUNCATE,          MVT::v32i32, Custom);
     setOperationAction(ISD::TRUNCATE,          MVT::v32i64, Custom);
 
+    if (Subtarget.hasGFNI()) {
+      setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+    }
+
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
       setOperationAction(ISD::SETCC,           VT, Custom);
       setOperationAction(ISD::CTPOP,           VT, Custom);
@@ -31335,14 +31344,14 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
 
   // Lower i32/i64 as vXi8 BITREVERSE + BSWAP
   if (!VT.isVector()) {
-    assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+    assert(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8);
     MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
     SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
     Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
                       DAG.getBitcast(MVT::v16i8, Res));
     Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                       DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
-    return DAG.getNode(ISD::BSWAP, DL, VT, Res);
+    return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
   }
 
   assert(VT.isVector() && VT.getSizeInBits() >= 128);
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 704563ab1bbf70..4f2654843728f4 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNI-LABEL: test_bitreverse_i16:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    rolw $8, %di
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNI-NEXT:    shll $4, %eax
-; GFNI-NEXT:    shrl $4, %edi
-; GFNI-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNI-NEXT:    orl %eax, %edi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNI-NEXT:    shrl $2, %edi
-; GFNI-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNI-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNI-NEXT:    shrl %eax
-; GFNI-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNI-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    rolw $8, %ax
 ; GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNI-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)
@@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i8:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    rolb $4, %dil
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $51, %al
-; GFNI-NEXT:    shlb $2, %al
-; GFNI-NEXT:    shrb $2, %dil
-; GFNI-NEXT:    andb $51, %dil
-; GFNI-NEXT:    orb %dil, %al
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andb $85, %cl
-; GFNI-NEXT:    addb %cl, %cl
-; GFNI-NEXT:    shrb %al
-; GFNI-NEXT:    andb $85, %al
-; GFNI-NEXT:    orb %cl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i4:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $8, %al
-; GFNI-NEXT:    leal (%rdi,%rdi), %ecx
-; GFNI-NEXT:    andb $4, %cl
-; GFNI-NEXT:    leal (,%rdi,8), %edx
-; GFNI-NEXT:    andb $8, %dl
-; GFNI-NEXT:    orb %cl, %dl
-; GFNI-NEXT:    shrb %dil
-; GFNI-NEXT:    andb $2, %dil
-; GFNI-NEXT:    orb %dil, %dl
-; GFNI-NEXT:    shrb $3, %al
-; GFNI-NEXT:    orb %dl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    shrb $4, %al
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i4 @llvm.bitreverse.i4(i4 %a)
   ret i4 %b
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b22b508db8b280..90cc3d5fdde829 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i8:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    rolb $4, %dil
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andb $51, %al
-; GFNISSE-NEXT:    shlb $2, %al
-; GFNISSE-NEXT:    shrb $2, %dil
-; GFNISSE-NEXT:    andb $51, %dil
-; GFNISSE-NEXT:    orb %dil, %al
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andb $85, %cl
-; GFNISSE-NEXT:    addb %cl, %cl
-; GFNISSE-NEXT:    shrb %al
-; GFNISSE-NEXT:    andb $85, %al
-; GFNISSE-NEXT:    orb %cl, %al
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i8:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    rolb $4, %dil
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andb $51, %al
-; GFNIAVX-NEXT:    shlb $2, %al
-; GFNIAVX-NEXT:    shrb $2, %dil
-; GFNIAVX-NEXT:    andb $51, %dil
-; GFNIAVX-NEXT:    orb %dil, %al
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andb $85, %cl
-; GFNIAVX-NEXT:    addb %cl, %cl
-; GFNIAVX-NEXT:    shrb %al
-; GFNIAVX-NEXT:    andb $85, %al
-; GFNIAVX-NEXT:    orb %cl, %al
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i16:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNISSE-NEXT:    rolw $8, %di
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNISSE-NEXT:    shll $4, %eax
-; GFNISSE-NEXT:    shrl $4, %edi
-; GFNISSE-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNISSE-NEXT:    orl %eax, %edi
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNISSE-NEXT:    shrl $2, %edi
-; GFNISSE-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNISSE-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNISSE-NEXT:    shrl %eax
-; GFNISSE-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNISSE-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    rolw $8, %ax
 ; GFNISSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i16:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNIAVX-NEXT:    rolw $8, %di
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNIAVX-NEXT:    shll $4, %eax
-; GFNIAVX-NEXT:    shrl $4, %edi
-; GFNIAVX-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNIAVX-NEXT:    orl %eax, %edi
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNIAVX-NEXT:    shrl $2, %edi
-; GFNIAVX-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNIAVX-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNIAVX-NEXT:    shrl %eax
-; GFNIAVX-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNIAVX-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    rolw $8, %ax
 ; GFNIAVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)

``````````

</details>


https://github.com/llvm/llvm-project/pull/88625