[llvm] c7bd284 - Support for i8/i16 for bitreverse using GFNI. (#88625)

Mon Apr 15 02:38:29 PDT 2024

Author: shamithoke
Date: 2024-04-15T10:38:26+01:00
New Revision: c7bd284dc3ff21a5a14b4ccbbfa1988de8723c33

URL: https://github.com/llvm/llvm-project/commit/c7bd284dc3ff21a5a14b4ccbbfa1988de8723c33
DIFF: https://github.com/llvm/llvm-project/commit/c7bd284dc3ff21a5a14b4ccbbfa1988de8723c33.diff

LOG: Support for i8/i16 for bitreverse using GFNI. (#88625)

In continuation to the PR #81764, this change extends the GFNI support to i8 and i16.

---------

Co-authored-by: shami <shami_thoke at yahoo.com>

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/bitreverse.ll
    llvm/test/CodeGen/X86/vector-bitreverse.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index edfa05a66288e1..87f2eaddedf864 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,8 +1287,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     if (Subtarget.hasGFNI()) {
-      setOperationAction(ISD::BITREVERSE,       MVT::i32, Custom);
-      setOperationAction(ISD::BITREVERSE,       MVT::i64, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
     }
 
     // These might be better off as horizontal vector ops.
@@ -31317,16 +31319,18 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
   if (VT.is256BitVector() && !Subtarget.hasInt256())
     return splitVectorIntUnary(Op, DAG, DL);
 
-  // Lower i32/i64 as vXi8 BITREVERSE + BSWAP
+  // Lower i8/i16/i32/i64 as vXi8 BITREVERSE + BSWAP
   if (!VT.isVector()) {
-    assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+    assert(
+        (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
+        "Only tested for i8/i16/i32/i64");
     MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
     SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
     Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
                       DAG.getBitcast(MVT::v16i8, Res));
     Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                       DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
-    return DAG.getNode(ISD::BSWAP, DL, VT, Res);
+    return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
   }
 
   assert(VT.isVector() && VT.getSizeInBits() >= 128);

diff  --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 704563ab1bbf70..4f2654843728f4 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNI-LABEL: test_bitreverse_i16:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    rolw $8, %di
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNI-NEXT:    shll $4, %eax
-; GFNI-NEXT:    shrl $4, %edi
-; GFNI-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNI-NEXT:    orl %eax, %edi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNI-NEXT:    shrl $2, %edi
-; GFNI-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNI-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNI-NEXT:    shrl %eax
-; GFNI-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNI-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    rolw $8, %ax
 ; GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNI-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)
@@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i8:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    rolb $4, %dil
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $51, %al
-; GFNI-NEXT:    shlb $2, %al
-; GFNI-NEXT:    shrb $2, %dil
-; GFNI-NEXT:    andb $51, %dil
-; GFNI-NEXT:    orb %dil, %al
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andb $85, %cl
-; GFNI-NEXT:    addb %cl, %cl
-; GFNI-NEXT:    shrb %al
-; GFNI-NEXT:    andb $85, %al
-; GFNI-NEXT:    orb %cl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i4:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $8, %al
-; GFNI-NEXT:    leal (%rdi,%rdi), %ecx
-; GFNI-NEXT:    andb $4, %cl
-; GFNI-NEXT:    leal (,%rdi,8), %edx
-; GFNI-NEXT:    andb $8, %dl
-; GFNI-NEXT:    orb %cl, %dl
-; GFNI-NEXT:    shrb %dil
-; GFNI-NEXT:    andb $2, %dil
-; GFNI-NEXT:    orb %dil, %dl
-; GFNI-NEXT:    shrb $3, %al
-; GFNI-NEXT:    orb %dl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    shrb $4, %al
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i4 @llvm.bitreverse.i4(i4 %a)
   ret i4 %b

diff  --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b22b508db8b280..90cc3d5fdde829 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i8:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    rolb $4, %dil
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andb $51, %al
-; GFNISSE-NEXT:    shlb $2, %al
-; GFNISSE-NEXT:    shrb $2, %dil
-; GFNISSE-NEXT:    andb $51, %dil
-; GFNISSE-NEXT:    orb %dil, %al
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andb $85, %cl
-; GFNISSE-NEXT:    addb %cl, %cl
-; GFNISSE-NEXT:    shrb %al
-; GFNISSE-NEXT:    andb $85, %al
-; GFNISSE-NEXT:    orb %cl, %al
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i8:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    rolb $4, %dil
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andb $51, %al
-; GFNIAVX-NEXT:    shlb $2, %al
-; GFNIAVX-NEXT:    shrb $2, %dil
-; GFNIAVX-NEXT:    andb $51, %dil
-; GFNIAVX-NEXT:    orb %dil, %al
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andb $85, %cl
-; GFNIAVX-NEXT:    addb %cl, %cl
-; GFNIAVX-NEXT:    shrb %al
-; GFNIAVX-NEXT:    andb $85, %al
-; GFNIAVX-NEXT:    orb %cl, %al
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i16:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNISSE-NEXT:    rolw $8, %di
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNISSE-NEXT:    shll $4, %eax
-; GFNISSE-NEXT:    shrl $4, %edi
-; GFNISSE-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNISSE-NEXT:    orl %eax, %edi
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNISSE-NEXT:    shrl $2, %edi
-; GFNISSE-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNISSE-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNISSE-NEXT:    shrl %eax
-; GFNISSE-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNISSE-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    rolw $8, %ax
 ; GFNISSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i16:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNIAVX-NEXT:    rolw $8, %di
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNIAVX-NEXT:    shll $4, %eax
-; GFNIAVX-NEXT:    shrl $4, %edi
-; GFNIAVX-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNIAVX-NEXT:    orl %eax, %edi
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNIAVX-NEXT:    shrl $2, %edi
-; GFNIAVX-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNIAVX-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNIAVX-NEXT:    shrl %eax
-; GFNIAVX-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNIAVX-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    rolw $8, %ax
 ; GFNIAVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)