[llvm] r307563 - [PPC CodeGen] Expand the bitreverse.i64 intrinsic.

Tony Jiang via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 10 11:11:23 PDT 2017


Author: jtony
Date: Mon Jul 10 11:11:23 2017
New Revision: 307563

URL: http://llvm.org/viewvc/llvm-project?rev=307563&view=rev
Log:
[PPC CodeGen] Expand the bitreverse.i64 intrinsic.

Differential Revision: https://reviews.llvm.org/D34908
Fix PR: https://bugs.llvm.org/show_bug.cgi?id=33093

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
    llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=307563&r1=307562&r2=307563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Jul 10 11:11:23 2017
@@ -138,6 +138,7 @@ PPCTargetLowering::PPCTargetLowering(con
 
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 
   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
   for (MVT VT : MVT::integer_valuetypes()) {

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=307563&r1=307562&r2=307563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Mon Jul 10 11:11:23 2017
@@ -4522,3 +4522,122 @@ def RotateInsertByte1 {
 
 def : Pat<(i32 (bitreverse i32:$A)),
   (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+
+// Fast 64-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
+// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA);
+// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
+// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC);
+// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
+// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0);
+// Step 4: byte reverse (n = [B1,B2,B3,B4,B5,B6,B7,B8] becomes [B8,...,B1]):
+// Apply the byte-reverse sequence used by the 32-bit pattern above to the
+// high and the low 32 bits of the value separately, then OR the two reversed
+// words together (each in its final position) to get the result.
+// Widen each 32-bit value from MaskValues (defined outside this hunk) to an
+// i64 by inserting it as the sub_32 subregister of an otherwise undefined
+// (IMPLICIT_DEF) 64-bit register. Only the low word is meaningful here.
+def MaskValues64 {
+  dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32));
+  dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32));
+  dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32));
+  dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32));
+  dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32));
+  dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32));
+}
+
+// Build the six 64-bit masks named in the algorithm comment. For each mask the
+// widened 32-bit value is shifted into the upper word (RLDICR x, 32, 31, i.e.
+// a left shift by 32, which also clears the lower word); the lower word is
+// then filled 16 bits at a time with ORIS8 (its upper halfword) and ORI8 (its
+// lower halfword).
+def DWMaskValues {
+  dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555);
+  dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA);
+  dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333);
+  dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC);
+  dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F);
+  dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0);
+}
+
+// Step 1 operands: the input shifted right by one bit (rotate left 63, clear
+// the top bit) and shifted left by one bit (rotate left 1, clear the low bit).
+def DWShift1 {
+  dag Right = (RLDICL $A, 63, 1);
+  dag Left = (RLDICR $A, 1, 62);
+}
+
+// Step 1 result: ((n >> 1) & Lo1) | ((n << 1) & Hi1), which exchanges every
+// pair of adjacent bits.
+def DWSwap1 {
+  dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1),
+                 (AND8 DWShift1.Left, DWMaskValues.Hi1));
+}
+
+// Step 2 operands: the step 1 result shifted right and left by two bits.
+def DWShift2 {
+  dag Right = (RLDICL DWSwap1.Bit, 62, 2);
+  dag Left = (RLDICR DWSwap1.Bit, 2, 61);
+}
+
+// Step 2 result: ((n >> 2) & Lo2) | ((n << 2) & Hi2), which exchanges every
+// pair of adjacent 2-bit groups.
+def DWSwap2 {
+  dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2),
+                  (AND8 DWShift2.Left, DWMaskValues.Hi2));
+}
+
+// Step 3 operands: the step 2 result shifted right and left by four bits.
+def DWShift4 {
+  dag Right = (RLDICL DWSwap2.Bits, 60, 4);
+  dag Left = (RLDICR DWSwap2.Bits, 4, 59);
+}
+
+// Step 3 result: ((n >> 4) & Lo4) | ((n << 4) & Hi4), which exchanges the two
+// nibbles of every byte. After this step the bits inside each byte are
+// reversed, but the bytes are still in their original order.
+def DWSwap4 {
+  dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4),
+                  (AND8 DWShift4.Left, DWMaskValues.Hi4));
+}
+
+// Bit swap is done, now start byte swap.
+// Low 32 bits of the bit-swapped doubleword, taken as an i32 via sub_32.
+def DWExtractLo32 {
+  dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32));
+}
+
+// Rotate the low word left by 24 bits with a full 0..31 mask. Writing the word
+// as bytes [a,b,c,d] (a most significant), this gives [d,a,b,c]: the first and
+// third bytes are already where a byte reversal needs them.
+def DWRotateLo32 {
+  dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31);
+}
+
+// Rotate the original low word left by 8 ([b,c,d,a]) and insert bits 8..15 of
+// that into the rotated value: [d,a,b,c] becomes [d,c,b,c].
+def DWLo32RotateInsertByte3 {
+  dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15);
+}
+
+// Lower 32 bits in the right order
+// Same rotate-left-by-8 source, now inserting bits 24..31: [d,c,b,c] becomes
+// [d,c,b,a], i.e. the low word with its four bytes reversed.
+def DWLo32RotateInsertByte1 {
+  dag Left =
+    (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31);
+}
+
+// Byte-reversed low word re-wrapped as an i64: the i32 result becomes the
+// sub_32 subregister of an IMPLICIT_DEF 64-bit value, so it sits in the low
+// 32-bit position.
+def ExtendLo32 {
+  dag To64Bit =
+    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+          DWLo32RotateInsertByte1.Left, sub_32));
+}
+
+// Move the high word of the bit-swapped doubleword down into the low 32 bits
+// (logical right shift by 32) so the 32-bit byte-reverse sequence can be
+// applied to it.
+def DWShiftHi32 { // SRDI DWSwap4.Bits, 32
+  dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32);
+}
+
+// The (shifted-down) high word of the bit-swapped doubleword as an i32.
+def DWExtractHi32 {
+  dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32));
+}
+
+// Rotate the high word left by 24 bits with a full 0..31 mask: bytes [a,b,c,d]
+// (a most significant) become [d,a,b,c].
+def DWRotateHi32 {
+  dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31);
+}
+
+// Rotate the original high word left by 8 ([b,c,d,a]) and insert bits 8..15 of
+// that into the rotated value: [d,a,b,c] becomes [d,c,b,c].
+def DWHi32RotateInsertByte3 {
+  dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15);
+}
+
+// High 32 bits in the right order, but in the low 32-bit position
+// Same rotate-left-by-8 source, now inserting bits 24..31: [d,c,b,c] becomes
+// [d,c,b,a], i.e. the high word with its four bytes reversed.
+def DWHi32RotateInsertByte1 {
+  dag Left =
+    (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31);
+}
+
+// Byte-reversed high word re-wrapped as an i64: the i32 result becomes the
+// sub_32 subregister of an IMPLICIT_DEF 64-bit value, so it still sits in the
+// low 32-bit position.
+def ExtendHi32 {
+  dag To64Bit =
+    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+          DWHi32RotateInsertByte1.Left, sub_32));
+}
+
+// Reversing the bytes of the whole doubleword also exchanges its two words:
+// for n = [B1,...,B8] the result must be [B8,...,B1]. The byte-reversed LOW
+// word ([B8,B7,B6,B5]) therefore has to be moved into the upper half.
+def DWShiftLo32 { // SLDI ExtendLo32.To64Bit, 32
+  dag ToHi32 = (RLDICR ExtendLo32.To64Bit, 32, 31);
+}
+
+// ExtendHi32.To64Bit already holds the byte-reversed HIGH word ([B4,B3,B2,B1])
+// in the low 32-bit position, so it is ORed in unshifted.
+// NOTE(review): the OR assumes the upper 32 bits of ExtendHi32.To64Bit are
+// zero even though it is built on an IMPLICIT_DEF - confirm.
+def : Pat<(i64 (bitreverse i64:$A)),
+  (OR8 DWShiftLo32.ToHi32, ExtendHi32.To64Bit)>;

Modified: llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/pr33093.ll?rev=307563&r1=307562&r2=307563&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pr33093.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/pr33093.ll Mon Jul 10 11:11:23 2017
@@ -65,3 +65,101 @@ entry:
   %or22 = or i32 %or19, %shl18
   ret i32 %or22
 }
+
+; Hand-written 64-bit bit reversal: three shift/mask swap stages (1, 2 and 4
+; bits) followed by a swap of all eight bytes. The expected assembly is the
+; same sequence that testBitReverse.ll expects for llvm.bitreverse.i64.
+; NOTE(review): the last two expected instructions before blr
+; (sldi 12, 5, 32 / or 3, 12, 6) put the byte-reversed HIGH word (r5, built
+; from r4 = value >> 32) back into the upper half, whereas the IR below moves
+; the lowest byte to the top (%shl33). The two words look like they should be
+; exchanged - verify the expected output.
+define i64 @ReverseBits64(i64 %n) {
+; CHECK-LABEL: ReverseBits64:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lis 4, -21846
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    lis 6, -13108
+; CHECK-NEXT:    lis 7, 13107
+; CHECK-NEXT:    sldi 8, 3, 1
+; CHECK-NEXT:    rldicl 3, 3, 63, 1
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    ori 6, 6, 52428
+; CHECK-NEXT:    ori 7, 7, 13107
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 4, 4, 43690
+; CHECK-NEXT:    oris 5, 5, 21845
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    sldi 5, 6, 32
+; CHECK-NEXT:    sldi 6, 7, 32
+; CHECK-NEXT:    and 4, 8, 4
+; CHECK-NEXT:    lis 7, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    oris 12, 5, 52428
+; CHECK-NEXT:    oris 9, 6, 13107
+; CHECK-NEXT:    lis 6, -3856
+; CHECK-NEXT:    ori 7, 7, 3855
+; CHECK-NEXT:    sldi 8, 3, 2
+; CHECK-NEXT:    ori 4, 12, 52428
+; CHECK-NEXT:    rldicl 3, 3, 62, 2
+; CHECK-NEXT:    ori 5, 9, 13107
+; CHECK-NEXT:    ori 6, 6, 61680
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    sldi 5, 6, 32
+; CHECK-NEXT:    and 4, 8, 4
+; CHECK-NEXT:    sldi 6, 7, 32
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    oris 10, 5, 61680
+; CHECK-NEXT:    oris 11, 6, 3855
+; CHECK-NEXT:    sldi 6, 3, 4
+; CHECK-NEXT:    ori 4, 10, 61680
+; CHECK-NEXT:    rldicl 3, 3, 60, 4
+; CHECK-NEXT:    ori 5, 11, 3855
+; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    rldicl 4, 3, 32, 32
+; CHECK-NEXT:    rlwinm 6, 3, 24, 0, 31
+; CHECK-NEXT:    rlwinm 5, 4, 24, 0, 31
+; CHECK-NEXT:    rlwimi 6, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT:    rlwimi 6, 3, 8, 24, 31
+; CHECK-NEXT:    rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT:    sldi 12, 5, 32
+; CHECK-NEXT:    or 3, 12, 6
+; CHECK-NEXT:    blr
+entry:
+  %shr = lshr i64 %n, 1
+  %and = and i64 %shr, 6148914691236517205
+  %and1 = shl i64 %n, 1
+  %shl = and i64 %and1, -6148914691236517206
+  %or = or i64 %and, %shl
+  %shr2 = lshr i64 %or, 2
+  %and3 = and i64 %shr2, 3689348814741910323
+  %and4 = shl i64 %or, 2
+  %shl5 = and i64 %and4, -3689348814741910324
+  %or6 = or i64 %and3, %shl5
+  %shr7 = lshr i64 %or6, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %and9 = shl i64 %or6, 4
+  %shl10 = and i64 %and9, -1085102592571150096
+  %or11 = or i64 %and8, %shl10
+  %shr13 = lshr i64 %or11, 56
+  %and14 = lshr i64 %or11, 40
+  %shr15 = and i64 %and14, 65280
+  %and17 = lshr i64 %or11, 24
+  %shr18 = and i64 %and17, 16711680
+  %and20 = lshr i64 %or11, 8
+  %shr21 = and i64 %and20, 4278190080
+  %and23 = shl i64 %or11, 8
+  %shl24 = and i64 %and23, 1095216660480
+  %and26 = shl i64 %or11, 24
+  %shl27 = and i64 %and26, 280375465082880
+  %and29 = shl i64 %or11, 40
+  %shl30 = and i64 %and29, 71776119061217280
+  %shl33 = shl i64 %or11, 56
+  %or16 = or i64 %shl33, %shr13
+  %or19 = or i64 %or16, %shr15
+  %or22 = or i64 %or19, %shr18
+  %or25 = or i64 %or22, %shr21
+  %or28 = or i64 %or25, %shl24
+  %or31 = or i64 %or28, %shl27
+  %or34 = or i64 %or31, %shl30
+  ret i64 %or34
+}

Modified: llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll?rev=307563&r1=307562&r2=307563&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll Mon Jul 10 11:11:23 2017
@@ -40,3 +40,66 @@ define i32 @testBitReverseIntrinsicI32(i
   %res = call i32 @llvm.bitreverse.i32(i32 %arg)
   ret i32 %res
 }
+
+; Direct call of the llvm.bitreverse.i64 intrinsic; pins the exact instruction
+; sequence selected for it.
+; NOTE(review): the last two expected instructions before blr
+; (sldi 12, 5, 32 / or 3, 12, 6) keep the byte-reversed HIGH word (r5, built
+; from r4 = value >> 32) in the upper half. A full 64-bit reversal needs the
+; byte-reversed LOW word (r6) there - verify the expected output.
+declare i64 @llvm.bitreverse.i64(i64)
+define i64 @testBitReverseIntrinsicI64(i64 %arg) {
+; CHECK-LABEL: testBitReverseIntrinsicI64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    lis 4, -21846
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    lis 6, -13108
+; CHECK-NEXT:    lis 7, 13107
+; CHECK-NEXT:    sldi 8, 3, 1
+; CHECK-NEXT:    rldicl 3, 3, 63, 1
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    ori 6, 6, 52428
+; CHECK-NEXT:    ori 7, 7, 13107
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 4, 4, 43690
+; CHECK-NEXT:    oris 5, 5, 21845
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    sldi 5, 6, 32
+; CHECK-NEXT:    sldi 6, 7, 32
+; CHECK-NEXT:    and 4, 8, 4
+; CHECK-NEXT:    lis 7, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    oris 12, 5, 52428
+; CHECK-NEXT:    oris 9, 6, 13107
+; CHECK-NEXT:    lis 6, -3856
+; CHECK-NEXT:    ori 7, 7, 3855
+; CHECK-NEXT:    sldi 8, 3, 2
+; CHECK-NEXT:    ori 4, 12, 52428
+; CHECK-NEXT:    rldicl 3, 3, 62, 2
+; CHECK-NEXT:    ori 5, 9, 13107
+; CHECK-NEXT:    ori 6, 6, 61680
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    sldi 5, 6, 32
+; CHECK-NEXT:    and 4, 8, 4
+; CHECK-NEXT:    sldi 6, 7, 32
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    oris 10, 5, 61680
+; CHECK-NEXT:    oris 11, 6, 3855
+; CHECK-NEXT:    sldi 6, 3, 4
+; CHECK-NEXT:    ori 4, 10, 61680
+; CHECK-NEXT:    rldicl 3, 3, 60, 4
+; CHECK-NEXT:    ori 5, 11, 3855
+; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    rldicl 4, 3, 32, 32
+; CHECK-NEXT:    rlwinm 6, 3, 24, 0, 31
+; CHECK-NEXT:    rlwinm 5, 4, 24, 0, 31
+; CHECK-NEXT:    rlwimi 6, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT:    rlwimi 6, 3, 8, 24, 31
+; CHECK-NEXT:    rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT:    sldi 12, 5, 32
+; CHECK-NEXT:    or 3, 12, 6
+; CHECK-NEXT:    blr
+  %res = call i64 @llvm.bitreverse.i64(i64 %arg)
+  ret i64 %res
+}




More information about the llvm-commits mailing list