[llvm] r307413 - [PPC CodeGen] Expand the bitreverse.i32 intrinsic.

Tony Jiang via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 09:41:55 PDT 2017


Author: jtony
Date: Fri Jul  7 09:41:55 2017
New Revision: 307413

URL: http://llvm.org/viewvc/llvm-project?rev=307413&view=rev
Log:
[PPC CodeGen] Expand the bitreverse.i32 intrinsic.

Differential Revision: https://reviews.llvm.org/D33572
Fix PR: https://bugs.llvm.org/show_bug.cgi?id=33093

Added:
    llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
    llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
Removed:
    llvm/trunk/test/CodeGen/PowerPC/bitreverse.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=307413&r1=307412&r2=307413&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Jul  7 09:41:55 2017
@@ -136,6 +136,9 @@ PPCTargetLowering::PPCTargetLowering(con
     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
   }
 
+  // Match i32 BITREVERSE to the custom fast code sequence in PPCInstrInfo.td.
+  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=307413&r1=307412&r2=307413&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Fri Jul  7 09:41:55 2017
@@ -4454,3 +4454,71 @@ def MSGSYNC : XForm_0<31, 886, (outs), (
 def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>;
 
 } // IsISA3_0
+
+// Fast 32-bit reverse bits algorithm:
+// Step 1: 1-bit swap (swap each odd bit with its even neighbor):
+// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
+// Step 2: 2-bit swap (swap each odd 2-bit group with its even neighbor):
+// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC);
+// Step 3: 4-bit swap (swap each odd 4-bit group with its even neighbor):
+// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0);
+// Step 4: byte reverse (suppose n = [B1,B2,B3,B4]):
+// Step 4.1: Put B4 and B2 in their final positions (rotate left by 3 bytes):
+// n' = (n rotl 24);  After which n' = [B4, B1, B2, B3]
+// Step 4.2: Insert B3 into its final position:
+// n' = rlwimi n', n, 8, 8, 15;  After which n' = [B4, B3, B2, B3]
+// Step 4.3: Insert B1 into its final position:
+// n' = rlwimi n', n, 8, 24, 31;  After which n' = [B4, B3, B2, B1]
+def MaskValues {  // The six masks of Steps 1-3, each built as LIS (upper halfword) then ORI (lower halfword).
+  dag Lo1 = (ORI (LIS 0x5555), 0x5555);  // Step 1 mask 0x55555555 (applied to n >> 1)
+  dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA);  // Step 1 mask 0xAAAAAAAA (applied to n << 1)
+  dag Lo2 = (ORI (LIS 0x3333), 0x3333);  // Step 2 mask 0x33333333 (applied to n >> 2)
+  dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC);  // Step 2 mask 0xCCCCCCCC (applied to n << 2)
+  dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F);  // Step 3 mask 0x0F0F0F0F (applied to n >> 4)
+  dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0);  // Step 3 mask 0xF0F0F0F0 (applied to n << 4)
+}
+
+def Shift1 {  // Step 1 operands: the pattern input $A shifted by one bit each way.
+  dag Right = (RLWINM $A, 31, 1, 31);  // rotate left 31, keep bits 1..31: n >> 1
+  dag Left = (RLWINM $A, 1, 0, 30);  // rotate left 1, keep bits 0..30: n << 1
+}
+
+def Swap1 {  // Step 1 result: adjacent single bits exchanged.
+  dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1),  // (n >> 1) & 0x55555555
+   (AND Shift1.Left, MaskValues.Hi1));  // | (n << 1) & 0xAAAAAAAA
+}
+
+def Shift2 {  // Step 2 operands: the Step 1 result shifted by two bits each way.
+  dag Right = (RLWINM Swap1.Bit, 30, 2, 31);  // rotate left 30, keep bits 2..31: n >> 2
+  dag Left = (RLWINM Swap1.Bit, 2, 0, 29);  // rotate left 2, keep bits 0..29: n << 2
+}
+
+def Swap2 {  // Step 2 result: adjacent 2-bit groups exchanged.
+  dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2),  // (n >> 2) & 0x33333333
+                 (AND Shift2.Left, MaskValues.Hi2));  // | (n << 2) & 0xCCCCCCCC
+}
+
+def Shift4 {  // Step 3 operands: the Step 2 result shifted by four bits each way.
+  dag Right = (RLWINM Swap2.Bits, 28, 4, 31);  // rotate left 28, keep bits 4..31: n >> 4
+  dag Left = (RLWINM Swap2.Bits, 4, 0, 27);  // rotate left 4, keep bits 0..27: n << 4
+}
+
+def Swap4 {  // Step 3 result: adjacent 4-bit groups exchanged, so every byte is now bit-reversed in place.
+  dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4),  // (n >> 4) & 0x0F0F0F0F
+                 (AND Shift4.Left, MaskValues.Hi4));  // | (n << 4) & 0xF0F0F0F0
+}
+
+def Rotate {  // Step 4.1: start of the byte reversal.
+  dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31);  // full-width mask, i.e. a plain rotate left by 24 bits
+}
+
+def RotateInsertByte3 {  // Step 4.2
+  dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15);  // insert bits 8..15 of (Swap4.Bits rotl 8)
+}
+
+def RotateInsertByte1 {  // Step 4.3
+  dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);  // insert bits 24..31 of (Swap4.Bits rotl 8)
+}
+
+def : Pat<(i32 (bitreverse i32:$A)),  // Select i32 bitreverse to the sequence built above.
+  (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;  // rotate by 0, clear the upper 32 bits; NOTE(review): presumably to zero-extend the result - confirm

Removed: llvm/trunk/test/CodeGen/PowerPC/bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/bitreverse.ll?rev=307412&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/bitreverse.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc -verify-machineinstrs -march=ppc64 %s -o - | FileCheck %s
-
-; These tests just check that the plumbing is in place for @llvm.bitreverse. The
-; actual output is massive at the moment as llvm.bitreverse is not yet legal.
-
-declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
-
-define <2 x i16> @f(<2 x i16> %a) {
-; CHECK-LABEL: f:
-; CHECK: rlwinm
-  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
-  ret <2 x i16> %b
-}
-
-declare i8 @llvm.bitreverse.i8(i8) readnone
-
-define i8 @g(i8 %a) {
-; CHECK-LABEL: g:
-; CHECK: rlwinm
-; CHECK: rlwimi
-  %b = call i8 @llvm.bitreverse.i8(i8 %a)
-  ret i8 %b
-}

Added: llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/pr33093.ll?rev=307413&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pr33093.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/pr33093.ll Fri Jul  7 09:41:55 2017
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+define zeroext i32 @ReverseBits(i32 zeroext %n) {
+; CHECK-LABEL: ReverseBits:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lis 4, -21846
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    slwi 6, 3, 1
+; CHECK-NEXT:    srwi 3, 3, 1
+; CHECK-NEXT:    lis 7, -13108
+; CHECK-NEXT:    lis 8, 13107
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    lis 10, -3856
+; CHECK-NEXT:    lis 11, 3855
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    ori 5, 8, 13107
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    ori 4, 7, 52428
+; CHECK-NEXT:    slwi 9, 3, 2
+; CHECK-NEXT:    srwi 3, 3, 2
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    and 4, 9, 4
+; CHECK-NEXT:    ori 5, 11, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    ori 4, 10, 61680
+; CHECK-NEXT:    slwi 12, 3, 4
+; CHECK-NEXT:    srwi 3, 3, 4
+; CHECK-NEXT:    and 4, 12, 4
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    rotlwi 4, 3, 24
+; CHECK-NEXT:    rlwimi 4, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 4, 3, 8, 24, 31
+; CHECK-NEXT:    rldicl 3, 4, 0, 32
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+entry:
+  %shr = lshr i32 %n, 1
+  %and = and i32 %shr, 1431655765
+  %and1 = shl i32 %n, 1
+  %shl = and i32 %and1, -1431655766
+  %or = or i32 %and, %shl
+  %shr2 = lshr i32 %or, 2
+  %and3 = and i32 %shr2, 858993459
+  %and4 = shl i32 %or, 2
+  %shl5 = and i32 %and4, -858993460
+  %or6 = or i32 %and3, %shl5
+  %shr7 = lshr i32 %or6, 4
+  %and8 = and i32 %shr7, 252645135
+  %and9 = shl i32 %or6, 4
+  %shl10 = and i32 %and9, -252645136
+  %or11 = or i32 %and8, %shl10
+  %shr13 = lshr i32 %or11, 24
+  %and14 = lshr i32 %or11, 8
+  %shr15 = and i32 %and14, 65280
+  %and17 = shl i32 %or11, 8
+  %shl18 = and i32 %and17, 16711680
+  %shl21 = shl i32 %or11, 24
+  %or16 = or i32 %shl21, %shr13
+  %or19 = or i32 %or16, %shr15
+  %or22 = or i32 %or19, %shl18
+  ret i32 %or22
+}

Added: llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll?rev=307413&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll Fri Jul  7 09:41:55 2017
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+declare i32 @llvm.bitreverse.i32(i32)
+define i32 @testBitReverseIntrinsicI32(i32 %arg) {
+; CHECK-LABEL: testBitReverseIntrinsicI32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    lis 4, -21846
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    slwi 6, 3, 1
+; CHECK-NEXT:    srwi 3, 3, 1
+; CHECK-NEXT:    lis 7, -13108
+; CHECK-NEXT:    lis 8, 13107
+; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    lis 10, -3856
+; CHECK-NEXT:    lis 11, 3855
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    ori 5, 8, 13107
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    ori 4, 7, 52428
+; CHECK-NEXT:    slwi 9, 3, 2
+; CHECK-NEXT:    srwi 3, 3, 2
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    and 4, 9, 4
+; CHECK-NEXT:    ori 5, 11, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    ori 4, 10, 61680
+; CHECK-NEXT:    slwi 12, 3, 4
+; CHECK-NEXT:    srwi 3, 3, 4
+; CHECK-NEXT:    and 4, 12, 4
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    rotlwi 4, 3, 24
+; CHECK-NEXT:    rlwimi 4, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 4, 3, 8, 24, 31
+; CHECK-NEXT:    rldicl 3, 4, 0, 32
+; CHECK-NEXT:    blr
+  %res = call i32 @llvm.bitreverse.i32(i32 %arg)
+  ret i32 %res
+}




More information about the llvm-commits mailing list