[llvm] 7a7e910 - [PowerPC] Implement P10 Byte Reverse Instructions

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 21 07:16:03 PST 2022


Author: Lei Huang
Date: 2022-12-21T09:15:57-06:00
New Revision: 7a7e9109a2d64a1c09b5dbe958893329fc30467e

URL: https://github.com/llvm/llvm-project/commit/7a7e9109a2d64a1c09b5dbe958893329fc30467e
DIFF: https://github.com/llvm/llvm-project/commit/7a7e9109a2d64a1c09b5dbe958893329fc30467e.diff

LOG: [PowerPC] Implement P10 Byte Reverse Instructions

Generate brh, brw and brd instructions for byte-swap operations
on P10, and generate a single brh instruction for a 32-bit swap
followed by a 16-bit right shift.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D140414

Added: 
    llvm/test/CodeGen/PowerPC/p10-bswap.ll

Modified: 
    llvm/lib/Target/PowerPC/P10InstrResources.td
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrP10.td
    llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
    llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 27dfc82853423..f7d07a06c33eb 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -1626,6 +1626,9 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
 // 4 Cycles Permute operations, 1 input operands
 def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
       (instrs
+    BRD,
+    BRH, BRH8,
+    BRW, BRW8,
     LVSL,
     LVSR,
     LXVKQ,

diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index ed98922f860b2..5a7367b479824 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -3936,9 +3936,19 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
+  case ISD::SRL:
+    // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
+    // uses the BRH instruction.
+    if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
+        N->getOperand(0).getOpcode() == ISD::BSWAP) {
+      auto &OpRight = N->getOperand(1);
+      ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
+      if (SRLConst && SRLConst->getSExtValue() == 16)
+        return false;
+    }
+    LLVM_FALLTHROUGH;
   case ISD::ROTL:
   case ISD::SHL:
-  case ISD::SRL:
   case ISD::AND:
   case ISD::OR: {
     BitPermutationSelector BPS(CurDAG);

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cb49b0a74c8f9..1936c1f0fe8fa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -452,14 +452,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FROUND, MVT::f32, Legal);
   }
 
-  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
-  // to speed up scalar BSWAP64.
+  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
+  // instruction xxbrd to speed up scalar BSWAP64.
+  if (Subtarget.isISA3_1()) {
+    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+  } else {
+    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+    setOperationAction(
+        ISD::BSWAP, MVT::i64,
+        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
+  }
+
   // CTPOP or CTTZ were introduced in P8/P9 respectively
-  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
-  if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
-    setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
-  else
-    setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
   if (Subtarget.isISA3_0()) {
     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 853d81d05db8d..6e4d3035f56ea 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -1744,6 +1744,21 @@ let Predicates = [IsISA3_1] in {
                                v2i64:$vB))]>;
   def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
                                     "xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
+  def BRH : XForm_11<31, 219, (outs gprc:$RA), (ins gprc:$RS),
+                     "brh $RA, $RS", IIC_IntRotate,
+                     [(set i32:$RA, (srl (bswap i32:$RS), (i32 16)))]>;
+  def BRW : XForm_11<31, 155, (outs gprc:$RA), (ins gprc:$RS),
+                     "brw $RA, $RS", IIC_IntRotate,
+                     [(set i32:$RA, (bswap i32:$RS))]>;
+  let isCodeGenOnly = 1 in {
+    def BRH8 : XForm_11<31, 219, (outs g8rc:$RA), (ins g8rc:$RS),
+                        "brh $RA, $RS", IIC_IntRotate, []>;
+    def BRW8 : XForm_11<31, 155, (outs g8rc:$RA), (ins g8rc:$RS),
+                        "brw $RA, $RS", IIC_IntRotate, []>;
+  }
+  def BRD : XForm_11<31, 187, (outs g8rc:$RA), (ins g8rc:$RS),
+                     "brd $RA, $RS", IIC_IntRotate,
+                     [(set i64:$RA, (bswap i64:$RS))]>;
 
   // The XFormMemOp flag for the following 8 instructions is set on
   // the instruction format.

diff  --git a/llvm/test/CodeGen/PowerPC/p10-bswap.ll b/llvm/test/CodeGen/PowerPC/p10-bswap.ll
new file mode 100644
index 0000000000000..1b89e0c1eab3d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-bswap.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s
+
+; Check that the brh/brw/brd instructions are generated for the bswap
+; intrinsic for register operand on P10 and that the lhbrx/lwbrx/ldbrx
+; instructions are generated for memory operand.
+
+declare i16 @llvm.bswap.i16(i16)
+
+define zeroext i16 @test_nomem16(i16 zeroext %a) {
+; CHECK-LABEL: test_nomem16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    brh r3, r3
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i16 @llvm.bswap.i16(i16 %a)
+  ret i16 %0
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define zeroext i32 @test_nomem32(i32 zeroext %a) {
+; CHECK-LABEL: test_nomem32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    brw r3, r3
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %0
+}
+
+; Check that brh and clrldi are produced from a call to @llvm.bswap.i32
+; followed by a right shift of 16 (and a zero-extend at the end of the DAG).
+define zeroext i32 @test_bswap_shift16(i32 zeroext %a) {
+; CHECK-LABEL: test_bswap_shift16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    brh r3, r3
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %shr = lshr i32 %0, 16
+  ret i32 %shr
+}
+
+; Check that brh is produced from a call to @llvm.bswap.i32
+; followed by a right shift of 16.
+declare i64 @call_1()
+define void @test_bswap_shift16_2() {
+; CHECK-LABEL: test_bswap_shift16_2:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    bl call_1@notoc
+; CHECK-NEXT:    brh r3, r3
+; CHECK-NEXT:    sth r3, 0(r3)
+bb:
+  switch i32 undef, label %bb1 [
+    i32 78, label %bb2
+  ]
+
+bb1:
+  unreachable
+
+bb2:
+  %i = call i64 @call_1()
+  %i3 = trunc i64 %i to i32
+  %i4 = call i32 @llvm.bswap.i32(i32 %i3)
+  %i5 = lshr i32 %i4, 16
+  %i6 = trunc i32 %i5 to i16
+  store i16 %i6, ptr undef, align 2
+  unreachable
+}
+
+define zeroext i32 @test_bswap_shift18(i32 zeroext %a) {
+; CHECK-LABEL: test_bswap_shift18:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    brw r3, r3
+; CHECK-NEXT:    rlwinm r3, r3, 14, 18, 31
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %shr = lshr i32 %0, 18
+  ret i32 %shr
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @test_nomem64(i64 %a) {
+; CHECK-LABEL: test_nomem64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    brd r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %0
+}
+
+define i16 @test_mem16(ptr %a) {
+; CHECK-LABEL: test_mem16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lhbrx r3, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i16, ptr %a, align 2
+  %1 = tail call i16 @llvm.bswap.i16(i16 %0)
+  ret i16 %1
+}
+
+define i32 @test_mem32(ptr %a) {
+; CHECK-LABEL: test_mem32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lwbrx r3, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, ptr %a, align 4
+  %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+  ret i32 %1
+}
+
+define i64 @test_mem64(ptr %a) {
+; CHECK-LABEL: test_mem64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ldbrx r3, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i64, ptr %a, align 8
+  %1 = tail call i64 @llvm.bswap.i64(i64 %0)
+  ret i64 %1
+}
+

diff  --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
index 5a42c3983e400..1bbc6129b6c8d 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -423,6 +423,15 @@
 # CHECK: xxpermx 6, 63, 21, 34, 2
 0x05 0x00 0x00 0x02 0x88 0xdf 0xa8 0x8c
 
+# CHECK: brh 1, 2
+0x7c 0x41 0x01 0xb6
+
+# CHECK: brw 1, 2
+0x7c 0x41 0x01 0x36
+
+# CHECK: brd 1, 2
+0x7c 0x41 0x01 0x76
+
 # CHECK: xxblendvb 6, 63, 21, 34
 0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x8c
 

diff  --git a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
index d18c84dc73cbc..efd935417b793 100644
--- a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -613,6 +613,15 @@
 # CHECK-LE: xxpermx 6, 63, 21, 34, 2              # encoding: [0x02,0x00,0x00,0x05,
 # CHECK-LE-SAME:                                               0x8c,0xa8,0xdf,0x88]
             xxpermx 6, 63, 21, 34, 2
+# CHECK-BE: brh 1, 2                              # encoding: [0x7c,0x41,0x01,0xb6]
+# CHECK-LE: brh 1, 2                              # encoding: [0xb6,0x01,0x41,0x7c]
+            brh 1, 2
+# CHECK-BE: brw 1, 2                              # encoding: [0x7c,0x41,0x01,0x36]
+# CHECK-LE: brw 1, 2                              # encoding: [0x36,0x01,0x41,0x7c]
+            brw 1, 2
+# CHECK-BE: brd 1, 2                              # encoding: [0x7c,0x41,0x01,0x76]
+# CHECK-LE: brd 1, 2                              # encoding: [0x76,0x01,0x41,0x7c]
+            brd 1, 2
 # CHECK-BE: xxblendvb 6, 63, 21, 34               # encoding: [0x05,0x00,0x00,0x00,
 # CHECK-BE-SAME:                                               0x84,0xdf,0xa8,0x8c]
 # CHECK-LE: xxblendvb 6, 63, 21, 34               # encoding: [0x00,0x00,0x00,0x05,


        


More information about the llvm-commits mailing list