[llvm] 288f762 - [PowerPC] Materialize 34 bit constants with pli on Power 10.

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 07:50:00 PST 2021


Author: Stefan Pintilie
Date: 2021-02-02T09:49:22-06:00
New Revision: 288f762b6ff2c0cedfda88e8b9af11c2ec412cd3

URL: https://github.com/llvm/llvm-project/commit/288f762b6ff2c0cedfda88e8b9af11c2ec412cd3
DIFF: https://github.com/llvm/llvm-project/commit/288f762b6ff2c0cedfda88e8b9af11c2ec412cd3.diff

LOG: [PowerPC] Materialize 34 bit constants with pli on Power 10.

NOTE: This patch was originally written by Anil Mahmud. His code has been
rebased but otherwise left mostly unchanged.

A new instruction on Power 10 allows for the materialization of 34 bit
immediate values. This patch allows the compiler to take advantage of
the new instruction in this situation.

Reviewed By: amyk

Differential Revision: https://reviews.llvm.org/D92879

Added: 
    llvm/test/CodeGen/PowerPC/p10-constants.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 693b0adaede4..15771eef747c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1033,12 +1033,50 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
   return nullptr;
 }
 
+// Try to select instructions to generate a 64 bit immediate using prefix as
+// well as non prefix instructions. The function will return the SDNode
+// to materialize that constant or it will return nullptr if it does not
+// find one. The variable InstCnt is set to the number of instructions that
+// were selected.
+static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
+                                        uint64_t Imm, unsigned &InstCnt) {
+  // Following patterns use 1 instruction to materialize Imm.
+  InstCnt = 1;
+
+  // The pli instruction can materialize up to 34 bits directly.
+  // It is defined in the TD file and so we just return the constant.
+  if (isInt<34>(Imm))
+    return cast<ConstantSDNode>(CurDAG->getConstant(Imm, dl, MVT::i64));
+
+  InstCnt = 0;
+  return nullptr;
+}
+
 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                             unsigned *InstCnt = nullptr) {
   unsigned InstCntDirect = 0;
   // No more than 3 instructions is used if we can select the i64 immediate
   // directly.
   SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
+
+  const PPCSubtarget &Subtarget =
+      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+
+  if (Subtarget.hasPrefixInstrs()) {
+    unsigned InstCntDirectP = 0;
+    SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
+    // Use the prefix case in either of two cases:
+    // 1) We have no result from the non-prefix case to use.
+    // 2) The non-prefix case uses more instructions than the prefix case.
+    // If the prefix and non-prefix cases use the same number of instructions
+    // we will prefer the non-prefix case.
+    if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
+      if (InstCnt)
+        *InstCnt = InstCntDirectP;
+      return ResultP;
+    }
+  }
+
   if (Result) {
     if (InstCnt)
       *InstCnt = InstCntDirect;
@@ -4728,8 +4766,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::Constant:
     if (N->getValueType(0) == MVT::i64) {
-      ReplaceNode(N, selectI64Imm(CurDAG, N));
-      return;
+      SDNode *ResNode = selectI64Imm(CurDAG, N);
+      if (!isa<ConstantSDNode>(ResNode)) {
+        ReplaceNode(N, ResNode);
+        return;
+      }
     }
     break;
 

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9e3c6c569bd7..743be282c00d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1096,6 +1096,8 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
     break;
   case PPC::LI:
   case PPC::LI8:
+  case PPC::PLI:
+  case PPC::PLI8:
   case PPC::LIS:
   case PPC::LIS8:
   case PPC::ADDIStocHA:

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 724af23542d7..1f0f30c5de39 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -398,6 +398,14 @@ def getFPAs32BitInt : SDNodeXForm<fpimm, [{
                                    SDLoc(N), MVT::i32);
 }]>;
 
+def imm34 : PatLeaf<(imm), [{
+  return isInt<34>(N->getSExtValue());
+}]>;
+
+def getImmAs64BitInt : SDNodeXForm<imm, [{
+  return getI64Imm(N->getSExtValue(), SDLoc(N));
+}]>;
+
 def SHL32 : SDNodeXForm<imm, [{
   // Transformation function: 31 - imm
   return getI32Imm(31 - N->getZExtValue(), SDLoc(N));

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index fa6fb651c803..4bab7c42c7d4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2643,6 +2643,8 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
 }
 
 let Predicates = [PrefixInstrs] in {
+  def : Pat<(i32 imm34:$imm), (PLI (getImmAs64BitInt imm:$imm))>;
+  def : Pat<(i64 imm34:$imm), (PLI8 (getImmAs64BitInt imm:$imm))>;
   def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)),
             (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC),
                                        (COPY_TO_REGCLASS $B, VSRC),

diff  --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-constants.ll
new file mode 100644
index 000000000000..3a266ff2d044
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-constants.ll
@@ -0,0 +1,290 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK32
+
+; These test cases aim to test constant materialization using the pli instruction on Power10.
+
+define  signext i32 @t_16BitsMinRequiring34Bits() {
+; CHECK-LABEL: t_16BitsMinRequiring34Bits:
+; CHECK:	pli r3, 32768
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_16BitsMinRequiring34Bits:
+; CHECK32:	pli r3, 32768
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 32768
+}
+
+define  signext i32 @t_16Bits() {
+; CHECK-LABEL: t_16Bits:
+; CHECK:	pli r3, 62004
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_16Bits:
+; CHECK32:	pli r3, 62004
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 62004
+}
+
+define  signext i32 @t_lt32gt16BitsNonShiftable() {
+; CHECK-LABEL: t_lt32gt16BitsNonShiftable:
+; CHECK:	pli r3, 1193046
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_lt32gt16BitsNonShiftable:
+; CHECK32:	pli r3, 1193046
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 1193046
+}
+
+define  signext i32 @t_32Bits() {
+; CHECK-LABEL: t_32Bits:
+; CHECK:	pli r3, -231451016
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_32Bits:
+; CHECK32:	pli r3, -231451016
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 -231451016
+}
+
+define  i64 @t_34BitsLargestPositive() {
+; CHECK-LABEL: t_34BitsLargestPositive:
+; CHECK:	pli r3, 8589934591
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_34BitsLargestPositive:
+; CHECK32:	li r3, 1
+; CHECK32-NEXT: li r4, -1
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i64 8589934591
+}
+
+define  i64 @t_neg34Bits() {
+; CHECK-LABEL: t_neg34Bits:
+; CHECK:	pli r3, -8284514696
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_neg34Bits:
+; CHECK32:	li r3, -2
+; CHECK32-NEXT: pli r4, 305419896
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i64 -8284514696
+}
+
+define  signext i32 @t_16BitsMinRequiring34BitsMinusOne() {
+; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne:
+; CHECK:	li r3, 32767
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_16BitsMinRequiring34BitsMinusOne:
+; CHECK32:	li r3, 32767
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 32767
+}
+
+define  signext i32 @t_lt16Bits() {
+; CHECK-LABEL: t_lt16Bits:
+; CHECK:	li r3, 291
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_lt16Bits:
+; CHECK32:	li r3, 291
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 291
+}
+
+define  signext i32 @t_neglt16Bits() {
+; CHECK-LABEL: t_neglt16Bits:
+; CHECK:	li r3, -3805
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_neglt16Bits:
+; CHECK32:	li r3, -3805
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 -3805
+}
+
+define  signext i32 @t_neg16Bits() {
+; CHECK-LABEL: t_neg16Bits:
+; CHECK:	li r3, -32204
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_neg16Bits:
+; CHECK32:	li r3, -32204
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 -32204
+}
+
+define  signext i32 @t_lt32gt16BitsShiftable() {
+; CHECK-LABEL: t_lt32gt16BitsShiftable:
+; CHECK:	lis r3, 18
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_lt32gt16BitsShiftable:
+; CHECK32:	lis r3, 18
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 1179648
+}
+
+define  signext i32 @t_32gt16BitsShiftable() {
+; CHECK-LABEL: t_32gt16BitsShiftable:
+; CHECK:	lis r3, -3532
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_32gt16BitsShiftable:
+; CHECK32:	lis r3, -3532
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 -231473152
+}
+
+define  signext i32 @t_32BitsZero() {
+; CHECK-LABEL: t_32BitsZero:
+; CHECK:	li r3, 0
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_32BitsZero:
+; CHECK32:	li r3, 0
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 0
+}
+
+define  signext i32 @t_32BitsAllOnes() {
+; CHECK-LABEL: t_32BitsAllOnes:
+; CHECK:	li r3, -1
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_32BitsAllOnes:
+; CHECK32:	li r3, -1
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i32 -1
+}
+
+define  i64 @t_34BitsLargestPositivePlus() {
+; CHECK-LABEL: t_34BitsLargestPositivePlus:
+; CHECK:	li r3, 1
+; CHECK-NEXT:	rldic r3, r3, 33, 30
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_34BitsLargestPositivePlus:
+; CHECK32:	li r3, 2
+; CHECK32-NEXT:	li r4, 0
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i64 8589934592
+}
+
+define  i64 @t_34Bits() {
+; CHECK-LABEL: t_34Bits:
+; CHECK:	lis r3, 25158
+; CHECK-NEXT:	ori r3, r3, 35535
+; CHECK-NEXT:	rldic r3, r3, 3, 30
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_34Bits:
+; CHECK32:	li r3, 3
+; CHECK32-NEXT:	pli r4, 305419896
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i64 13190321784
+}
+
+define  i64 @t_35Bits() {
+; CHECK-LABEL: t_35Bits:
+; CHECK:	lis r3, -442
+; CHECK-NEXT:	ori r3, r3, 35535
+; CHECK-NEXT:	rldic r3, r3, 3, 29
+; CHECK-NEXT:	blr
+; CHECK32-LABEL: t_35Bits:
+; CHECK32:	li r3, 7
+; CHECK32-NEXT:	pli r4, -231451016
+; CHECK32-NEXT:	blr
+
+entry:
+  ret i64 34128287352
+}
+
+; The load immediates resulting from phi-nodes are needed to test whether
+; li/lis is preferred to pli by the instruction selector.
+define dso_local void @t_phiNode() {
+; CHECK-LABEL: t_phiNode:
+; CHECK:	lis r6, 18
+; CHECK-NEXT:	li r5, 291
+; CHECK-NEXT:	li r4, 0
+; CHECK-NEXT:   cmpwi r3, 1
+; CHECK-NEXT:	li r3, -1
+; CHECK:	pli r6, 2147483647
+; CHECK-NEXT:	pli r5, 1193046
+; CHECK-NEXT:	pli r4, 32768
+; CHECK-NEXT:	pli r3, -231451016
+; CHECK32-LABEL: t_phiNode:
+; CHECK32:	lis r6, 18
+; CHECK32-NEXT:	li r5, 291
+; CHECK32-NEXT:	li r4, 0
+; CHECK32-NEXT:   cmpwi r3, 1
+; CHECK32-NEXT:	li r3, -1
+; CHECK32:	pli r6, 2147483647
+; CHECK32-NEXT:	pli r5, 1193046
+; CHECK32-NEXT:	pli r4, 32768
+; CHECK32-NEXT:	pli r3, -231451016
+
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %if.else.i, %entry
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %sw.epilog.i, %while.body
+  %a.1.i = phi i32 [ %a.2.i, %sw.epilog.i ], [ -1, %while.body ]
+  %b.1.i = phi i32 [ %b.2.i, %sw.epilog.i ], [ 0, %while.body ]
+  %c.1.i = phi i32 [ %c.2.i, %sw.epilog.i ], [ 291, %while.body ]
+  %d.1.i = phi i32 [ %d.2.i, %sw.epilog.i ], [ 1179648, %while.body ]
+  %0 = load i8, i8* null, align 1
+  %cmp1.i = icmp eq i8 %0, 1
+  br i1 %cmp1.i, label %if.then.i, label %if.else.i
+
+if.then.i:                                        ; preds = %while.body.i
+  switch i8 undef, label %sw.default.i [
+    i8 3, label %sw.epilog.i
+    i8 2, label %sw.bb1.i
+  ]
+
+sw.bb1.i:                                        ; preds = %if.then.i
+  br label %sw.epilog.i
+
+sw.default.i:                                     ; preds = %if.then.i
+  unreachable
+
+sw.epilog.i:                                      ; preds = %sw.bb2.i, %sw.bb1.i, %if.then.i
+  %a.2.i = phi i32 [ -231451016, %sw.bb1.i ], [ %a.1.i, %if.then.i ]
+  %b.2.i = phi i32 [ 32768, %sw.bb1.i ], [ %b.1.i, %if.then.i ]
+  %c.2.i = phi i32 [ 1193046, %sw.bb1.i ], [ %c.1.i, %if.then.i ]
+  %d.2.i = phi i32 [ 2147483647, %sw.bb1.i ], [ %d.1.i, %if.then.i ]
+  br label %while.body.i
+
+if.else.i:                                     ; preds = %while.body.i
+  call void @func2(i32 signext %a.1.i, i32 signext %b.1.i, i32 signext %c.1.i, i32 signext %d.1.i)
+  br label %while.body
+}
+
+declare void @func2(i32, i32, i32, i32)

diff  --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
index 8ed63654306c..f48882012323 100644
--- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
@@ -317,32 +317,28 @@ define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
 ; test case is a constant that fits within 34-bits.
 ; CHECK-LABEL: test_ldst_7:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li r5, 0
-; CHECK-NEXT:    ori r5, r5, 32799
+; CHECK-NEXT:    pli r5, 32799
 ; CHECK-NEXT:    lxvpx vsp0, r3, r5
 ; CHECK-NEXT:    stxvpx vsp0, r4, r5
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOMMA-LABEL: test_ldst_7:
 ; CHECK-NOMMA:       # %bb.0: # %entry
-; CHECK-NOMMA-NEXT:    li r5, 0
-; CHECK-NOMMA-NEXT:    ori r5, r5, 32799
+; CHECK-NOMMA-NEXT:    pli r5, 32799
 ; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r5
 ; CHECK-NOMMA-NEXT:    stxvpx vsp0, r4, r5
 ; CHECK-NOMMA-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_ldst_7:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    ori r5, r5, 32799
+; CHECK-BE-NEXT:    pli r5, 32799
 ; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
 ; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-BE-NOMMA-LABEL: test_ldst_7:
 ; CHECK-BE-NOMMA:       # %bb.0: # %entry
-; CHECK-BE-NOMMA-NEXT:    li r5, 0
-; CHECK-BE-NOMMA-NEXT:    ori r5, r5, 32799
+; CHECK-BE-NOMMA-NEXT:    pli r5, 32799
 ; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r5
 ; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r4, r5
 ; CHECK-BE-NOMMA-NEXT:    blr


        


More information about the llvm-commits mailing list