[PATCH] D139813: [PowerPC] Improve materialization for immediates which are almost a 32 bit splat.

Esme Yi via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 12 00:28:44 PST 2022


Esme created this revision.
Esme added reviewers: shchenz, stefanp, amyk, PowerPC.
Herald added subscribers: kbarton, hiraditya, nemanjai.
Herald added a project: All.
Esme requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Some 64 bit constants can be materialized with fewer instructions than we currently use.
We consider a 64 bit immediate value divided into four parts, `Hi16OfHi32 (bits 48...63)`, `Lo16OfHi32 (bits 32...47)`, `Hi16OfLo32 (bits 16...31)`, `Lo16OfLo32 (bits 0...15)`. If either `Hi16OfHi32 == Hi16OfLo32` or `Lo16OfHi32 == Lo16OfLo32`, the immediate can be treated as "almost" a splat of a 32 bit value in a 64 bit register.
For example:

  define  i64 @almost_splat() {
  entry:
    ; 0xCCFFCCFF0123CCFF (Lo16OfHi32 == Lo16OfLo32)
    ret i64 14771750698406366463
  }

Currently we use 5 instructions to materialize the immediate:

  # %bb.0:                                # %entry
  	lis 3, -13057
  	ori 3, 3, 52479
  	rldic 3, 3, 32, 0
  	oris 3, 3, 291
  	ori 3, 3, 52479
  	blr

To improve on that, we can use 3 instructions to generate the splat and 1 more instruction to modify the part that differs:

  # %bb.0:                                # %entry
  	lis 3, -13057
  	ori 3, 3, 52479
  	rldimi 3, 3, 32, 0   // generate the splat
  	oris 3, 3, 291       // modify Hi16OfLo32
  	blr


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139813

Files:
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/test/CodeGen/PowerPC/constants-i64.ll


Index: llvm/test/CodeGen/PowerPC/constants-i64.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/constants-i64.ll
+++ llvm/test/CodeGen/PowerPC/constants-i64.ll
@@ -375,4 +375,28 @@
   ret i64 11174473921
 }
 
+define i64 @imm19() #0 {
+; CHECK-LABEL: imm19:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:  lis 3, 13107
+; CHECK-NEXT:  ori 3, 3, 13107
+; CHECK-NEXT:  rldic 3, 3, 34, 0
+; CHECK-NEXT:  ori 3, 3, 52428
+; CHECK-NEXT:  blr
+entry:
+  ret i64 14757395255531719884 ;0xCCCCCCCC0000CCCC
+}
+
+define i64 @imm20() #0 {
+; CHECK-LABEL: imm20:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:  lis 3, -13057
+; CHECK-NEXT:  ori 3, 3, 52479
+; CHECK-NEXT:  rldimi 3, 3, 32, 0
+; CHECK-NEXT:  oris 3, 3, 291
+; CHECK-NEXT:  blr
+entry:
+  ret i64 14771750698406366463 ;0xCCFFCCFF0123CCFF
+}
+
 attributes #0 = { nounwind readnone }
Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1325,6 +1325,43 @@
                                     getI32Imm(Lo16));
     ++InstCntDirect;
   }
+
+  // Try to use 4 instructions to materialize the immediate which is "almost" a
+  // splat of a 32 bit immediate.
+  if (InstCntDirect > 4) {
+    uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
+    uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
+    uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
+    uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
+
+    auto getSplat = [CurDAG, dl](uint32_t Hi16, uint32_t Lo16) {
+      auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+        return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+      };
+      SDNode *Result =
+          CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
+      Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+                                      SDValue(Result, 0), getI32Imm(Lo16));
+      SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+                       getI32Imm(0)};
+      return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+    };
+
+    if (Hi16OfHi32 == Hi16OfLo32) {
+      InstCntDirect = 4;
+      Result = getSplat(Hi16OfHi32, Lo16OfHi32);
+      Result = CurDAG->getMachineNode(
+          PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+          CurDAG->getTargetConstant(Lo16OfLo32, dl, MVT::i32));
+    } else if (Lo16OfHi32 == Lo16OfLo32) {
+      InstCntDirect = 4;
+      Result = getSplat(Hi16OfHi32, Lo16OfHi32);
+      Result = CurDAG->getMachineNode(
+          PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
+          CurDAG->getTargetConstant(Hi16OfLo32, dl, MVT::i32));
+    }
+  }
+
   if (InstCnt)
     *InstCnt = InstCntDirect;
   return Result;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D139813.482007.patch
Type: text/x-patch
Size: 2833 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221212/a526cad8/attachment.bin>


More information about the llvm-commits mailing list