[llvm] [PowerPC] using MTVSRBMI instruction instead of constant pool in power10+ (PR #144084)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 07:08:34 PDT 2025


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/144084

>From 31d4e3c7601264107e9719cdae222af6e2e6fd22 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 11 Jun 2025 15:20:14 +0000
Subject: [PATCH 1/3] using MTVSRBMI instead of constant pool

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 49 +++++++++++++++++++++
 llvm/test/CodeGen/PowerPC/mtvsrbmi.ll       | 23 ++--------
 2 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0f8e5e57c58b7..60c89aff155f3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9580,6 +9580,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
+bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) {
+  unsigned int NumOps = BVN.getNumOperands();
+  assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+
+  BMI.clearAllBits();
+  EVT VT = BVN.getValueType(0);
+  APInt ConstValue(VT.getSizeInBits(), 0);
+
+  unsigned EltWidth = VT.getScalarSizeInBits();
+
+  for (unsigned j = 0; j < NumOps; ++j) {
+    SDValue OpVal = BVN.getOperand(j);
+    unsigned BitPos = j * EltWidth;
+    auto *CN = dyn_cast<ConstantSDNode>(OpVal);
+
+    if (!CN)
+      return false;
+
+    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+  }
+
+  for (unsigned J = 0; J < 16; J++) {
+    APInt ExtractValue = ConstValue.extractBits(8, J * 8);
+    if (ExtractValue != 0x00 && ExtractValue != 0xFF)
+      return false;
+    if (ExtractValue == 0xFF)
+      BMI.setBit(J);
+  }
+  return true;
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -9591,6 +9622,24 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
+  if(Subtarget.hasP10Vector()) {
+    APInt BMI(32, 0);
+    // If the value of the vector is all zeros or all ones,
+    // we do not convert it to MTVSRBMI.
+    // The xxleqv instruction sets a vector with all ones.
+    // The xxlxor instruction sets a vector with all zeros.
+    if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) {
+      SDValue  SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32);
+      MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+      SDValue  SDV = SDValue(MSDNode,0);
+      EVT DVT = BVN->getValueType(0);
+      EVT SVT = SDV.getValueType();
+      if (SVT != DVT ) {
+	SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
+      }
+      return SDV;
+    }
+  }
   // Check if this is a splat of a constant value.
   APInt APSplatBits, APSplatUndef;
   unsigned SplatBitSize;
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 5486dc02faf90..232014db9a012 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -10,28 +10,13 @@
 ; RUN:   | FileCheck %s --check-prefix=CHECK
 
 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK:      L..CPI0_0:
-; CHECK-NEXT:   .byte   255                             # 0xff
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
-; CHECK-NEXT:   .byte   0                               # 0x0
+; CHECK-NOT:      L..CPI0_0:
+; CHECK-NOT:   .byte   255                             # 0xff
+; CHECK-NOT:   .byte   0                               # 0x0
 
 ; CHECK-LABEL: _Z5v00FFv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lwz r3, L..C0(r2) # %const.0
-; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    mtvsrbmi v2, 1
 ; CHECK-NEXT:    blr
 entry:
   ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>

>From b5ac0bf5360a93501f95fff7a69b95da35661b77 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 18 Jun 2025 19:48:16 +0000
Subject: [PATCH 2/3] address comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 26 ++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 60c89aff155f3..322ad810fbccc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9580,19 +9580,19 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
-bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
   unsigned int NumOps = BVN.getNumOperands();
-  assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+  assert(NumOps > 0 && "Unexpected 0-size build vector");
 
-  BMI.clearAllBits();
+  BitMask.clearAllBits();
   EVT VT = BVN.getValueType(0);
   APInt ConstValue(VT.getSizeInBits(), 0);
 
   unsigned EltWidth = VT.getScalarSizeInBits();
 
-  for (unsigned j = 0; j < NumOps; ++j) {
-    SDValue OpVal = BVN.getOperand(j);
-    unsigned BitPos = j * EltWidth;
+  for (unsigned J = 0; J < NumOps; ++J) {
+    SDValue OpVal = BVN.getOperand(J);
+    unsigned BitPos = J * EltWidth;
     auto *CN = dyn_cast<ConstantSDNode>(OpVal);
 
     if (!CN)
@@ -9601,12 +9601,12 @@ bool isValidMtVsrbmi(APInt &BMI, BuildVectorSDNode &BVN) {
     ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
   }
 
-  for (unsigned J = 0; J < 16; J++) {
+  for (unsigned J = 0; J < 16; ++J) {
     APInt ExtractValue = ConstValue.extractBits(8, J * 8);
     if (ExtractValue != 0x00 && ExtractValue != 0xFF)
       return false;
     if (ExtractValue == 0xFF)
-      BMI.setBit(J);
+      BitMask.setBit(J);
   }
   return true;
 }
@@ -9623,19 +9623,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
   if(Subtarget.hasP10Vector()) {
-    APInt BMI(32, 0);
+    APInt BitMask(32, 0);
     // If the value of the vector is all zeros or all ones,
     // we do not convert it to MTVSRBMI.
     // The xxleqv instruction sets a vector with all ones.
     // The xxlxor instruction sets a vector with all zeros.
-    if (isValidMtVsrbmi(BMI, *BVN) && BMI != 0 && BMI!=0xffff ) {
-      SDValue  SDConstant= DAG.getTargetConstant(BMI, dl, MVT::i32);
+    if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
+      SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
       MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
-      SDValue  SDV = SDValue(MSDNode,0);
+      SDValue SDV = SDValue(MSDNode, 0);
       EVT DVT = BVN->getValueType(0);
       EVT SVT = SDV.getValueType();
       if (SVT != DVT ) {
-	SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
+        SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
       }
       return SDV;
     }

>From a3a724a569d7116e969e00a573f8a70f20ccca85 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 23 Jun 2025 14:11:20 +0000
Subject: [PATCH 3/3] address comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 322ad810fbccc..210e3a5c83bd6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9590,15 +9590,15 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
 
   unsigned EltWidth = VT.getScalarSizeInBits();
 
-  for (unsigned J = 0; J < NumOps; ++J) {
-    SDValue OpVal = BVN.getOperand(J);
-    unsigned BitPos = J * EltWidth;
+  unsigned BitPos = 0;
+  for (auto OpVal : BVN.op_values()) {
     auto *CN = dyn_cast<ConstantSDNode>(OpVal);
 
     if (!CN)
       return false;
 
     ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+    BitPos += EltWidth;
   }
 
   for (unsigned J = 0; J < 16; ++J) {
@@ -9630,7 +9630,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     // The xxlxor instruction sets a vector with all zeros.
     if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
       SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
-      MachineSDNode* MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl,MVT::v16i8, SDConstant);
+      MachineSDNode *MSDNode =
+          DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
       SDValue SDV = SDValue(MSDNode, 0);
       EVT DVT = BVN->getValueType(0);
       EVT SVT = SDV.getValueType();



More information about the llvm-commits mailing list