[llvm] [PowerPC] Implement a more efficient memcmp in cases where the length is known. (PR #158657)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 11:23:21 PDT 2025
================
@@ -15556,6 +15556,89 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
+
+ // Optimization: Fold i128 equality/inequality compares of two loads into a
+ // vectorized compare using vcmpequb.p when VSX is available.
+ //
+ // Rationale:
+ // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
+ // On VSX-capable subtargets, we can instead reinterpret the i128 loads
+ // as v16i8 vectors and use the Altivec/VSX vcmpequb.p instruction to
+ // perform a full 128-bit equality check in a single vector compare.
+
+ if (Subtarget.hasVSX()) {
+ if (LHS.getOpcode() == ISD::LOAD && RHS.getOpcode() == ISD::LOAD &&
+ LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getValueType() == MVT::i128 && RHS.getValueType() == MVT::i128) {
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ auto *LA = dyn_cast<LoadSDNode>(LHS);
+ auto *LB = dyn_cast<LoadSDNode>(RHS);
+ if (!LA || !LB)
+ return DAGCombineTruncBoolExt(N, DCI);
+
+ // If either memory operation (LA or LB) is volatile, do not perform this
+ // transformation. Volatile operations must be preserved as written to
+ // ensure correct program behavior, so we fall back to the generic
+ // truncate/bool-ext combine without touching the loads.
+ if (LA->isVolatile() || LB->isVolatile())
+ return DAGCombineTruncBoolExt(N, DCI);
+
+ // Only combine loads if both use the unindexed addressing mode.
+ // PowerPC AltiVec/VMX does not support vector loads or stores with
+ // pre/post-increment addressing. Indexed modes may imply implicit
+ // pointer updates, which are not compatible with AltiVec vector
+ // instructions.
+ if (LA->getAddressingMode() != ISD::UNINDEXED ||
+ LB->getAddressingMode() != ISD::UNINDEXED)
+ return DAGCombineTruncBoolExt(N, DCI);
+
+ // Only combine loads if both are non-extending loads
+ // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
+ // ISD::SEXTLOAD) perform zero or sign extension, which may change the
+ // loaded value's semantics and are not compatible with vector loads.
+ if (LA->getExtensionType() != ISD::NON_EXTLOAD ||
+ LB->getExtensionType() != ISD::NON_EXTLOAD)
+ return DAGCombineTruncBoolExt(N, DCI);
+
+ // Following code transforms the DAG
+ //   t0: ch,glue = EntryToken
+ //   t2: i64,ch = CopyFromReg t0, Register:i64 %0
+ //   t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2, undef:i64
+ //   t4: i64,ch = CopyFromReg t0, Register:i64 %1
+ //   t5: i128,ch = load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
+ //   t6: i1 = setcc t3, t5, setne:ch
+ //
+ // ---->
+ //
+ //   t0: ch,glue = EntryToken
+ //   t2: i64,ch = CopyFromReg t0, Register:i64 %0
+ //   t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2, undef:i64
+ //   t4: i64,ch = CopyFromReg t0, Register:i64 %1
+ //   t5: v16i8,ch = load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
+ //   t6: i32 = llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
+ //             Constant:i32<2>, t3, t5
+ //   t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
+
+ SDValue LHSVec = DAG.getLoad(MVT::v16i8, DL, LA->getChain(),
+ LA->getBasePtr(), LA->getMemOperand());
+ SDValue RHSVec = DAG.getLoad(MVT::v16i8, DL, LB->getChain(),
+ LB->getBasePtr(), LB->getMemOperand());
+
+ SDValue IntrID =
+ DAG.getTargetConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL,
----------------
RolandF77 wrote:
Can just use getConstant.
https://github.com/llvm/llvm-project/pull/158657
More information about the llvm-commits
mailing list