[llvm] [AMDGPU][SDAG] Legalise v2i32 or/xor/and instructions to make use of 64-bit wide instructions (PR #140694)

Janek van Oirschot via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 15 05:34:55 PDT 2025


================
@@ -4048,6 +4048,59 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
 }
 
+// Part of the shift combines is to optimise for the case where it's possible
+// to reduce e.g. shl64 to shl32 if the shift amount is in the range [32, 63].
+// This transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31)]. The
+// '&' is then elided by ISel. The vector code for this was being
+// completely scalarised by the vector legaliser, but when v2i32 is
+// legal the vector legaliser only partially scalarises the
+// vector operations and the AND is not elided. This function
+// scalarises the AND for this optimisation case.
+static SDValue getShiftForReduction(unsigned ShiftOpc, SDValue LHS, SDValue RHS,
+                                    SelectionDAG &DAG) {
+  assert(
+      (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
+      "Expected shift Opcode.");
+
+  SDLoc SL = SDLoc(RHS);
+  if (RHS->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  SDValue VAND = RHS.getOperand(0);
+  if (VAND->getOpcode() != ISD::AND)
+    return SDValue();
+
+  ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
+  if (!CRRHS)
+    return SDValue();
+
+  SDValue LHSAND = VAND.getOperand(0);
+  SDValue RHSAND = VAND.getOperand(1);
+  if (RHSAND->getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  ConstantSDNode *CANDL = dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
+  ConstantSDNode *CANDR = dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
+  if (!CANDL || !CANDR || RHSAND->getConstantOperandVal(0) != 0x1f ||
+      RHSAND->getConstantOperandVal(1) != 0x1f)
+    return SDValue();
+  // Get the non-const AND operands and produce scalar AND
+  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, Zero);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
+  SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
+  SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
+  SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
----------------
JanekvO wrote:

These nodes are created even though they may end up unused, right? I'm not sure about the cleanup logic for `SDNode*`s, so for all I know it may be a non-issue; otherwise, can these be guarded behind `(AndIndex == 0 || AndIndex == 1)`?

https://github.com/llvm/llvm-project/pull/140694


More information about the llvm-commits mailing list