[llvm] [AMDGPU][SDAG] Legalise v2i32 or/xor/and instructions to make use of 64-bit wide instructions (PR #140694)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 08:16:05 PDT 2025
================
@@ -4069,6 +4069,74 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
+// Each shift has an optimisation to transform a 64-bit shift into a 32-bit
+// shift coupled with an AND if the shift amount is within certain bounds. The
+// vector code for this was being completely scalarised by the vector legaliser,
+// but when v2i32 is legal the vector legaliser only partially scalarises the
+// vector operations and the AND is not elided. This function scalarises the
+// AND for this optimisation case, ensuring it is elided.
+// (shiftop x, (extract_vector_element (and {y0, y1},
+// (build_vector 0x1f, 0x1f)), index))
+// -> (shiftop x, (and (extract_vector_element {y0, y1}, index), 0x1f))
+static SDValue getShiftForReduction(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL ||
+ N->getOpcode() == ISD::SHL) &&
+ "Expected shift Opcode.");
+
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ SDLoc SL = SDLoc(N);
+ if (RHS->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue VAND = RHS.getOperand(0);
+ if (VAND->getOpcode() != ISD::AND)
+ return SDValue();
+
+ ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
+ if (!CRHS)
+ return SDValue();
+
+ SDValue LHSAND = VAND.getOperand(0);
+ SDValue RHSAND = VAND.getOperand(1);
----------------
LU-JOHN wrote:
Consider using sd_match to match complex expressions.
https://github.com/llvm/llvm-project/pull/140694
More information about the llvm-commits
mailing list