[llvm] [AMDGPU] Optimize rotate/funnel shift pattern matching in instruction selection (PR #149817)

Fri Aug 1 02:15:14 PDT 2025

================
@@ -4105,6 +4112,168 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   } while (IsModified);
 }
 
+// Match rotate/funnel shift patterns (an OR of a SHL and an SRL) and
+// convert them to v_alignbit_b32 instructions.
+SDNode *AMDGPUDAGToDAGISel::selectRotateOrFunnelShiftPattern(SDNode *N) {
+  if (N->getOpcode() != ISD::OR)
+    return nullptr;
+
+  // Only handle 32-bit operations
+  if (N->getValueType(0) != MVT::i32)
+    return nullptr;
+
+  if (!N->isDivergent())
+    return nullptr;
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  SDNode *ShlNode = nullptr;
+  SDNode *SrlNode = nullptr;
+
+  // Check both orderings: (shl, srl) and (srl, shl)
+  bool IsLHSShl = LHS.getOpcode() == ISD::SHL;
+  bool IsRHSSrl = RHS.getOpcode() == ISD::SRL;
+  bool IsLHSSrl = LHS.getOpcode() == ISD::SRL;
+  bool IsRHSShl = RHS.getOpcode() == ISD::SHL;
+
+  if ((IsLHSShl && IsRHSSrl) || (IsLHSSrl && IsRHSShl)) {
+    ShlNode = IsLHSShl ? LHS.getNode() : RHS.getNode();
+    SrlNode = IsRHSSrl ? RHS.getNode() : LHS.getNode();
+  } else {
+    return nullptr;
+  }
+
+  // Extract sources and shift amounts
+  SDValue ShlSrc = ShlNode->getOperand(0);
+  SDValue ShlAmt = ShlNode->getOperand(1);
+  SDValue SrlSrc = SrlNode->getOperand(0);
+  SDValue SrlAmt = SrlNode->getOperand(1);
+
+  // Handle the legalizer's (src << 1) pattern for SHL source
+  if (ShlSrc.getOpcode() == ISD::SHL)
+    if (ConstantSDNode *PreShlAmt =
+            dyn_cast<ConstantSDNode>(ShlSrc.getOperand(1)))
+      if (PreShlAmt->getZExtValue() == 1)
+        ShlSrc = ShlSrc.getOperand(0);
+
+  // Helper function to build AlignBit instruction
+  auto buildAlignBitInstruction = [&](SDValue AlignBitSrc0,
+                                      SDValue AlignBitSrc1,
+                                      SDValue ShiftAmount) -> SDNode * {
+    SDLoc DL(N);
+
+    // Select opcode based on subtarget features
+    const GCNSubtarget &ST = CurDAG->getSubtarget<GCNSubtarget>();
----------------
arsenm wrote:

I think there's already a member for this.

https://github.com/llvm/llvm-project/pull/149817