[llvm] [AMDGPU] Extend SRA i64 simplification for shift amts in range [33:62] (PR #138913)

Thu May 22 01:24:50 PDT 2025

================
@@ -4153,22 +4153,23 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
   SDLoc SL(N);
   unsigned RHSVal = RHS->getZExtValue();
 
-  // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
-  if (RHSVal == 32) {
+  // For C >= 32
+  // (sra i64:x, C) -> build_pair (sra hi_32(x), C - 32), (sra hi_32(x), 31)
+  if (32 <= RHSVal) {
     SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
-    SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
-                                   DAG.getConstant(31, SL, MVT::i32));
-
-    SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
-    return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
-  }
+    SDValue HiShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
+                                  DAG.getConstant(31, SL, MVT::i32));
+    SDValue LoShift;
+
+    if (RHSVal == 63)
+      LoShift = HiShift;
+    else if (RHSVal == 32)
+      LoShift = Hi;
+    else
----------------
arsenm wrote:

It would also be better to do this in terms of the bit width instead of hardcoding the constant values. Do we already do this in GlobalISel? I thought that version was slightly ahead of the DAG version.

https://github.com/llvm/llvm-project/pull/138913