[llvm] 5dfaf84 - [LLVM][AArch64] Correctly lower funnel shifts by constants. (#140058)

via llvm-commits llvm-commits at lists.llvm.org
Tue May 20 03:15:24 PDT 2025


Author: Paul Walker
Date: 2025-05-20T11:15:21+01:00
New Revision: 5dfaf8418d6b597ef75cf768dba1cd26fc8b318c

URL: https://github.com/llvm/llvm-project/commit/5dfaf8418d6b597ef75cf768dba1cd26fc8b318c
DIFF: https://github.com/llvm/llvm-project/commit/5dfaf8418d6b597ef75cf768dba1cd26fc8b318c.diff

LOG: [LLVM][AArch64] Correctly lower funnel shifts by constants. (#140058)

Prevent LowerFunnelShift from creating an invalid ISD::FSHR when
lowering "ISD::FSHL X, Y, 0". Such inputs are rare because the shift is
a no-op that DAGCombiner will normally optimise away. However, we should
not rely on that, so this PR mirrors the same optimisation during
lowering.
    
Ensure LowerFunnelShift normalises constant shift amounts because isel
rules expect them to be in the range [0, src bit length).
    
NOTE: To simplify testing, this PR also adds a command-line option to
disable the DAG combiner (-combiner-disabled).
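
For illustration, the constant-amount handling described above can be
summarised with a small standalone C++ sketch. This is not code from the
patch: the helper name normaliseFshlAmount and the UINT32_MAX "fold to
first operand" sentinel are illustrative only, and a 32-bit element type
is assumed.

    // Sketch of the FSHL -> FSHR constant-amount normalisation, assuming i32.
    #include <cassert>
    #include <cstdint>

    // Returns the FSHR-style amount used to lower FSHL x, y, Amt, or
    // UINT32_MAX when the shift is a no-op and simply folds to x.
    uint32_t normaliseFshlAmount(uint32_t Amt, uint32_t BitWidth = 32) {
      uint32_t Norm = Amt % BitWidth; // bring the amount into [0, BitWidth)
      if (Norm == 0)
        return UINT32_MAX;            // FSHL x, y, 0 == x; emit no FSHR node
      return BitWidth - Norm;         // FSHL by N == FSHR by BitWidth - N
    }

    int main() {
      assert(normaliseFshlAmount(0) == UINT32_MAX);  // no-op, result is x
      assert(normaliseFshlAmount(16) == 16);         // matches extr ..., #16
      assert(normaliseFshlAmount(33) == 31);         // 33 % 32 == 1 -> extr ..., #31
      assert(normaliseFshlAmount(32) == UINT32_MAX); // 32 % 32 == 0 -> no-op
      return 0;
    }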

Added: 
    llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 531f6fb2a627a..efaa8bd4a7950 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -149,6 +149,10 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
     cl::desc("DAG combiner enable load/<replace bytes>/store with "
              "a narrower store"));
 
+static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
+                                     cl::init(false),
+                                     cl::desc("Disable the DAG combiner"));
+
 namespace {
 
   class DAGCombiner {
@@ -248,7 +252,8 @@ namespace {
           STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
           BatchAA(BatchAA) {
       ForCodeSize = DAG.shouldOptForSize();
-      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
+      DisableGenericCombines =
+          DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
 
       MaximumLegalStoreInBits = 0;
       // We use the minimum store size here, since that's all we can guarantee

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 293292d47dd48..f2800145cc603 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7262,20 +7262,34 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 // FSHL is converted to FSHR before deciding what to do with it
 static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   SDValue Shifts = Op.getOperand(2);
-  // Check if the shift amount is a constant
+  // Check if the shift amount is a constant and normalise to [0, SrcBitLen)
   // If opcode is FSHL, convert it to FSHR
   if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
     SDLoc DL(Op);
     MVT VT = Op.getSimpleValueType();
+    unsigned int NewShiftNo = ShiftNo->getZExtValue() % VT.getFixedSizeInBits();
 
     if (Op.getOpcode() == ISD::FSHL) {
-      unsigned int NewShiftNo =
-          VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+      if (NewShiftNo == 0)
+        return Op.getOperand(0);
+
+      NewShiftNo = VT.getFixedSizeInBits() - NewShiftNo;
+      return DAG.getNode(
+          ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+    }
+
+    if (Op.getOpcode() == ISD::FSHR) {
+      if (NewShiftNo == 0)
+        return Op.getOperand(1);
+
+      if (ShiftNo->getZExtValue() == NewShiftNo)
+        return Op;
+
+      // Rewrite using the normalised shift amount.
       return DAG.getNode(
           ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
           DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
-    } else if (Op.getOpcode() == ISD::FSHR) {
-      return Op;
     }
   }
 

diff --git a/llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll b/llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll
new file mode 100644
index 0000000000000..091fe483e7d8d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
+; RUN: llc -combiner-disabled %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Verify lowering code in isolation to ensure we can lower shifts that would
+; normally be optimised away.
+
+define i32 @fshl_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_half_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #16
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 16)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshl_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshl_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #31
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshl_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshl_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshl_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #63
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}
+
+define i32 @fshr_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 0)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, w2
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 32)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_half_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #16
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 16)
+  ret i32 %r
+}
+
+define i32 @fshr_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
+; CHECK-LABEL: fshr_i32_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr w0, w1, w2, #1
+; CHECK-NEXT:    ret
+  %r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 33)
+  ret i32 %r
+}
+
+define i64 @fshr_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 0)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 64)
+  ret i64 %r
+}
+
+define i64 @fshr_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
+; CHECK-LABEL: fshr_i64_by_srclen_plus1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    extr x0, x1, x2, #1
+; CHECK-NEXT:    ret
+  %r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 65)
+  ret i64 %r
+}


        

