[llvm] [RISCV] Introduce new AND combine to expose additional load narrowing opportunities (PR #170483)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 14 07:56:19 PST 2025
https://github.com/asb updated https://github.com/llvm/llvm-project/pull/170483
From 2e3c1ade6fd29a468d8341c87abec03d9ba75521 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 14:15:08 +0000
Subject: [PATCH 1/7] [RISCV][test] Precommit test case showing opportunity to
narrow loads for some shift then mask cases
---
.../RISCV/load-narrow-shift-mask-combine.ll | 80 +++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
diff --git a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
new file mode 100644
index 0000000000000..901bce594f913
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+; For each of these examples, it is fewer instructions to narrow the load and
+; then shift (which is an opportunity that can be exposed by doing the mask
+; before the shift).
+; TODO: Implement this optimisation.
+
+define ptr @narrow_to_lbu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lbu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lhu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lbu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lh a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i16, ptr %a, align 2
+ %2 = shl i16 %1, 1
+ %3 = and i16 %2, 510
+ %4 = zext nneg i16 %3 to i64
+ %5 = getelementptr inbounds double, ptr %b, i64 %4
+ ret ptr %5
+}
+
+define ptr @narrow_to_lhu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lhu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 12
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lhu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 44
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i32, ptr %a, align 4
+ %2 = shl i32 %1, 1
+ %3 = and i32 %2, 131070
+ %4 = zext nneg i32 %3 to i64
+ %5 = getelementptr inbounds double, ptr %b, i64 %4
+ ret ptr %5
+}
+
+define ptr @narrow_to_lwu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lwu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i64, ptr %a, align 8
+ %2 = shl i64 %1, 1
+ %3 = and i64 %2, 8589934590
+ %4 = getelementptr inbounds double, ptr %b, i64 %3
+ ret ptr %4
+}
From d41c9c0329d93ee7a29ef7224f1e5cfb4cfbab0d Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 01:20:45 +0000
Subject: [PATCH 2/7] [RISCV] Introduce new AND combine to expose additional
load narrowing opportunities
The standard codegen pipeline sometimes ends up with a shift followed by
a mask. If applying the mask first would have enabled load narrowing, it
is preferable to do so. The motivating example was seen in povray from
SPEC, where we had something like:
```
lh a0, 0(a0)
slli a0, a0, 56
srli a0, a0, 52
```
which can be better implemented as:
```
lbu a0, 0(a0)
slli a0, a0, 4
```
Although povray is the only SPEC benchmark where the pattern shows up in
a way that reduces dynamic instruction count, adding this optimisation
reduces the povray dynamic instruction count by 0.17% on an RVA22 O3
build.
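As a minimal standalone sketch (not part of the patch; the constants are
chosen purely for illustration), the identity the combine relies on can be
checked with plain integers: commuting the mask to before the shift gives
the same value, and when the commuted mask is 0xff/0xffff/0xffffffff only
the low byte/half/word of the loaded value is needed, which is what allows
the load itself to be narrowed:
```
// Minimal standalone sketch (not part of the patch): masking after the
// shift is equivalent to masking before it with the mask shifted right,
// provided the shift amount is in range.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Loaded = 0xdead; // hypothetical 16-bit load result
  const unsigned ShiftAmt = 4;
  const uint64_t Mask = 0xff0;

  // Pattern as it reaches the combine: (and (shl load, 4), 0xff0).
  uint64_t MaskAfterShift = (Loaded << ShiftAmt) & Mask;

  // Commuted form: the inner mask (0xff0 >> 4) == 0xff shows that only the
  // low byte of the loaded value is used, so the load can become lbu.
  uint64_t MaskBeforeShift = (Loaded & (Mask >> ShiftAmt)) << ShiftAmt;

  assert(MaskAfterShift == MaskBeforeShift); // both equal 0xad0
  return 0;
}
```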
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 +++++++++++++++++++
.../RISCV/load-narrow-shift-mask-combine.ll | 26 ++++++---------
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 2 +-
3 files changed, 43 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..48049a9be1c86 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16607,6 +16607,38 @@ static SDValue performANDCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Sometimes a mask is applied after a shift. If that shift was fed by a
+ // load, there is sometimes the opportunity to narrow the load, which is
+ // hidden by the intermediate shift. Detect that case and commute the
+ // shift/and in order to enable load narrowing.
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && isa<ConstantSDNode>(N1) &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+
+ EVT VT = N->getValueType(0);
+ auto *MaskC = cast<ConstantSDNode>(N1);
+ auto *ShiftC = cast<ConstantSDNode>(N0.getOperand(1));
+
+ uint64_t ShiftAmt = ShiftC->getZExtValue();
+ APInt MaskVal = MaskC->getAPIntValue();
+ // Calculate the mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
+
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || (InnerMask == 0xffffffff);
+
+ if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ }
+ }
+
// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
// shift amounts larger than 31 would produce poison. If we wait until
diff --git a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
index 901bce594f913..fbf5e2b658a92 100644
--- a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
+++ b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
@@ -7,22 +7,19 @@
; For each of these examples, it is fewer instructions to narrow the load and
; then shift (which is an opportunity that can be exposed by doing the mask
; before the shift).
-; TODO: Implement this optimisation.
define ptr @narrow_to_lbu(ptr %a, ptr %b) {
; RV32I-LABEL: narrow_to_lbu:
; RV32I: # %bb.0:
-; RV32I-NEXT: lhu a0, 0(a0)
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: narrow_to_lbu:
; RV64I: # %bb.0:
-; RV64I-NEXT: lh a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i16, ptr %a, align 2
@@ -36,17 +33,15 @@ define ptr @narrow_to_lbu(ptr %a, ptr %b) {
define ptr @narrow_to_lhu(ptr %a, ptr %b) {
; RV32I-LABEL: narrow_to_lhu:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a0, 0(a0)
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 12
+; RV32I-NEXT: lhu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: narrow_to_lhu:
; RV64I: # %bb.0:
-; RV64I-NEXT: lw a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 44
+; RV64I-NEXT: lhu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i32, ptr %a, align 4
@@ -67,9 +62,8 @@ define ptr @narrow_to_lwu(ptr %a, ptr %b) {
;
; RV64I-LABEL: narrow_to_lwu:
; RV64I: # %bb.0:
-; RV64I-NEXT: ld a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: lwu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i64, ptr %a, align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index ca72905a0f39b..e04dc58b25d46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -2629,7 +2629,7 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a6, a6, 24
; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: add.uw a1, a6, a1
+; RVA22U64-NEXT: or a1, a6, a1
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
From de82673edde4031501a93cd516b5f9b6fa09878e Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 14:53:41 +0000
Subject: [PATCH 3/7] Remove stray parentheses
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 48049a9be1c86..69a1ab49ff109 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16626,7 +16626,7 @@ static SDValue performANDCombine(SDNode *N,
APInt InnerMask = MaskVal.lshr(ShiftAmt);
bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || (InnerMask == 0xffffffff);
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
// AND the loaded value and change the shift appropriately, allowing
From 2b8a250b9245f44ce296d761621579deb1faba70 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:42 +0000
Subject: [PATCH 4/7] Use helpers to get ShiftAmt/MaskVal
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9f9a3c0a14f40..7109ee8e7aa56 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16643,11 +16643,8 @@ static SDValue performANDCombine(SDNode *N,
isa<ConstantSDNode>(N0.getOperand(1))) {
EVT VT = N->getValueType(0);
- auto *MaskC = cast<ConstantSDNode>(N1);
- auto *ShiftC = cast<ConstantSDNode>(N0.getOperand(1));
-
- uint64_t ShiftAmt = ShiftC->getZExtValue();
- APInt MaskVal = MaskC->getAPIntValue();
+ uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+ const APInt &MaskVal = N1->getAsAPIntVal();
// Calculate the mask if it were applied before the shift.
APInt InnerMask = MaskVal.lshr(ShiftAmt);
From ca0f77aff6b7b42fa78c74b28c215a68c2471e1f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 5/7] Add check for out of bounds shift amount
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 28 +++++++++++----------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7109ee8e7aa56..a8586659fc555 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16644,21 +16644,23 @@ static SDValue performANDCombine(SDNode *N,
EVT VT = N->getValueType(0);
uint64_t ShiftAmt = N0.getConstantOperandVal(1);
- const APInt &MaskVal = N1->getAsAPIntVal();
- // Calculate the mask if it were applied before the shift.
- APInt InnerMask = MaskVal.lshr(ShiftAmt);
+ if (ShiftAmt < VT.getSizeInBits()) {
+ const APInt &MaskVal = N1->getAsAPIntVal();
+ // Calculate the mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
- bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
- if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
- // AND the loaded value and change the shift appropriately, allowing
- // the load to be narrowed.
- SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
- SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
- DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ }
}
}
From 04814ad984c7de449bb6265ded7fb671ff054406 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 6/7] Pull out combine to a separate function
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 69 ++++++++++++---------
1 file changed, 38 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a8586659fc555..d4a0ac3e2cefb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16624,6 +16624,42 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
return SDValue(N, 0);
}
+// Sometimes a mask is applied after a shift. If that shift was fed by a
+// load, there is sometimes the opportunity to narrow the load, which is
+// hidden by the intermediate shift. Detect that case and commute the
+// shift/and in order to enable load narrowing.
+static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N1) || !isa<ConstantSDNode>(N0.getOperand(1))) {
+ return SDValue();
+ }
+
+ EVT VT = N->getValueType(0);
+ uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+
+ if (ShiftAmt > VT.getSizeInBits())
+ return SDValue();
+
+ const APInt &MaskVal = N1->getAsAPIntVal();
+ // Calculate the appropriate mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
+
+ if (!IsNarrowable || !isa<LoadSDNode>(N0.getOperand(0)))
+ return SDValue();
+
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -16631,38 +16667,7 @@ static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
-
SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- // Sometimes a mask is applied after a shift. If that shift was fed by a
- // load, there is sometimes the opportunity to narrow the load, which is
- // hidden by the intermediate shift. Detect that case and commute the
- // shift/and in order to enable load narrowing.
- if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && isa<ConstantSDNode>(N1) &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
-
- EVT VT = N->getValueType(0);
- uint64_t ShiftAmt = N0.getConstantOperandVal(1);
- if (ShiftAmt < VT.getSizeInBits()) {
- const APInt &MaskVal = N1->getAsAPIntVal();
- // Calculate the mask if it were applied before the shift.
- APInt InnerMask = MaskVal.lshr(ShiftAmt);
-
- bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
-
- if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
- // AND the loaded value and change the shift appropriately, allowing
- // the load to be narrowed.
- SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
- SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
- DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
- }
- }
- }
// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
@@ -16682,6 +16687,8 @@ static SDValue performANDCombine(SDNode *N,
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
}
+ if (SDValue V = combineNarrowableShiftedLoad(N, DAG))
+ return V;
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
if (DCI.isAfterLegalizeDAG())
From 869b63f18312f50af9103ac583b762879007ff80 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 7/7] Use sd_match
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 24 ++++++++++++---------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d4a0ac3e2cefb..63c0834842319 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16629,35 +16629,39 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
// hidden by the intermediate shift. Detect that case and commute the
// shift/and in order to enable load narrowing.
static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- if (N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
- !isa<ConstantSDNode>(N1) || !isa<ConstantSDNode>(N0.getOperand(1))) {
+ // (and (shl (load ...), ShiftAmt), Mask)
+ using namespace SDPatternMatch;
+ SDValue LoadNode, ShiftNode;
+ APInt MaskVal, ShiftVal;
+ // (and (shl (load ...), ShiftAmt), Mask)
+ if (!sd_match(N,
+ m_And(m_OneUse(m_Shl(
+ m_AllOf(m_Opc(ISD::LOAD), m_Value(LoadNode)),
+ m_AllOf(m_ConstInt(ShiftVal), m_Value(ShiftNode)))),
+ m_ConstInt(MaskVal)))) {
return SDValue();
}
EVT VT = N->getValueType(0);
- uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+ uint64_t ShiftAmt = ShiftVal.getZExtValue();
- if (ShiftAmt > VT.getSizeInBits())
+ if (ShiftAmt >= VT.getSizeInBits())
return SDValue();
- const APInt &MaskVal = N1->getAsAPIntVal();
// Calculate the appropriate mask if it were applied before the shift.
APInt InnerMask = MaskVal.lshr(ShiftAmt);
bool IsNarrowable =
InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
- if (!IsNarrowable || !isa<LoadSDNode>(N0.getOperand(0)))
+ if (!IsNarrowable)
return SDValue();
// AND the loaded value and change the shift appropriately, allowing
// the load to be narrowed.
SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, ShiftNode);
}
// Combines two comparison operation and logic operation to one selection