[llvm] [RISCV] Introduce new AND combine to expose additional load narrowing opportunities (PR #170483)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 14 07:56:19 PST 2025
https://github.com/asb updated https://github.com/llvm/llvm-project/pull/170483
From 2e3c1ade6fd29a468d8341c87abec03d9ba75521 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 14:15:08 +0000
Subject: [PATCH 1/7] [RISCV][test] Precommit test case showing opportunity to
narrow loads for some shift then mask cases
---
.../RISCV/load-narrow-shift-mask-combine.ll | 80 +++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
diff --git a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
new file mode 100644
index 0000000000000..901bce594f913
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+; For each of these examples, it is fewer instructions to narrow the load and
+; then shift (which is an opportunity that can be exposed by doing the mask
+; before the shift).
+; TODO: Implement this optimisation.
+
+define ptr @narrow_to_lbu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lbu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lhu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lbu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lh a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i16, ptr %a, align 2
+ %2 = shl i16 %1, 1
+ %3 = and i16 %2, 510
+ %4 = zext nneg i16 %3 to i64
+ %5 = getelementptr inbounds double, ptr %b, i64 %4
+ ret ptr %5
+}
+
+define ptr @narrow_to_lhu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lhu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 12
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lhu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 44
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i32, ptr %a, align 4
+ %2 = shl i32 %1, 1
+ %3 = and i32 %2, 131070
+ %4 = zext nneg i32 %3 to i64
+ %5 = getelementptr inbounds double, ptr %b, i64 %4
+ ret ptr %5
+}
+
+define ptr @narrow_to_lwu(ptr %a, ptr %b) {
+; RV32I-LABEL: narrow_to_lwu:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: narrow_to_lwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+ %1 = load i64, ptr %a, align 8
+ %2 = shl i64 %1, 1
+ %3 = and i64 %2, 8589934590
+ %4 = getelementptr inbounds double, ptr %b, i64 %3
+ ret ptr %4
+}
From d41c9c0329d93ee7a29ef7224f1e5cfb4cfbab0d Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 01:20:45 +0000
Subject: [PATCH 2/7] [RISCV] Introduce new AND combine to expose additional
load narrowing opportunities
The standard codegen pipeline sometimes ends up with a shift followed by
a mask. If applying the mask first would have enabled load narrowing, it
is preferable to do so. The motivating example was seen in povray from
SPEC, where we had something like:
```
lh a0, 0(a0)
slli a0, a0, 56
srli a0, a0, 52
```
which can be better implemented as:
```
lbu a0, 0(a0)
slli a0, a0, 4
```
Although povray is the only SPEC benchmark where the pattern shows up in
a way that reduces dynamic instruction count, adding this optimisation
reduces the povray dynamic instruction count by 0.17% on an RVA22 O3
build.
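As a minimal standalone sketch (not part of the patch; the constants are
chosen purely for illustration), the identity the combine relies on can be
checked with plain integers: commuting the mask to before the shift gives
the same value, and when the commuted mask is 0xff/0xffff/0xffffffff only
the low byte/half/word of the loaded value is needed, which is what allows
the load itself to be narrowed:
```
// Minimal standalone sketch (not part of the patch): masking after the
// shift is equivalent to masking before it with the mask shifted right,
// provided the shift amount is in range.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Loaded = 0xdead; // hypothetical 16-bit load result
  const unsigned ShiftAmt = 4;
  const uint64_t Mask = 0xff0;

  // Pattern as it reaches the combine: (and (shl load, 4), 0xff0).
  uint64_t MaskAfterShift = (Loaded << ShiftAmt) & Mask;

  // Commuted form: the inner mask (0xff0 >> 4) == 0xff shows that only the
  // low byte of the loaded value is used, so the load can become lbu.
  uint64_t MaskBeforeShift = (Loaded & (Mask >> ShiftAmt)) << ShiftAmt;

  assert(MaskAfterShift == MaskBeforeShift); // both equal 0xad0
  return 0;
}
```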
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 +++++++++++++++++++
.../RISCV/load-narrow-shift-mask-combine.ll | 26 ++++++---------
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 2 +-
3 files changed, 43 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..48049a9be1c86 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16607,6 +16607,38 @@ static SDValue performANDCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Sometimes a mask is applied after a shift. If that shift was fed by a
+ // load, there is sometimes the opportunity to narrow the load, which is
+ // hidden by the intermediate shift. Detect that case and commute the
+ // shift/and in order to enable load narrowing.
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && isa<ConstantSDNode>(N1) &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+
+ EVT VT = N->getValueType(0);
+ auto *MaskC = cast<ConstantSDNode>(N1);
+ auto *ShiftC = cast<ConstantSDNode>(N0.getOperand(1));
+
+ uint64_t ShiftAmt = ShiftC->getZExtValue();
+ APInt MaskVal = MaskC->getAPIntValue();
+ // Calculate the mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
+
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || (InnerMask == 0xffffffff);
+
+ if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ }
+ }
+
// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
// shift amounts larger than 31 would produce poison. If we wait until
diff --git a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
index 901bce594f913..fbf5e2b658a92 100644
--- a/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
+++ b/llvm/test/CodeGen/RISCV/load-narrow-shift-mask-combine.ll
@@ -7,22 +7,19 @@
; For each of these examples, it is fewer instructions to narrow the load and
; then shift (which is an opportunity that can be exposed by doing the mask
; before the shift).
-; TODO: Implement this optimisation.
define ptr @narrow_to_lbu(ptr %a, ptr %b) {
; RV32I-LABEL: narrow_to_lbu:
; RV32I: # %bb.0:
-; RV32I-NEXT: lhu a0, 0(a0)
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: narrow_to_lbu:
; RV64I: # %bb.0:
-; RV64I-NEXT: lh a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i16, ptr %a, align 2
@@ -36,17 +33,15 @@ define ptr @narrow_to_lbu(ptr %a, ptr %b) {
define ptr @narrow_to_lhu(ptr %a, ptr %b) {
; RV32I-LABEL: narrow_to_lhu:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a0, 0(a0)
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 12
+; RV32I-NEXT: lhu a0, 0(a0)
+; RV32I-NEXT: slli a0, a0, 4
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: narrow_to_lhu:
; RV64I: # %bb.0:
-; RV64I-NEXT: lw a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 44
+; RV64I-NEXT: lhu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i32, ptr %a, align 4
@@ -67,9 +62,8 @@ define ptr @narrow_to_lwu(ptr %a, ptr %b) {
;
; RV64I-LABEL: narrow_to_lwu:
; RV64I: # %bb.0:
-; RV64I-NEXT: ld a0, 0(a0)
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: lwu a0, 0(a0)
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%1 = load i64, ptr %a, align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index ca72905a0f39b..e04dc58b25d46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -2629,7 +2629,7 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a6, a6, 24
; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: add.uw a1, a6, a1
+; RVA22U64-NEXT: or a1, a6, a1
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
From de82673edde4031501a93cd516b5f9b6fa09878e Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 3 Dec 2025 14:53:41 +0000
Subject: [PATCH 3/7] Remove stray parentheses
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 48049a9be1c86..69a1ab49ff109 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16626,7 +16626,7 @@ static SDValue performANDCombine(SDNode *N,
APInt InnerMask = MaskVal.lshr(ShiftAmt);
bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || (InnerMask == 0xffffffff);
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
// AND the loaded value and change the shift appropriately, allowing
From 2b8a250b9245f44ce296d761621579deb1faba70 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:42 +0000
Subject: [PATCH 4/7] Use helpers to get ShiftAmt/MaskVal
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9f9a3c0a14f40..7109ee8e7aa56 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16643,11 +16643,8 @@ static SDValue performANDCombine(SDNode *N,
isa<ConstantSDNode>(N0.getOperand(1))) {
EVT VT = N->getValueType(0);
- auto *MaskC = cast<ConstantSDNode>(N1);
- auto *ShiftC = cast<ConstantSDNode>(N0.getOperand(1));
-
- uint64_t ShiftAmt = ShiftC->getZExtValue();
- APInt MaskVal = MaskC->getAPIntValue();
+ uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+ const APInt &MaskVal = N1->getAsAPIntVal();
// Calculate the mask if it were applied before the shift.
APInt InnerMask = MaskVal.lshr(ShiftAmt);
From ca0f77aff6b7b42fa78c74b28c215a68c2471e1f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 5/7] Add check for out of bounds shift amount
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 28 +++++++++++----------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7109ee8e7aa56..a8586659fc555 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16644,21 +16644,23 @@ static SDValue performANDCombine(SDNode *N,
EVT VT = N->getValueType(0);
uint64_t ShiftAmt = N0.getConstantOperandVal(1);
- const APInt &MaskVal = N1->getAsAPIntVal();
- // Calculate the mask if it were applied before the shift.
- APInt InnerMask = MaskVal.lshr(ShiftAmt);
+ if (ShiftAmt < VT.getSizeInBits()) {
+ const APInt &MaskVal = N1->getAsAPIntVal();
+ // Calculate the mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
- bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
- if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
- // AND the loaded value and change the shift appropriately, allowing
- // the load to be narrowed.
- SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
- SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
- DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ }
}
}
From 04814ad984c7de449bb6265ded7fb671ff054406 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 6/7] Pull out combine to a separate function
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 69 ++++++++++++---------
1 file changed, 38 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a8586659fc555..d4a0ac3e2cefb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16624,6 +16624,42 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
return SDValue(N, 0);
}
+// Sometimes a mask is applied after a shift. If that shift was fed by a
+// load, there is sometimes the opportunity to narrow the load, which is
+// hidden by the intermediate shift. Detect that case and commute the
+// shift/and in order to enable load narrowing.
+static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N1) || !isa<ConstantSDNode>(N0.getOperand(1))) {
+ return SDValue();
+ }
+
+ EVT VT = N->getValueType(0);
+ uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+
+ if (ShiftAmt > VT.getSizeInBits())
+ return SDValue();
+
+ const APInt &MaskVal = N1->getAsAPIntVal();
+ // Calculate the appropriate mask if it were applied before the shift.
+ APInt InnerMask = MaskVal.lshr(ShiftAmt);
+ bool IsNarrowable =
+ InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
+
+ if (!IsNarrowable || !isa<LoadSDNode>(N0.getOperand(0)))
+ return SDValue();
+
+ // AND the loaded value and change the shift appropriately, allowing
+ // the load to be narrowed.
+ SDLoc DL(N);
+ SDValue LoadNode = N0.getOperand(0);
+ SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
+ DAG.getConstant(InnerMask, DL, VT));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -16631,38 +16667,7 @@ static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
-
SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- // Sometimes a mask is applied after a shift. If that shift was fed by a
- // load, there is sometimes the opportunity to narrow the load, which is
- // hidden by the intermediate shift. Detect that case and commute the
- // shift/and in order to enable load narrowing.
- if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && isa<ConstantSDNode>(N1) &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
-
- EVT VT = N->getValueType(0);
- uint64_t ShiftAmt = N0.getConstantOperandVal(1);
- if (ShiftAmt < VT.getSizeInBits()) {
- const APInt &MaskVal = N1->getAsAPIntVal();
- // Calculate the mask if it were applied before the shift.
- APInt InnerMask = MaskVal.lshr(ShiftAmt);
-
- bool IsNarrowable =
- InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
-
- if (IsNarrowable && isa<LoadSDNode>(N0.getOperand(0))) {
- // AND the loaded value and change the shift appropriately, allowing
- // the load to be narrowed.
- SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
- SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
- DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
- }
- }
- }
// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
@@ -16682,6 +16687,8 @@ static SDValue performANDCombine(SDNode *N,
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
}
+ if (SDValue V = combineNarrowableShiftedLoad(N, DAG))
+ return V;
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
if (DCI.isAfterLegalizeDAG())
From 869b63f18312f50af9103ac583b762879007ff80 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Sun, 14 Dec 2025 15:55:43 +0000
Subject: [PATCH 7/7] Use sd_match
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 24 ++++++++++++---------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d4a0ac3e2cefb..63c0834842319 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16629,35 +16629,39 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
// hidden by the intermediate shift. Detect that case and commute the
// shift/and in order to enable load narrowing.
static SDValue combineNarrowableShiftedLoad(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- if (N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
- !isa<ConstantSDNode>(N1) || !isa<ConstantSDNode>(N0.getOperand(1))) {
+ // (and (shl (load ...), ShiftAmt), Mask)
+ using namespace SDPatternMatch;
+ SDValue LoadNode, ShiftNode;
+ APInt MaskVal, ShiftVal;
+ // (and (shl (load ...), ShiftAmt), Mask)
+ if (!sd_match(N,
+ m_And(m_OneUse(m_Shl(
+ m_AllOf(m_Opc(ISD::LOAD), m_Value(LoadNode)),
+ m_AllOf(m_ConstInt(ShiftVal), m_Value(ShiftNode)))),
+ m_ConstInt(MaskVal)))) {
return SDValue();
}
EVT VT = N->getValueType(0);
- uint64_t ShiftAmt = N0.getConstantOperandVal(1);
+ uint64_t ShiftAmt = ShiftVal.getZExtValue();
- if (ShiftAmt > VT.getSizeInBits())
+ if (ShiftAmt >= VT.getSizeInBits())
return SDValue();
- const APInt &MaskVal = N1->getAsAPIntVal();
// Calculate the appropriate mask if it were applied before the shift.
APInt InnerMask = MaskVal.lshr(ShiftAmt);
bool IsNarrowable =
InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
- if (!IsNarrowable || !isa<LoadSDNode>(N0.getOperand(0)))
+ if (!IsNarrowable)
return SDValue();
// AND the loaded value and change the shift appropriately, allowing
// the load to be narrowed.
SDLoc DL(N);
- SDValue LoadNode = N0.getOperand(0);
SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
DAG.getConstant(InnerMask, DL, VT));
- return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, InnerAnd, ShiftNode);
}
// Combines two comparison operation and logic operation to one selection