[llvm] d7e631c - [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (#136502)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 08:35:54 PDT 2025
Author: Jan Górski
Date: 2025-04-28T08:35:51-07:00
New Revision: d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa
URL: https://github.com/llvm/llvm-project/commit/d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa
DIFF: https://github.com/llvm/llvm-project/commit/d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa.diff
LOG: [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (#136502)
Extends changes from
[ff687af](https://github.com/llvm/llvm-project/commit/ff687af04f5b0e85305250587b524cb0b3849aa0).
Fixes https://github.com/llvm/llvm-project/issues/131476.
This patch adds a DAG combine that handles the pattern produced by
`(zext (atomic_load))`: an `AND` whose operands are an `ATOMIC_LOAD` and a
constant mask covering exactly the loaded bits (e.g. `0xFF`, `0xFFFF`, etc.).
The `AND` and the load are replaced by a single zero-extending atomic load,
provided the atomic ordering is monotonic or weaker.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoA.td
llvm/test/CodeGen/RISCV/atomic-load-zext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 76e9900a06a1e..722dcbbc6dd53 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15321,6 +15321,40 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+static SDValue reduceANDOfAtomicLoad(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::ATOMIC_LOAD)
+ return SDValue();
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
+ if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
+ return SDValue();
+
+ EVT LoadedVT = ALoad->getMemoryVT();
+ ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!MaskConst)
+ return SDValue();
+ uint64_t Mask = MaskConst->getZExtValue();
+ uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
+ if (Mask != ExpectedMask)
+ return SDValue();
+
+ SDValue ZextLoad = DAG.getAtomicLoad(
+ ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
+ ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
+ DCI.CombineTo(N, ZextLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0);
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -15355,6 +15389,8 @@ static SDValue performANDCombine(SDNode *N,
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
return V;
+ if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 74873a66bc8c9..5fa7d4160752f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -167,6 +167,8 @@ class seq_cst_store<PatFrag base>
let Predicates = [HasAtomicLdSt] in {
def : LdPat<relaxed_load<atomic_load_asext_8>, LB>;
def : LdPat<relaxed_load<atomic_load_asext_16>, LH>;
+ def : LdPat<relaxed_load<atomic_load_zext_8>, LBU>;
+ def : LdPat<relaxed_load<atomic_load_zext_16>, LHU>;
def : StPat<relaxed_store<atomic_store_8>, SB, GPR, XLenVT>;
def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
@@ -179,6 +181,7 @@ let Predicates = [HasAtomicLdSt, IsRV32] in {
let Predicates = [HasAtomicLdSt, IsRV64] in {
def : LdPat<relaxed_load<atomic_load_asext_32>, LW>;
+ def : LdPat<relaxed_load<atomic_load_zext_32>, LWU>;
def : LdPat<relaxed_load<atomic_load_nonext_64>, LD, i64>;
def : StPat<relaxed_store<atomic_store_64>, SD, GPR, i64>;
}
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
index 8097179443791..68d6b127ac6f1 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -301,8 +301,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i8_unordered:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_unordered:
@@ -318,8 +317,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i8_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a unordered, align 1
ret i8 %1
@@ -339,8 +337,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i8_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_monotonic:
@@ -356,8 +353,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i8_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a monotonic, align 1
ret i8 %1
@@ -377,15 +373,13 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: zext.b a0, a0
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_acquire:
@@ -401,41 +395,35 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: zext.b a0, a0
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -446,8 +434,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -458,8 +445,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i8, ptr %a acquire, align 1
ret i8 %1
@@ -480,16 +466,14 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_seq_cst:
@@ -506,46 +490,40 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
@@ -578,9 +556,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i16_unordered:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lh a0, 0(a0)
-; RV32IA-NEXT: slli a0, a0, 16
-; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: lhu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_unordered:
@@ -597,9 +573,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i16_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lh a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 48
-; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: lhu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i16, ptr %a unordered, align 2
ret i16 %1
@@ -620,9 +594,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i16_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lh a0, 0(a0)
-; RV32IA-NEXT: slli a0, a0, 16
-; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: lhu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_monotonic:
@@ -639,9 +611,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i16_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lh a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 48
-; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: lhu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i16, ptr %a monotonic, align 2
ret i16 %1
@@ -662,17 +632,13 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: slli a0, a0, 16
-; RV32IA-WMO-NEXT: srli a0, a0, 16
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-NEXT: slli a0, a0, 16
-; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_acquire:
@@ -689,47 +655,35 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: slli a0, a0, 48
-; RV64IA-WMO-NEXT: srli a0, a0, 48
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 48
-; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -741,9 +695,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16
-; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -755,9 +707,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48
-; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i16, ptr %a acquire, align 2
ret i16 %1
@@ -779,18 +729,14 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
-; RV32IA-WMO-NEXT: lh a0, 0(a0)
-; RV32IA-WMO-NEXT: slli a0, a0, 16
-; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
-; RV32IA-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-NEXT: slli a0, a0, 16
-; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_seq_cst:
@@ -808,52 +754,40 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lh a0, 0(a0)
-; RV64IA-WMO-NEXT: slli a0, a0, 48
-; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 48
-; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
@@ -903,9 +837,7 @@ define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i32_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lw a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 32
-; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: lwu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i32, ptr %a unordered, align 4
ret i32 %1
@@ -941,9 +873,7 @@ define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i32_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lw a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 32
-; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: lwu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i32, ptr %a monotonic, align 4
ret i32 %1
@@ -985,17 +915,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: slli a0, a0, 32
-; RV64IA-WMO-NEXT: srli a0, a0, 32
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 32
-; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
@@ -1011,17 +937,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
@@ -1043,9 +965,7 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32
-; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i32, ptr %a acquire, align 4
ret i32 %1
@@ -1090,18 +1010,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lw a0, 0(a0)
-; RV64IA-WMO-NEXT: slli a0, a0, 32
-; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 32
-; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
@@ -1120,18 +1036,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
More information about the llvm-commits
mailing list