[llvm] d7e631c - [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (#136502)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 28 08:35:54 PDT 2025


Author: Jan Górski
Date: 2025-04-28T08:35:51-07:00
New Revision: d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa

URL: https://github.com/llvm/llvm-project/commit/d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa
DIFF: https://github.com/llvm/llvm-project/commit/d7e631c7cd6d9c13b9519991ec6becf08bc6b8aa.diff

LOG: [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (#136502)

Extends changes from
[ff687af](https://github.com/llvm/llvm-project/commit/ff687af04f5b0e85305250587b524cb0b3849aa0).
Fixes https://github.com/llvm/llvm-project/issues/131476.

This patch adds a DAG combine that folds an `AND` of an `ATOMIC_LOAD` and
an all-ones mask covering exactly the loaded width (e.g. `0xFF`, `0xFFFF`,
etc.), which is generated as a result of `(zext (atomic_load))`, into a
single zero-extending atomic load, provided the load's ordering is
monotonic or weaker.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoA.td
    llvm/test/CodeGen/RISCV/atomic-load-zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 76e9900a06a1e..722dcbbc6dd53 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15321,6 +15321,40 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
 }
 
+static SDValue reduceANDOfAtomicLoad(SDNode *N, // Fold (and (atomic_load), all-ones mask)
+                                     TargetLowering::DAGCombinerInfo &DCI) { // into a ZEXTLOAD atomic load.
+  SelectionDAG &DAG = DCI.DAG;
+  if (N->getOpcode() != ISD::AND) // Only AND nodes are candidates.
+    return SDValue();
+  // Only operand 0 is inspected for the atomic load; operand 1 must be the constant mask.
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::ATOMIC_LOAD)
+    return SDValue();
+  if (!N0.hasOneUse()) // Bail out unless the loaded value has exactly one use (this AND).
+    return SDValue();
+  // Restrict the fold to loads whose ordering is monotonic or weaker.
+  AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
+  if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
+    return SDValue();
+
+  EVT LoadedVT = ALoad->getMemoryVT(); // Width actually read from memory.
+  ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!MaskConst) // A non-constant mask cannot be matched.
+    return SDValue();
+  uint64_t Mask = MaskConst->getZExtValue();
+  uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
+  if (Mask != ExpectedMask) // The mask must keep exactly the low LoadedVT bits, nothing more or less.
+    return SDValue();
+  // Rebuild the load as ZEXTLOAD with the same memory VT, chain, pointer and memory operand; the result type is the AND's type.
+  SDValue ZextLoad = DAG.getAtomicLoad(
+      ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
+      ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
+  DCI.CombineTo(N, ZextLoad); // Replace the AND's result with the new load's value.
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1)); // Move users of the old load's result 1 (the chain, per SelectionDAG load convention) to the new load.
+  DCI.recursivelyDeleteUnusedNodes(N0.getNode()); // Drop the old load once nothing uses it.
+  return SDValue(N, 0); // Return N itself: the replacement was already done by CombineTo above.
+}
+
 // Combines two comparison operation and logic operation to one selection
 // operation(min, max) and logic operation. Returns new constructed Node if
 // conditions for optimization are satisfied.
@@ -15355,6 +15389,8 @@ static SDValue performANDCombine(SDNode *N,
     return V;
   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
     return V;
+  if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
+    return V;
 
   if (DCI.isAfterLegalizeDAG())
     if (SDValue V = combineDeMorganOfBoolean(N, DAG))

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 74873a66bc8c9..5fa7d4160752f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -167,6 +167,8 @@ class seq_cst_store<PatFrag base>
 let Predicates = [HasAtomicLdSt] in {
   def : LdPat<relaxed_load<atomic_load_asext_8>,  LB>;
   def : LdPat<relaxed_load<atomic_load_asext_16>, LH>;
+  def : LdPat<relaxed_load<atomic_load_zext_8>,  LBU>;
+  def : LdPat<relaxed_load<atomic_load_zext_16>, LHU>;
 
   def : StPat<relaxed_store<atomic_store_8>,  SB, GPR, XLenVT>;
   def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
@@ -179,6 +181,7 @@ let Predicates = [HasAtomicLdSt, IsRV32] in {
 
 let Predicates = [HasAtomicLdSt, IsRV64] in {
   def : LdPat<relaxed_load<atomic_load_asext_32>, LW>;
+  def : LdPat<relaxed_load<atomic_load_zext_32>, LWU>;
   def : LdPat<relaxed_load<atomic_load_nonext_64>, LD, i64>;
   def : StPat<relaxed_store<atomic_store_64>, SD, GPR, i64>;
 }

diff  --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
index 8097179443791..68d6b127ac6f1 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -301,8 +301,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i8_unordered:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_unordered:
@@ -318,8 +317,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i8_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a unordered, align 1
   ret i8 %1
@@ -339,8 +337,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_monotonic:
@@ -356,8 +353,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a monotonic, align 1
   ret i8 %1
@@ -377,15 +373,13 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
-; RV32IA-WMO-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_acquire:
@@ -401,41 +395,35 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -446,8 +434,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-ZALASR-TSO:       # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-ZALASR-TSO-NEXT:    ret
 ;
 ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -458,8 +445,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i8, ptr %a acquire, align 1
   ret i8 %1
@@ -480,16 +466,14 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-WMO:       # %bb.0:
 ; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    fence rw, rw
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_seq_cst:
@@ -506,46 +490,40 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
@@ -578,9 +556,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i16_unordered:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lh a0, 0(a0)
-; RV32IA-NEXT:    slli a0, a0, 16
-; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    lhu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_unordered:
@@ -597,9 +573,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i16_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lh a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 48
-; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    lhu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i16, ptr %a unordered, align 2
   ret i16 %1
@@ -620,9 +594,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lh a0, 0(a0)
-; RV32IA-NEXT:    slli a0, a0, 16
-; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    lhu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_monotonic:
@@ -639,9 +611,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lh a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 48
-; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    lhu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i16, ptr %a monotonic, align 2
   ret i16 %1
@@ -662,17 +632,13 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
-; RV32IA-WMO-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-NEXT:    srli a0, a0, 16
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_acquire:
@@ -689,47 +655,35 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-NEXT:    srli a0, a0, 48
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -741,9 +695,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-ZALASR-TSO:       # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-ZALASR-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-ZALASR-TSO-NEXT:    ret
 ;
 ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -755,9 +707,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i16, ptr %a acquire, align 2
   ret i16 %1
@@ -779,18 +729,14 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-WMO:       # %bb.0:
 ; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    lh a0, 0(a0)
-; RV32IA-WMO-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    fence rw, rw
-; RV32IA-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_seq_cst:
@@ -808,52 +754,40 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lh a0, 0(a0)
-; RV64IA-WMO-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
@@ -903,9 +837,7 @@ define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i32_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lw a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 32
-; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    lwu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i32, ptr %a unordered, align 4
   ret i32 %1
@@ -941,9 +873,7 @@ define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i32_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lw a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 32
-; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    lwu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i32, ptr %a monotonic, align 4
   ret i32 %1
@@ -985,17 +915,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-NEXT:    srli a0, a0, 32
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
@@ -1011,17 +937,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
@@ -1043,9 +965,7 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i32, ptr %a acquire, align 4
   ret i32 %1
@@ -1090,18 +1010,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lw a0, 0(a0)
-; RV64IA-WMO-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
@@ -1120,18 +1036,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:


        


More information about the llvm-commits mailing list