[llvm] [clang][CodeGen] Add range metadata for atomic load of boolean type. (PR #136502)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 20 11:58:21 PDT 2025
Jan Górski <jan.a.gorski at wp.pl>
In-Reply-To: <llvm.org/llvm/llvm-project/pull/136502 at github.com>
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Jan Górski (janagor)
Changes:
Extends changes from [ff687af](https://github.com/llvm/llvm-project/commit/ff687af04f5b0e85305250587b524cb0b3849aa0).
Fixes https://github.com/llvm/llvm-project/issues/131476.
This patch adds a DAG combine that replaces an `AND` of an `ATOMIC_LOAD` with a full-width mask (e.g. `0xFF`, `0xFFFF`), the pattern produced when legalizing `(zext (atomic_load))`, with a single zero-extending load, provided the atomic ordering is monotonic or weaker.
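As a minimal sketch of the targeted pattern (the function name is illustrative; the IR mirrors the tests added below): on RV64, the `zext` legalizes to an `AND` of the `ATOMIC_LOAD` with `0xFF`, which this combine folds into one zero-extending `lbu`, dropping the separate mask instruction.

```llvm
; Illustrative pattern: (zext (atomic_load)) legalizes to
; (and (atomic_load %p), 255); with ordering monotonic or weaker, the
; combine rewrites it to a single zero-extending load (lbu on RISC-V).
define i64 @zext_of_atomic_load(ptr %p) {
  %v = load atomic i8, ptr %p monotonic, align 1
  %z = zext i8 %v to i64
  ret i64 %z
}
```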
---
Patch is 45.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136502.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+44)
- (added) llvm/test/CodeGen/RISCV/atomic-load-zext.ll (+1276)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98fba9e86e88a..2ec2ccb8cf132 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15220,6 +15220,48 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
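+// Fold (and (atomic_load x), FullMask) into a zero-extending load when the
+// mask covers every bit of the loaded type and the load's ordering is
+// monotonic or weaker, removing the separate masking instruction.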
+static SDValue reduceANDOfAtomicLoad(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::ATOMIC_LOAD)
+ return SDValue();
+
+ AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
+ if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
+ return SDValue();
+
+ EVT LoadedVT = ALoad->getMemoryVT();
+ EVT ResultVT = N->getValueType(0);
+
+ SDValue MaskVal = N->getOperand(1);
+ ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal);
+ if (!MaskConst)
+ return SDValue();
+ uint64_t Mask = MaskConst->getZExtValue();
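+ // The fold is only valid when the AND keeps every bit of the loaded value,
+ // i.e. it is exactly the zero-extension of the atomic load.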
+ uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF
+ : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
+ : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF
+ : 0xFFFFFFFFFFFFFFFF;
+ if (Mask != ExpectedMask)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Chain = ALoad->getChain();
+ SDValue Ptr = ALoad->getBasePtr();
+ MachineMemOperand *MemOp = ALoad->getMemOperand();
+ SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr,
+ MemOp->getPointerInfo(), LoadedVT,
+ MemOp->getAlign(), MemOp->getFlags());
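+ // Rewire users of both the masked value and the atomic load's chain to the
+ // new zero-extending load.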
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0);
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -15254,6 +15296,8 @@ static SDValue performANDCombine(SDNode *N,
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
return V;
+ if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..1fcf5f085646d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lbu a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lbu a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lbu a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lbu a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lbu a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lbu a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lbu a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lbu a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
+; RV64IA-WMO-NEXT: fence ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/136502
More information about the llvm-commits mailing list