[llvm] [SelectionDAG][RISCV] Teach computeKnownBits to use range metadata for atomic_load. (PR #137119)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 23:30:50 PDT 2025
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/137119
And teach SelectionDAGBuilder to get the range metadata in
visitAtomicLoad.
This allows us to recognize that sign-extending a byte load of a
boolean value from memory will produce zeros for the extended bits,
which lets us remove an AND on RISC-V.
Tests copied from #136502 with range metadata added to i1 cases.
Some of the test effects overlap with #136502, but that patch can't
handle the acquire or seq_cst cases with the Zalasr extension, since
Zalasr only provides sign-extending versions of those loads.
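For illustration, this is the pattern the change improves, reduced from
the i1 acquire test added in this patch (the !range metadata says the
loaded byte is 0 or 1):

  define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
    %1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
    %2 = trunc nuw i8 %1 to i1
    ret i1 %2
  }
  !0 = !{i8 0, i8 2}
  !1 = !{}

With Zalasr this previously lowered to an lb.aq followed by a zext.b;
once computeKnownBits can see the range metadata, the zext.b is dropped.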
From 0b5a2008a9934e12ffff9dcbb1b07da57efde611 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 23 Apr 2025 22:19:18 -0700
Subject: [PATCH 1/2] Pre-commit tests
---
llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 1388 +++++++++++++++++++
1 file changed, 1388 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/atomic-load-zext.ll
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..d7315ff460753
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1388 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1, !range !0, !noundef !1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1
+ ret i8 %1
+}
+
+define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+ %1 = load atomic i16, ptr %a unordered, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+ %1 = load atomic i16, ptr %a monotonic, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i16, ptr %a acquire, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i16, ptr %a seq_cst, align 2
+ ret i16 %1
+}
+
+define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+ %1 = load atomic i32, ptr %a unordered, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+ %1 = load atomic i32, ptr %a monotonic, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i32, ptr %a acquire, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i32, ptr %a seq_cst, align 4
+ ret i32 %1
+}
+
+define zeroext i64 @atomic_load_i64_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i64, ptr %a unordered, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i64, ptr %a monotonic, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 2
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i64, ptr %a acquire, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 5
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i64, ptr %a seq_cst, align 8
+ ret i64 %1
+}
+
+!0 = !{i8 0, i8 2}
+!1 = !{}
From 5abee2b49737dfe398641b890b606cff7b43ad40 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 23 Apr 2025 23:22:54 -0700
Subject: [PATCH 2/2] [SelectionDAG][RISCV] Teach computeKnownBits to use range
metadata for atomic_load.
And teach SelectionDAGBuilder to get the range metadata in
visitAtomicLoad.
This allows us to recognize that sign-extending a byte load of a
boolean value from memory will produce zeros for the extended bits,
which lets us remove an AND on RISC-V.
Tests copied from #136502 with range metadata added to i1 cases.
Some of the test effects overlap with #136502, but that patch can't
handle the acquire or seq_cst cases with the Zalasr extension, since
Zalasr only provides sign-extending versions of those loads.
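As a sketch of the known-bits math involved (not code from this patch):

  ; !range !{i8 0, i8 2} constrains the loaded i8 to [0, 2), so its
  ; KnownBits are Zero = 0b11111110, One = 0b00000000.
  ; For a SEXTLOAD the sign bit (bit 7) is known zero, so sign extending
  ; to the register width leaves the upper bits known zero as well --
  ; the same result as a zero extend, which is why the trailing zext.b
  ; becomes redundant.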
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 44 ++++++++++++++-----
.../SelectionDAG/SelectionDAGBuilder.cpp | 3 +-
llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 26 -----------
3 files changed, 36 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cf88c1f4ae937..092dbf926eec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4382,6 +4382,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
break;
}
+ case ISD::ATOMIC_LOAD: {
+ // If we are looking at the loaded value.
+ if (Op.getResNo() == 0) {
+ auto *AT = cast<AtomicSDNode>(Op);
+ unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
+ KnownBits KnownScalarMemory(ScalarMemorySize);
+ if (const MDNode *MD = AT->getRanges())
+ computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);
+
+ switch (AT->getExtensionType()) {
+ case ISD::ZEXTLOAD:
+ Known = KnownScalarMemory.zext(BitWidth);
+ break;
+ case ISD::SEXTLOAD:
+ Known = KnownScalarMemory.sext(BitWidth);
+ break;
+ case ISD::EXTLOAD:
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ Known = KnownScalarMemory.zext(BitWidth);
+ else if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ Known = KnownScalarMemory.sext(BitWidth);
+ else
+ Known = KnownScalarMemory.anyext(BitWidth);
+ break;
+ case ISD::NON_EXTLOAD:
+ Known = KnownScalarMemory;
+ break;
+ }
+ assert(Known.getBitWidth() == BitWidth);
+ }
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() == 1) {
// The boolean result conforms to getBooleanContents.
@@ -4407,21 +4439,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD: {
+ case ISD::ATOMIC_LOAD_UMAX: {
// If we are looking at the loaded value.
if (Op.getResNo() == 0) {
auto *AT = cast<AtomicSDNode>(Op);
unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();
- // For atomic_load, prefer to use the extension type.
- if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
- if (AT->getExtensionType() == ISD::ZEXTLOAD)
- Known.Zero.setBitsFrom(MemBits);
- else if (AT->getExtensionType() != ISD::SEXTLOAD &&
- TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
- Known.Zero.setBitsFrom(MemBits);
- } else if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
}
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 66bd78af2939c..4b05d89417d2c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5150,9 +5150,10 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
+ const MDNode *Ranges = getRangeMetadata(I);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
+ I.getAlign(), AAMDNodes(), Ranges, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
index d7315ff460753..8097179443791 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -48,7 +48,6 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
; RV32IA-LABEL: atomic_load_i1_unordered:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_unordered:
@@ -65,7 +64,6 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
; RV64IA-LABEL: atomic_load_i1_unordered:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a unordered, align 1, !range !0, !noundef !1
%2 = trunc nuw i8 %1 to i1
@@ -87,7 +85,6 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
; RV32IA-LABEL: atomic_load_i1_monotonic:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_monotonic:
@@ -104,7 +101,6 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
; RV64IA-LABEL: atomic_load_i1_monotonic:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a monotonic, align 1, !range !0, !noundef !1
%2 = trunc nuw i8 %1 to i1
@@ -127,13 +123,11 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: lb a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: zext.b a0, a0
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_acquire:
@@ -151,63 +145,53 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: lb a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: zext.b a0, a0
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
; RV32IA-ZALASR-WMO: # %bb.0:
; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
-; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
; RV32IA-ZALASR-WMO-NEXT: ret
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
; RV64IA-ZALASR-WMO: # %bb.0:
; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
-; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
; RV64IA-ZALASR-WMO-NEXT: ret
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
%2 = trunc nuw i8 %1 to i1
@@ -230,7 +214,6 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
; RV32IA-WMO-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-NEXT: zext.b a0, a0
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
@@ -238,7 +221,6 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_seq_cst:
@@ -256,7 +238,6 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
; RV64IA-WMO-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-NEXT: zext.b a0, a0
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
@@ -264,14 +245,12 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
@@ -279,14 +258,12 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
@@ -294,19 +271,16 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
; RV32IA-ZALASR: # %bb.0:
; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
-; RV32IA-ZALASR-NEXT: zext.b a0, a0
; RV32IA-ZALASR-NEXT: ret
;
; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
; RV64IA-ZALASR: # %bb.0:
; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
-; RV64IA-ZALASR-NEXT: zext.b a0, a0
; RV64IA-ZALASR-NEXT: ret
%1 = load atomic i8, ptr %a seq_cst, align 1, !range !0, !noundef !1
%2 = trunc nuw i8 %1 to i1