[llvm] [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (PR #136502)
Jan Górski via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 22 16:14:40 PDT 2025
https://github.com/janagor updated https://github.com/llvm/llvm-project/pull/136502
>From 9678b8ea25c5d64291995d58fc7f4c0448d9065b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Sat, 19 Apr 2025 23:28:02 +0200
Subject: [PATCH 1/2] Added pre-commit test.
---
llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 1386 +++++++++++++++++++
1 file changed, 1386 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/atomic-load-zext.ll
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..ca76a9ba58614
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1386 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1
+ %2 = trunc nuw i8 %1 to i1
+ ret i1 %2
+}
+
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a unordered, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+ %1 = load atomic i8, ptr %a monotonic, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i8, ptr %a acquire, align 1
+ ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i8, ptr %a seq_cst, align 1
+ ret i8 %1
+}
+
+define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+ %1 = load atomic i16, ptr %a unordered, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+ %1 = load atomic i16, ptr %a monotonic, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i16, ptr %a acquire, align 2
+ ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i16, ptr %a seq_cst, align 2
+ ret i16 %1
+}
+
+define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+ %1 = load atomic i32, ptr %a unordered, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+ %1 = load atomic i32, ptr %a monotonic, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i32, ptr %a acquire, align 4
+ ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i32, ptr %a seq_cst, align 4
+ ret i32 %1
+}
+
+define zeroext i64 @atomic_load_i64_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i64, ptr %a unordered, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+ %1 = load atomic i64, ptr %a monotonic, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 2
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+ %1 = load atomic i64, ptr %a acquire, align 8
+ ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 5
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: ret
+ %1 = load atomic i64, ptr %a seq_cst, align 8
+ ret i64 %1
+}
+
>From 22bf5ceb077c47a870b76958ef3855b91e22671c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Sun, 20 Apr 2025 01:43:24 +0200
Subject: [PATCH 2/2] [RISCV] Optimized `and` with `atomic_load` into
`zextload` when safe.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 44 ++++
llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 264 ++++++--------------
2 files changed, 121 insertions(+), 187 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7d192756fd56..3af3fea7eb161 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15148,6 +15148,48 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+static SDValue reduceANDOfAtomicLoad(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::ATOMIC_LOAD)
+ return SDValue();
+
+ AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
+ if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
+ return SDValue();
+
+ EVT LoadedVT = ALoad->getMemoryVT();
+ EVT ResultVT = N->getValueType(0);
+
+ SDValue MaskVal = N->getOperand(1);
+ ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal);
+ if (!MaskConst)
+ return SDValue();
+ uint64_t Mask = MaskConst->getZExtValue();
+ uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF
+ : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
+ : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF
+ : 0xFFFFFFFFFFFFFFFF;
+ if (Mask != ExpectedMask)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Chain = ALoad->getChain();
+ SDValue Ptr = ALoad->getBasePtr();
+ MachineMemOperand *MemOp = ALoad->getMemOperand();
+ SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr,
+ MemOp->getPointerInfo(), LoadedVT,
+ MemOp->getAlign(), MemOp->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0);
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -15182,6 +15224,8 @@ static SDValue performANDCombine(SDNode *N,
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
return V;
+ if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
index ca76a9ba58614..1fcf5f085646d 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -47,8 +47,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i1_unordered:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_unordered:
@@ -64,8 +63,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i1_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a unordered, align 1
%2 = trunc nuw i8 %1 to i1
@@ -86,8 +84,7 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i1_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_monotonic:
@@ -103,8 +100,7 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i1_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a monotonic, align 1
%2 = trunc nuw i8 %1 to i1
@@ -125,15 +121,13 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
;
; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: zext.b a0, a0
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_acquire:
@@ -149,41 +143,35 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: zext.b a0, a0
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
@@ -194,8 +182,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
@@ -206,8 +193,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i8, ptr %a acquire, align 1
%2 = trunc nuw i8 %1 to i1
@@ -229,16 +215,14 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i1_seq_cst:
@@ -255,46 +239,40 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
@@ -327,8 +305,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i8_unordered:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_unordered:
@@ -344,8 +321,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i8_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a unordered, align 1
ret i8 %1
@@ -365,8 +341,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i8_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lb a0, 0(a0)
-; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: lbu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_monotonic:
@@ -382,8 +357,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i8_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lb a0, 0(a0)
-; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: lbu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i8, ptr %a monotonic, align 1
ret i8 %1
@@ -403,15 +377,13 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: zext.b a0, a0
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_acquire:
@@ -427,41 +399,35 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: zext.b a0, a0
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -472,8 +438,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -484,8 +449,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i8, ptr %a acquire, align 1
ret i8 %1
@@ -506,16 +470,14 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
-; RV32IA-WMO-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
-; RV32IA-TSO-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i8_seq_cst:
@@ -532,46 +494,40 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
@@ -604,9 +560,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i16_unordered:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lh a0, 0(a0)
-; RV32IA-NEXT: slli a0, a0, 16
-; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: lhu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_unordered:
@@ -623,9 +577,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i16_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lh a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 48
-; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: lhu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i16, ptr %a unordered, align 2
ret i16 %1
@@ -646,9 +598,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
;
; RV32IA-LABEL: atomic_load_i16_monotonic:
; RV32IA: # %bb.0:
-; RV32IA-NEXT: lh a0, 0(a0)
-; RV32IA-NEXT: slli a0, a0, 16
-; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: lhu a0, 0(a0)
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_monotonic:
@@ -665,9 +615,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i16_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lh a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 48
-; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: lhu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i16, ptr %a monotonic, align 2
ret i16 %1
@@ -688,17 +636,13 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
-; RV32IA-WMO-NEXT: slli a0, a0, 16
-; RV32IA-WMO-NEXT: srli a0, a0, 16
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-NEXT: slli a0, a0, 16
-; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_acquire:
@@ -715,47 +659,35 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: slli a0, a0, 48
-; RV64IA-WMO-NEXT: srli a0, a0, 48
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 48
-; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -767,9 +699,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
; RV32IA-ZALASR-TSO: # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16
-; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-ZALASR-TSO-NEXT: ret
;
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -781,9 +711,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48
-; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i16, ptr %a acquire, align 2
ret i16 %1
@@ -805,18 +733,14 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV32IA-WMO: # %bb.0:
; RV32IA-WMO-NEXT: fence rw, rw
-; RV32IA-WMO-NEXT: lh a0, 0(a0)
-; RV32IA-WMO-NEXT: slli a0, a0, 16
-; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-NEXT: fence r, rw
; RV32IA-WMO-NEXT: ret
;
; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
; RV32IA-TSO: # %bb.0:
; RV32IA-TSO-NEXT: fence rw, rw
-; RV32IA-TSO-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-NEXT: slli a0, a0, 16
-; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-NEXT: ret
;
; RV64I-LABEL: atomic_load_i16_seq_cst:
@@ -834,52 +758,40 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lh a0, 0(a0)
-; RV64IA-WMO-NEXT: slli a0, a0, 48
-; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 48
-; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
@@ -929,9 +841,7 @@ define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i32_unordered:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lw a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 32
-; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: lwu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i32, ptr %a unordered, align 4
ret i32 %1
@@ -967,9 +877,7 @@ define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
;
; RV64IA-LABEL: atomic_load_i32_monotonic:
; RV64IA: # %bb.0:
-; RV64IA-NEXT: lw a0, 0(a0)
-; RV64IA-NEXT: slli a0, a0, 32
-; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: lwu a0, 0(a0)
; RV64IA-NEXT: ret
%1 = load atomic i32, ptr %a monotonic, align 4
ret i32 %1
@@ -1011,17 +919,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
; RV64IA-WMO: # %bb.0:
-; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
-; RV64IA-WMO-NEXT: slli a0, a0, 32
-; RV64IA-WMO-NEXT: srli a0, a0, 32
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
; RV64IA-TSO: # %bb.0:
-; RV64IA-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 32
-; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
@@ -1037,17 +941,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
@@ -1069,9 +969,7 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
;
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
; RV64IA-ZALASR-TSO: # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32
-; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic i32, ptr %a acquire, align 4
ret i32 %1
@@ -1116,18 +1014,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
; RV64IA-WMO: # %bb.0:
; RV64IA-WMO-NEXT: fence rw, rw
-; RV64IA-WMO-NEXT: lw a0, 0(a0)
-; RV64IA-WMO-NEXT: slli a0, a0, 32
-; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-NEXT: fence r, rw
; RV64IA-WMO-NEXT: ret
;
; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
; RV64IA-TSO: # %bb.0:
; RV64IA-TSO-NEXT: fence rw, rw
-; RV64IA-TSO-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-NEXT: slli a0, a0, 32
-; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-NEXT: ret
;
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
@@ -1146,18 +1040,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
;
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
;
; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
More information about the llvm-commits
mailing list