[llvm] [RISCV] Remove `AND` mask generated by `( zext ( atomic_load ) )` by replacing the load with `zextload` for orderings not stronger than monotonic. (PR #136502)

Jan Górski via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 25 10:10:46 PDT 2025


https://github.com/janagor updated https://github.com/llvm/llvm-project/pull/136502
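
In short: when the result of an atomic load is only ever used zero-extended, the explicit masking instruction is redundant and the load can be emitted as a zero-extending load directly. For example, the zero-extended atomic i8 load below lowers to a single `lbu` instead of `lb` followed by `zext.b` (excerpted from the test diff in this patch; RV32IA shown):

  define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
    %1 = load atomic i8, ptr %a unordered, align 1
    %2 = trunc nuw i8 %1 to i1
    ret i1 %2
  }

  ; before:                    after:
  ;   lb     a0, 0(a0)           lbu a0, 0(a0)
  ;   zext.b a0, a0              ret
  ;   ret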

>From 9678b8ea25c5d64291995d58fc7f4c0448d9065b Mon Sep 17 00:00:00 2001
From: Jan Górski <jan.a.gorski at wp.pl>
Date: Sat, 19 Apr 2025 23:28:02 +0200
Subject: [PATCH 1/6] Add pre-commit test.

---
 llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 1386 +++++++++++++++++++
 1 file changed, 1386 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/atomic-load-zext.ll

diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..ca76a9ba58614
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1386 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lb a0, 0(a0)
+; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lb a0, 0(a0)
+; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    ret
+  %1 = load atomic i8, ptr %a unordered, align 1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lb a0, 0(a0)
+; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lb a0, 0(a0)
+; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    ret
+  %1 = load atomic i8, ptr %a monotonic, align 1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT:    ret
+  %1 = load atomic i8, ptr %a acquire, align 1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 5
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    fence rw, rw
+; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    fence rw, rw
+; RV32IA-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    fence rw, rw
+; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    fence rw, rw
+; RV64IA-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-NEXT:    ret
+  %1 = load atomic i8, ptr %a seq_cst, align 1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    zext.b a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lb a0, 0(a0)
+; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    zext.b a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lb a0, 0(a0)
+; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    ret
+  %1 = load atomic i8, ptr %a unordered, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    zext.b a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lb a0, 0(a0)
+; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    zext.b a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lb a0, 0(a0)
+; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    ret
+  %1 = load atomic i8, ptr %a monotonic, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    zext.b a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    zext.b a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT:    ret
+  %1 = load atomic i8, ptr %a acquire, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 5
+; RV32I-NEXT:    call __atomic_load_1
+; RV32I-NEXT:    zext.b a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    fence rw, rw
+; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    fence rw, rw
+; RV32IA-TSO-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i8_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __atomic_load_1
+; RV64I-NEXT:    zext.b a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    fence rw, rw
+; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    fence rw, rw
+; RV64IA-TSO-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-NEXT:    ret
+  %1 = load atomic i8, ptr %a seq_cst, align 1
+  ret i8 %1
+}
+
+define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_unordered:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_2
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i16_unordered:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lh a0, 0(a0)
+; RV32IA-NEXT:    slli a0, a0, 16
+; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i16_unordered:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_2
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i16_unordered:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lh a0, 0(a0)
+; RV64IA-NEXT:    slli a0, a0, 48
+; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    ret
+  %1 = load atomic i16, ptr %a unordered, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_2
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i16_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lh a0, 0(a0)
+; RV32IA-NEXT:    slli a0, a0, 16
+; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i16_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_2
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i16_monotonic:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lh a0, 0(a0)
+; RV64IA-NEXT:    slli a0, a0, 48
+; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    ret
+  %1 = load atomic i16, ptr %a monotonic, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    call __atomic_load_2
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    slli a0, a0, 16
+; RV32IA-WMO-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-NEXT:    slli a0, a0, 16
+; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i16_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    call __atomic_load_2
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    slli a0, a0, 48
+; RV64IA-WMO-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-NEXT:    slli a0, a0, 48
+; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lh.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    slli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT:    srli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    slli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lh.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    slli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT:    srli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT:    ret
+  %1 = load atomic i16, ptr %a acquire, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 5
+; RV32I-NEXT:    call __atomic_load_2
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    fence rw, rw
+; RV32IA-WMO-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-NEXT:    slli a0, a0, 16
+; RV32IA-WMO-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    fence rw, rw
+; RV32IA-TSO-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-NEXT:    slli a0, a0, 16
+; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i16_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __atomic_load_2
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    fence rw, rw
+; RV64IA-WMO-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-NEXT:    slli a0, a0, 48
+; RV64IA-WMO-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    fence rw, rw
+; RV64IA-TSO-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-NEXT:    slli a0, a0, 48
+; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lh.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    slli a0, a0, 16
+; RV32IA-ZALASR-NEXT:    srli a0, a0, 16
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lh.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    slli a0, a0, 48
+; RV64IA-ZALASR-NEXT:    srli a0, a0, 48
+; RV64IA-ZALASR-NEXT:    ret
+  %1 = load atomic i16, ptr %a seq_cst, align 2
+  ret i16 %1
+}
+
+define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_unordered:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_4
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i32_unordered:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lw a0, 0(a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i32_unordered:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_4
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i32_unordered:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lw a0, 0(a0)
+; RV64IA-NEXT:    slli a0, a0, 32
+; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    ret
+  %1 = load atomic i32, ptr %a unordered, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_4
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i32_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    lw a0, 0(a0)
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i32_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_4
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i32_monotonic:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    lw a0, 0(a0)
+; RV64IA-NEXT:    slli a0, a0, 32
+; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    ret
+  %1 = load atomic i32, ptr %a monotonic, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    call __atomic_load_4
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i32_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    call __atomic_load_4
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    slli a0, a0, 32
+; RV64IA-WMO-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-NEXT:    slli a0, a0, 32
+; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    slli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT:    srli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT:    ret
+  %1 = load atomic i32, ptr %a acquire, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 5
+; RV32I-NEXT:    call __atomic_load_4
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO:       # %bb.0:
+; RV32IA-WMO-NEXT:    fence rw, rw
+; RV32IA-WMO-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-NEXT:    fence r, rw
+; RV32IA-WMO-NEXT:    ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO:       # %bb.0:
+; RV32IA-TSO-NEXT:    fence rw, rw
+; RV32IA-TSO-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i32_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __atomic_load_4
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    fence rw, rw
+; RV64IA-WMO-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-NEXT:    slli a0, a0, 32
+; RV64IA-WMO-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    fence rw, rw
+; RV64IA-TSO-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-NEXT:    slli a0, a0, 32
+; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    slli a0, a0, 32
+; RV64IA-ZALASR-NEXT:    srli a0, a0, 32
+; RV64IA-ZALASR-NEXT:    ret
+  %1 = load atomic i32, ptr %a seq_cst, align 4
+  ret i32 %1
+}
+
+define zeroext i64 @atomic_load_i64_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_unordered:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_8
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i64_unordered:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 0
+; RV32IA-NEXT:    call __atomic_load_8
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i64_unordered:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_8
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i64_unordered:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    ld a0, 0(a0)
+; RV64IA-NEXT:    ret
+  %1 = load atomic i64, ptr %a unordered, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_monotonic:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call __atomic_load_8
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i64_monotonic:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 0
+; RV32IA-NEXT:    call __atomic_load_8
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i64_monotonic:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 0
+; RV64I-NEXT:    call __atomic_load_8
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-LABEL: atomic_load_i64_monotonic:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    ld a0, 0(a0)
+; RV64IA-NEXT:    ret
+  %1 = load atomic i64, ptr %a monotonic, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_acquire:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 2
+; RV32I-NEXT:    call __atomic_load_8
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i64_acquire:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 2
+; RV32IA-NEXT:    call __atomic_load_8
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i64_acquire:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 2
+; RV64I-NEXT:    call __atomic_load_8
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_seq_cst:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a1, 5
+; RV32I-NEXT:    call __atomic_load_8
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IA-LABEL: atomic_load_i64_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    addi sp, sp, -16
+; RV32IA-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT:    li a1, 5
+; RV32IA-NEXT:    call __atomic_load_8
+; RV32IA-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 16
+; RV32IA-NEXT:    ret
+;
+; RV64I-LABEL: atomic_load_i64_seq_cst:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 5
+; RV64I-NEXT:    call __atomic_load_8
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO:       # %bb.0:
+; RV64IA-WMO-NEXT:    fence rw, rw
+; RV64IA-WMO-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-NEXT:    fence r, rw
+; RV64IA-WMO-NEXT:    ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO:       # %bb.0:
+; RV64IA-TSO-NEXT:    fence rw, rw
+; RV64IA-TSO-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-NEXT:    ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    ret
+  %1 = load atomic i64, ptr %a seq_cst, align 8
+  ret i64 %1
+}
+

>From 22bf5ceb077c47a870b76958ef3855b91e22671c Mon Sep 17 00:00:00 2001
From: Jan Górski <jan.a.gorski at wp.pl>
Date: Sun, 20 Apr 2025 01:43:24 +0200
Subject: [PATCH 2/6] [RISCV] Fold `and` of `atomic_load` into `zextload`
 when safe.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  44 ++++
 llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 264 ++++++--------------
 2 files changed, 121 insertions(+), 187 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7d192756fd56..3af3fea7eb161 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15148,6 +15148,48 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
 }
 
+static SDValue reduceANDOfAtomicLoad(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  if (N->getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::ATOMIC_LOAD)
+    return SDValue();
+
+  AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
+  if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
+    return SDValue();
+
+  EVT LoadedVT = ALoad->getMemoryVT();
+  EVT ResultVT = N->getValueType(0);
+
+  SDValue MaskVal = N->getOperand(1);
+  ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal);
+  if (!MaskConst)
+    return SDValue();
+  uint64_t Mask = MaskConst->getZExtValue();
+  uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8    ? 0xFF
+                          : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
+                          : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF
+                                                           : 0xFFFFFFFFFFFFFFFF;
+  if (Mask != ExpectedMask)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue Chain = ALoad->getChain();
+  SDValue Ptr = ALoad->getBasePtr();
+  MachineMemOperand *MemOp = ALoad->getMemOperand();
+  SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr,
+                                    MemOp->getPointerInfo(), LoadedVT,
+                                    MemOp->getAlign(), MemOp->getFlags());
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
+  DCI.recursivelyDeleteUnusedNodes(N0.getNode());
+  return SDValue(N, 0);
+}
+
 // Combines two comparison operation and logic operation to one selection
 // operation(min, max) and logic operation. Returns new constructed Node if
 // conditions for optimization are satisfied.
@@ -15182,6 +15224,8 @@ static SDValue performANDCombine(SDNode *N,
     return V;
   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
     return V;
+  if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
+    return V;
 
   if (DCI.isAfterLegalizeDAG())
     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
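
For reference, the combine above fires only when the AND mask is exactly the all-ones mask for the width of the atomically loaded type and the load's ordering is not stronger than monotonic; it then rewrites the DAG roughly as follows (informal sketch in the notation of the PR title; the chain result of the old atomic_load is rewired to the new load's chain):

  (and (atomic_load<i8> %chain, %ptr), 0xFF)
      --> (zextload<i8> %chain, %ptr)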
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
index ca76a9ba58614..1fcf5f085646d 100644
--- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -47,8 +47,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i1_unordered:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i1_unordered:
@@ -64,8 +63,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i1_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a unordered, align 1
   %2 = trunc nuw i8 %1 to i1
@@ -86,8 +84,7 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i1_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i1_monotonic:
@@ -103,8 +100,7 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i1_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a monotonic, align 1
   %2 = trunc nuw i8 %1 to i1
@@ -125,15 +121,13 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
 ; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
-; RV32IA-WMO-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
 ; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i1_acquire:
@@ -149,41 +143,35 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
@@ -194,8 +182,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
 ; RV32IA-ZALASR-TSO:       # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-ZALASR-TSO-NEXT:    ret
 ;
 ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
@@ -206,8 +193,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i8, ptr %a acquire, align 1
   %2 = trunc nuw i8 %1 to i1
@@ -229,16 +215,14 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
 ; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
 ; RV32IA-WMO:       # %bb.0:
 ; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    fence rw, rw
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i1_seq_cst:
@@ -255,46 +239,40 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
@@ -327,8 +305,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i8_unordered:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_unordered:
@@ -344,8 +321,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i8_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a unordered, align 1
   ret i8 %1
@@ -365,8 +341,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lb a0, 0(a0)
-; RV32IA-NEXT:    zext.b a0, a0
+; RV32IA-NEXT:    lbu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_monotonic:
@@ -382,8 +357,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lb a0, 0(a0)
-; RV64IA-NEXT:    zext.b a0, a0
+; RV64IA-NEXT:    lbu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i8, ptr %a monotonic, align 1
   ret i8 %1
@@ -403,15 +377,13 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
-; RV32IA-WMO-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_acquire:
@@ -427,41 +399,35 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -472,8 +438,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
 ; RV32IA-ZALASR-TSO:       # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-ZALASR-TSO-NEXT:    ret
 ;
 ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
@@ -484,8 +449,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i8, ptr %a acquire, align 1
   ret i8 %1
@@ -506,16 +470,14 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-WMO:       # %bb.0:
 ; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-NEXT:    zext.b a0, a0
+; RV32IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    fence rw, rw
-; RV32IA-TSO-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-NEXT:    zext.b a0, a0
+; RV32IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i8_seq_cst:
@@ -532,46 +494,40 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-NEXT:    zext.b a0, a0
+; RV64IA-WMO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-NEXT:    zext.b a0, a0
+; RV64IA-TSO-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lb a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
@@ -604,9 +560,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i16_unordered:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lh a0, 0(a0)
-; RV32IA-NEXT:    slli a0, a0, 16
-; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    lhu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_unordered:
@@ -623,9 +577,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i16_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lh a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 48
-; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    lhu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i16, ptr %a unordered, align 2
   ret i16 %1
@@ -646,9 +598,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
 ;
 ; RV32IA-LABEL: atomic_load_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    lh a0, 0(a0)
-; RV32IA-NEXT:    slli a0, a0, 16
-; RV32IA-NEXT:    srli a0, a0, 16
+; RV32IA-NEXT:    lhu a0, 0(a0)
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_monotonic:
@@ -665,9 +615,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lh a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 48
-; RV64IA-NEXT:    srli a0, a0, 48
+; RV64IA-NEXT:    lhu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i16, ptr %a monotonic, align 2
   ret i16 %1
@@ -688,17 +636,13 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
-; RV32IA-WMO-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-NEXT:    srli a0, a0, 16
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_acquire:
@@ -715,47 +659,35 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-NEXT:    srli a0, a0, 48
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -767,9 +699,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
 ; RV32IA-ZALASR-TSO:       # %bb.0:
-; RV32IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-ZALASR-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-ZALASR-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-ZALASR-TSO-NEXT:    ret
 ;
 ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
@@ -781,9 +711,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i16, ptr %a acquire, align 2
   ret i16 %1
@@ -805,18 +733,14 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-WMO:       # %bb.0:
 ; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    lh a0, 0(a0)
-; RV32IA-WMO-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-NEXT:    fence r, rw
 ; RV32IA-WMO-NEXT:    ret
 ;
 ; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-TSO:       # %bb.0:
 ; RV32IA-TSO-NEXT:    fence rw, rw
-; RV32IA-TSO-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_load_i16_seq_cst:
@@ -834,52 +758,40 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lh a0, 0(a0)
-; RV64IA-WMO-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV32IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV32IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 16
-; RV32IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV32IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 48
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lhu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
@@ -929,9 +841,7 @@ define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i32_unordered:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lw a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 32
-; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    lwu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i32, ptr %a unordered, align 4
   ret i32 %1
@@ -967,9 +877,7 @@ define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
 ;
 ; RV64IA-LABEL: atomic_load_i32_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    lw a0, 0(a0)
-; RV64IA-NEXT:    slli a0, a0, 32
-; RV64IA-NEXT:    srli a0, a0, 32
+; RV64IA-NEXT:    lwu a0, 0(a0)
 ; RV64IA-NEXT:    ret
   %1 = load atomic i32, ptr %a monotonic, align 4
   ret i32 %1
@@ -1011,17 +919,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
-; RV64IA-WMO-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-NEXT:    srli a0, a0, 32
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
@@ -1037,17 +941,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
@@ -1069,9 +969,7 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ;
 ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
 ; RV64IA-ZALASR-TSO:       # %bb.0:
-; RV64IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-ZALASR-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-ZALASR-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i32, ptr %a acquire, align 4
   ret i32 %1
@@ -1116,18 +1014,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-WMO:       # %bb.0:
 ; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    lw a0, 0(a0)
-; RV64IA-WMO-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-NEXT:    fence r, rw
 ; RV64IA-WMO-NEXT:    ret
 ;
 ; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-TSO:       # %bb.0:
 ; RV64IA-TSO-NEXT:    fence rw, rw
-; RV64IA-TSO-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-NEXT:    ret
 ;
 ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
@@ -1146,18 +1040,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-WMO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-WMO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    fence r, rw
 ; RV64IA-WMO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    slli a0, a0, 32
-; RV64IA-TSO-TRAILING-FENCE-NEXT:    srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT:    lwu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
 ;
 ; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:

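For context, a minimal sketch of the fold these CHECK-line updates reflect;
the IR below is hypothetical and not taken from the test file. A relaxed
atomic byte load whose only use is a zero-extension no longer needs a
separate masking instruction:

  define i32 @zext_atomic_byte(ptr %p) {
    %v = load atomic i8, ptr %p monotonic, align 1
    %z = zext i8 %v to i32   ; previously lowered to lb + zext.b,
    ret i32 %z               ; now selects a single lbu
  }
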
>From 14f212efec8758c7414cb075783650bab8c521fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Thu, 24 Apr 2025 15:07:14 +0200
Subject: [PATCH 3/6] fixup! [RISCV] Optimized `and` with `atomic_load` into
 `zextload` when safe.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 614b5631b3bd5..742beaaf5644b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15283,6 +15283,8 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
   SDValue N0 = N->getOperand(0);
   if (N0.getOpcode() != ISD::ATOMIC_LOAD)
     return SDValue();
+  if (!N0.hasOneUse())
+    return SDValue();
 
   AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
   if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
@@ -15310,7 +15312,7 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
   SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr,
                                     MemOp->getPointerInfo(), LoadedVT,
                                     MemOp->getAlign(), MemOp->getFlags());
-  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad);
+  DCI.CombineTo(N, ZextLoad);
   DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
   DCI.recursivelyDeleteUnusedNodes(N0.getNode());
   return SDValue(N, 0);

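A sketch of the multi-use case the new hasOneUse() check bails on
(hypothetical IR, not from the test suite): when the atomic load has a
second user, replacing the load node is not obviously safe, so the combine
is skipped.

  define i32 @two_uses(ptr %p) {
    %v = load atomic i8, ptr %p monotonic, align 1
    %z = zext i8 %v to i32   ; the use the combine targets
    %s = sext i8 %v to i32   ; second, sign-extending use of the same load
    %r = add i32 %z, %s
    ret i32 %r
  }
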
>From ce6f04f9163fce4dde40d96bba22d742573f2bd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Thu, 24 Apr 2025 22:07:46 +0200
Subject: [PATCH 4/6] fixup! [RISCV] Optimized `and` with `atomic_load` into
 `zextload` when safe.

Used maskTrailingOnes to derive the mask from the loaded type's width.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2f154990085a3..a6e165545367d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15299,11 +15299,7 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
   EVT LoadedVT = ALoad->getMemoryVT();
   EVT ResultVT = N->getValueType(0);
 
-  SDValue MaskVal = N->getOperand(1);
-  ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal);
-  if (!MaskConst)
-    return SDValue();
-  uint64_t Mask = MaskConst->getZExtValue();
+  uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
   uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8    ? 0xFF
                           : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
                           : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF

>From 78ecf8aaf1c3b842f4e908475431aa668a82f655 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Fri, 25 Apr 2025 14:57:25 +0200
Subject: [PATCH 5/6] fixup! [RISCV] Optimized `and` with `atomic_load` into
 `zextload` when safe.

Added patterns for atomic_load_zext_n, replacing getExtLoad with getAtomicLoad.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 +++---------
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td    |  3 +++
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6e165545367d..6ad6fa8255844 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15297,8 +15297,6 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
     return SDValue();
 
   EVT LoadedVT = ALoad->getMemoryVT();
-  EVT ResultVT = N->getValueType(0);
-
   uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
   uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8    ? 0xFF
                           : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
@@ -15307,13 +15305,9 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
   if (Mask != ExpectedMask)
     return SDValue();
 
-  SDLoc DL(N);
-  SDValue Chain = ALoad->getChain();
-  SDValue Ptr = ALoad->getBasePtr();
-  MachineMemOperand *MemOp = ALoad->getMemOperand();
-  SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr,
-                                    MemOp->getPointerInfo(), LoadedVT,
-                                    MemOp->getAlign(), MemOp->getFlags());
+  SDValue ZextLoad = DAG.getAtomicLoad(
+      ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
+      ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
   DCI.CombineTo(N, ZextLoad);
   DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
   DCI.recursivelyDeleteUnusedNodes(N0.getNode());
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index b348e774d50b8..fc649847e078c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -167,6 +167,8 @@ class seq_cst_store<PatFrag base>
 let Predicates = [HasAtomicLdSt] in {
   def : LdPat<relaxed_load<atomic_load_asext_8>,  LB>;
   def : LdPat<relaxed_load<atomic_load_asext_16>, LH>;
+  def : LdPat<relaxed_load<atomic_load_zext_8>,  LBU>;
+  def : LdPat<relaxed_load<atomic_load_zext_16>, LHU>;
 
   def : StPat<relaxed_store<atomic_store_8>,  SB, GPR, XLenVT>;
   def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
@@ -179,6 +181,7 @@ let Predicates = [HasAtomicLdSt, IsRV32] in {
 
 let Predicates = [HasAtomicLdSt, IsRV64] in {
   def : LdPat<relaxed_load<atomic_load_asext_32>, LW>;
+  def : LdPat<relaxed_load<atomic_load_zext_32>, LWU>;
   def : LdPat<relaxed_load<atomic_load_64>, LD, i64>;
   def : StPat<relaxed_store<atomic_store_64>, SD, GPR, i64>;
 }

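A hypothetical example of the RV64-only case covered by the new
atomic_load_zext_32 pattern (not from the test suite):

  define i64 @zext_atomic_word(ptr %p) {
    %v = load atomic i32, ptr %p monotonic, align 4
    %z = zext i32 %v to i64   ; selects lwu on RV64
    ret i64 %z                ; instead of lw + slli + srli
  }
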
>From 769d14fba67d5965212f647721ca543b48df53a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20G=C3=B3rski?= <jan.a.gorski at wp.pl>
Date: Fri, 25 Apr 2025 19:01:03 +0200
Subject: [PATCH 6/6] fixup! fixup! [RISCV] Optimized `and` with `atomic_load`
 into `zextload` when safe.

Corrected wrong usage of maskTrailingOnes: compare the actual AND constant
against the expected trailing-ones mask.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6ad6fa8255844..86938e3e32ca9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15297,11 +15297,11 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N,
     return SDValue();
 
   EVT LoadedVT = ALoad->getMemoryVT();
-  uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
-  uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8    ? 0xFF
-                          : LoadedVT.getSizeInBits() == 16 ? 0xFFFF
-                          : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF
-                                                           : 0xFFFFFFFFFFFFFFFF;
+  ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!MaskConst)
+    return SDValue();
+  uint64_t Mask = MaskConst->getZExtValue();
+  uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
   if (Mask != ExpectedMask)
     return SDValue();
 

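A hypothetical case the restored check rejects (not from the test suite):
when the AND constant is narrower than the loaded width, the fold must not
fire. The previous version compared two masks that were both derived from
LoadedVT's width, so the bail-out could never trigger.

  define i32 @partial_mask(ptr %p) {
    %v = load atomic i8, ptr %p monotonic, align 1
    %z = zext i8 %v to i32
    %m = and i32 %z, 15   ; 0x0F != 0xFF: not a plain zero-extension,
    ret i32 %m            ; so the combine leaves the AND in place
  }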