[llvm] [RISCV] Add short forward branch support for `lb`, `lbu`, `lh`, `lhu`, `lw`, `lwu` and `ld` (PR #170829)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 7 22:48:07 PST 2025
================
@@ -0,0 +1,4284 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a | FileCheck %s --check-prefixes=RV32I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a | FileCheck %s --check-prefixes=RV64I
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-ialu | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFB
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV32I-SFBILOAD
+; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64 -mattr=+a,+short-forward-branch-iload | \
+; RUN: FileCheck %s --check-prefixes=RV64I-SFBILOAD
+
+define i32 @test_i8_s_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB0_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB0_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB0_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB0_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB0_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB0_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB0_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i8_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB1_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB1_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB1_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB1_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB1_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB1_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB1_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_s_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB2_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB2_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB2_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB2_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB2_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB2_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB2_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i16_z_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB3_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB3_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB3_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB3_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB3_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB3_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB3_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB3_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB3_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB3_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_3(ptr %base, i1 zeroext %x, i32 %b) nounwind {
+; RV32I-LABEL: test_i32_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB4_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB4_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB4_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: bnez a1, .LBB4_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB4_2: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB4_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB4_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB4_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB4_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB4_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_s_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB5_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB5_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB5_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB5_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB5_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB5_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB5_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB5_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB5_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB5_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i8_z_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i8_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB6_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB6_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB6_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB6_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB6_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB6_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB6_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB6_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB6_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB6_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i32 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_s_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_s_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB7_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB7_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB7_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB7_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB7_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB7_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB7_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB7_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB7_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i32 ; sign-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i16_z_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i16_z_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB8_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB8_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB8_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB8_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB8_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB8_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB8_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB8_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i32 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i32 ; zero-extend to 32 bits
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %ext, i32 %b
+ ret i32 %res
+}
+
+define i32 @test_i32_store_3(ptr %base, i1 zeroext %x, i32 %b, ptr %base1, i32 %c) nounwind {
+; RV32I-LABEL: test_i32_store_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a0, 16(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: sw a4, 0(a3)
+; RV32I-NEXT: bnez a1, .LBB9_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB9_2: # %entry
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_store_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a0, 16(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: sw a4, 0(a3)
+; RV64I-NEXT: bnez a1, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB9_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_store_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lw a0, 16(a0)
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: bnez a1, .LBB9_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB9_2: # %entry
+; RV32I-SFB-NEXT: sw a4, 0(a3)
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_store_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lw a0, 16(a0)
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: bnez a1, .LBB9_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB9_2: # %entry
+; RV64I-SFB-NEXT: sw a4, 0(a3)
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_store_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV32I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_store_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lw a0, 16(a0)
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB9_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB9_2: # %entry
+; RV64I-SFBILOAD-NEXT: sw a4, 0(a3)
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i32 4 ; compute base + 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 4 ; load 32-bit value
+ store i32 %c, ptr %base1
+ %res = select i1 %x, i32 %val, i32 %b
+ ret i32 %res
+}
+
+define i64 @test_i8_s_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lb a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lb a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB10_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lb a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB10_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB10_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB10_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB10_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lb a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB10_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB10_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB10_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB10_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lb a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB10_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB10_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = sext i8 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i8_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i8_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lbu a0, 4(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB11_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i8_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lbu a0, 4(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB11_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i8_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lbu a0, 4(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB11_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB11_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB11_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB11_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i8_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lbu a0, 4(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB11_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB11_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB11_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB11_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB11_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i8_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lbu a0, 4(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB11_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB11_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+ %addr = getelementptr i8, ptr %base, i64 4 ; compute base + 4
+ %val = load atomic i8, ptr %addr acquire, align 1 ; load 8-bit value
+ %ext = zext i8 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_s_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_s_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lh a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB12_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_s_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lh a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB12_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB12_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_s_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lh a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB12_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a2, a0
+; RV32I-SFB-NEXT: .LBB12_2: # %entry
+; RV32I-SFB-NEXT: beqz a1, .LBB12_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: srai a3, a0, 31
+; RV32I-SFB-NEXT: .LBB12_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_s_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lh a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB12_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB12_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: mv a2, a0
+; RV32I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB12_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: srai a3, a0, 31
+; RV32I-SFBILOAD-NEXT: .LBB12_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_s_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lh a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB12_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB12_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = sext i16 %val to i64 ; sign-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i16_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i16_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lhu a0, 8(a0)
+; RV32I-NEXT: fence r, rw
+; RV32I-NEXT: bnez a1, .LBB13_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB13_2: # %entry
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i16_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lhu a0, 8(a0)
+; RV64I-NEXT: fence r, rw
+; RV64I-NEXT: bnez a1, .LBB13_2
+; RV64I-NEXT: # %bb.1: # %entry
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: .LBB13_2: # %entry
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i16_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: lhu a0, 8(a0)
+; RV32I-SFB-NEXT: beqz a1, .LBB13_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: li a3, 0
+; RV32I-SFB-NEXT: .LBB13_2: # %entry
+; RV32I-SFB-NEXT: bnez a1, .LBB13_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: mv a0, a2
+; RV32I-SFB-NEXT: .LBB13_4: # %entry
+; RV32I-SFB-NEXT: fence r, rw
+; RV32I-SFB-NEXT: mv a1, a3
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i16_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: lhu a0, 8(a0)
+; RV64I-SFB-NEXT: bnez a1, .LBB13_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, a2
+; RV64I-SFB-NEXT: .LBB13_2: # %entry
+; RV64I-SFB-NEXT: fence r, rw
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV32I-SFBILOAD-NEXT: beqz a1, .LBB13_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: li a3, 0
+; RV32I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV32I-SFBILOAD-NEXT: bnez a1, .LBB13_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, a2
+; RV32I-SFBILOAD-NEXT: .LBB13_4: # %entry
+; RV32I-SFBILOAD-NEXT: fence r, rw
+; RV32I-SFBILOAD-NEXT: mv a1, a3
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i16_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: lhu a0, 8(a0)
+; RV64I-SFBILOAD-NEXT: bnez a1, .LBB13_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, a2
+; RV64I-SFBILOAD-NEXT: .LBB13_2: # %entry
+; RV64I-SFBILOAD-NEXT: fence r, rw
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i16, ptr %base, i64 4 ; compute base + 4 i16 elements (byte offset 8)
+ %val = load atomic i16, ptr %addr acquire, align 2 ; load 16-bit value
+ %ext = zext i16 %val to i64 ; zero-extend to 64 bits
+ %res = select i1 %x, i64 %ext, i64 %b
+ ret i64 %res
+}
+
+define i64 @test_i32_z_1_3(ptr %base, i1 zeroext %x, i64 %b) nounwind {
+; RV32I-LABEL: test_i32_z_1_3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: addi a1, a0, 16
+; RV32I-NEXT: li a0, 4
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: li a3, 2
+; RV32I-NEXT: call __atomic_load
+; RV32I-NEXT: beqz s2, .LBB14_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: lw s1, 12(sp)
+; RV32I-NEXT: .LBB14_2: # %entry
+; RV32I-NEXT: addi a1, s2, -1
+; RV32I-NEXT: and a1, a1, s0
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_i32_z_1_3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: addi a1, a0, 16
+; RV64I-NEXT: li a0, 4
+; RV64I-NEXT: addi a2, sp, 4
+; RV64I-NEXT: li a3, 2
+; RV64I-NEXT: call __atomic_load
+; RV64I-NEXT: beqz s1, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: lwu s0, 4(sp)
+; RV64I-NEXT: .LBB14_2: # %entry
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SFB-LABEL: test_i32_z_1_3:
+; RV32I-SFB: # %bb.0: # %entry
+; RV32I-SFB-NEXT: addi sp, sp, -32
+; RV32I-SFB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFB-NEXT: mv s0, a3
+; RV32I-SFB-NEXT: mv s1, a2
+; RV32I-SFB-NEXT: mv s2, a1
+; RV32I-SFB-NEXT: addi a1, a0, 16
+; RV32I-SFB-NEXT: li a0, 4
+; RV32I-SFB-NEXT: addi a2, sp, 12
+; RV32I-SFB-NEXT: li a3, 2
+; RV32I-SFB-NEXT: call __atomic_load
+; RV32I-SFB-NEXT: lw a0, 12(sp)
+; RV32I-SFB-NEXT: bnez s2, .LBB14_2
+; RV32I-SFB-NEXT: # %bb.1: # %entry
+; RV32I-SFB-NEXT: mv a0, s1
+; RV32I-SFB-NEXT: .LBB14_2: # %entry
+; RV32I-SFB-NEXT: beqz s2, .LBB14_4
+; RV32I-SFB-NEXT: # %bb.3: # %entry
+; RV32I-SFB-NEXT: li s0, 0
+; RV32I-SFB-NEXT: .LBB14_4: # %entry
+; RV32I-SFB-NEXT: mv a1, s0
+; RV32I-SFB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFB-NEXT: addi sp, sp, 32
+; RV32I-SFB-NEXT: ret
+;
+; RV64I-SFB-LABEL: test_i32_z_1_3:
+; RV64I-SFB: # %bb.0: # %entry
+; RV64I-SFB-NEXT: addi sp, sp, -32
+; RV64I-SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFB-NEXT: mv s0, a2
+; RV64I-SFB-NEXT: mv s1, a1
+; RV64I-SFB-NEXT: addi a1, a0, 16
+; RV64I-SFB-NEXT: li a0, 4
+; RV64I-SFB-NEXT: addi a2, sp, 4
+; RV64I-SFB-NEXT: li a3, 2
+; RV64I-SFB-NEXT: call __atomic_load
+; RV64I-SFB-NEXT: lwu a0, 4(sp)
+; RV64I-SFB-NEXT: bnez s1, .LBB14_2
+; RV64I-SFB-NEXT: # %bb.1: # %entry
+; RV64I-SFB-NEXT: mv a0, s0
+; RV64I-SFB-NEXT: .LBB14_2: # %entry
+; RV64I-SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFB-NEXT: addi sp, sp, 32
+; RV64I-SFB-NEXT: ret
+;
+; RV32I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV32I-SFBILOAD: # %bb.0: # %entry
+; RV32I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV32I-SFBILOAD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-SFBILOAD-NEXT: mv s0, a3
+; RV32I-SFBILOAD-NEXT: mv s1, a2
+; RV32I-SFBILOAD-NEXT: mv s2, a1
+; RV32I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV32I-SFBILOAD-NEXT: li a0, 4
+; RV32I-SFBILOAD-NEXT: addi a2, sp, 12
+; RV32I-SFBILOAD-NEXT: li a3, 2
+; RV32I-SFBILOAD-NEXT: call __atomic_load
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_2
+; RV32I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV32I-SFBILOAD-NEXT: lw s1, 12(sp)
+; RV32I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV32I-SFBILOAD-NEXT: beqz s2, .LBB14_4
+; RV32I-SFBILOAD-NEXT: # %bb.3: # %entry
+; RV32I-SFBILOAD-NEXT: li s0, 0
+; RV32I-SFBILOAD-NEXT: .LBB14_4: # %entry
+; RV32I-SFBILOAD-NEXT: mv a0, s1
+; RV32I-SFBILOAD-NEXT: mv a1, s0
+; RV32I-SFBILOAD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV32I-SFBILOAD-NEXT: ret
+;
+; RV64I-SFBILOAD-LABEL: test_i32_z_1_3:
+; RV64I-SFBILOAD: # %bb.0: # %entry
+; RV64I-SFBILOAD-NEXT: addi sp, sp, -32
+; RV64I-SFBILOAD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-SFBILOAD-NEXT: mv s0, a2
+; RV64I-SFBILOAD-NEXT: mv s1, a1
+; RV64I-SFBILOAD-NEXT: addi a1, a0, 16
+; RV64I-SFBILOAD-NEXT: li a0, 4
+; RV64I-SFBILOAD-NEXT: addi a2, sp, 4
+; RV64I-SFBILOAD-NEXT: li a3, 2
+; RV64I-SFBILOAD-NEXT: call __atomic_load
+; RV64I-SFBILOAD-NEXT: beqz s1, .LBB14_2
+; RV64I-SFBILOAD-NEXT: # %bb.1: # %entry
+; RV64I-SFBILOAD-NEXT: lwu s0, 4(sp)
+; RV64I-SFBILOAD-NEXT: .LBB14_2: # %entry
+; RV64I-SFBILOAD-NEXT: mv a0, s0
+; RV64I-SFBILOAD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-SFBILOAD-NEXT: addi sp, sp, 32
+; RV64I-SFBILOAD-NEXT: ret
+entry:
+  %addr = getelementptr i32, ptr %base, i64 4 ; compute base + 4 i32 elements (byte offset 16)
+ %val = load atomic i32, ptr %addr acquire, align 2 ; load 32-bit value
----------------
topperc wrote:
Need `align 4` here so this isn't expanded to a `__atomic_load` library call; with `align 2` the `i32` atomic load is under-aligned.
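For reference, a minimal sketch of the suggested change, keeping the rest of the test unchanged: with natural 4-byte alignment and `+a`, the atomic load itself should lower to a plain `lw` followed by `fence r, rw` (as in `test_i32_3` above) rather than the `__atomic_load` libcall.

```llvm
entry:
  %addr = getelementptr i32, ptr %base, i64 4        ; compute base + 4 i32 elements (byte offset 16)
  %val = load atomic i32, ptr %addr acquire, align 4 ; naturally aligned: no libcall
  %ext = zext i32 %val to i64                        ; zero-extend to 64 bits
  %res = select i1 %x, i64 %ext, i64 %b
  ret i64 %res
}
```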
https://github.com/llvm/llvm-project/pull/170829