[llvm] [RISCV] Implement EmitTargetCodeForMemset for Xqcilsm (PR #151555)
Sam Elliott via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 10:51:24 PDT 2025
================
@@ -0,0 +1,925 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32IXQCILSM
+
+; Named struct types wrapping the fixed-size arrays used by the globals below.
+%struct.anon = type { [16 x i32] }
+%struct.anon.0 = type { [47 x i32] }
+%struct.anon.1 = type { [48 x i32] }
+%struct.anon.2 = type { [64 x i8] }
+%struct.struct1_t = type { [16 x i32] }
+
+; Zero-initialized globals used as memset destinations. All are 4-byte aligned
+; except @struct2, which is only 1-byte aligned.
+@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4
+@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 zeroinitializer, align 4
+@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4
+@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1
+@arr1 = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 4
+@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4
+
+; Zero memset with a run-time length (%n) and align 1: both configurations
+; emit a tail call to the memset library function.
+define void @test1(ptr nocapture %p, i32 %n) nounwind {
+; RV32I-LABEL: test1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test1:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: mv a2, a1
+; RV32IXQCILSM-NEXT: li a1, 0
+; RV32IXQCILSM-NEXT: tail memset
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false)
+ ret void
+}
+
+; memset intrinsic taking a 32-bit length: (dest, byte value, length, isvolatile).
+declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1)
+
+; 128-byte memset of byte -91 (0xA5) to a 4-byte-aligned pointer. The base
+; configuration calls memset; with Xqcilsm the byte is splatted into a1
+; (lui/addi) and stored by two qc.setwmi with count 16 at offsets 0 and 64.
+define void @test2(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 165
+; RV32I-NEXT: li a2, 128
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test2:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a1, 678490
+; RV32IXQCILSM-NEXT: addi a1, a1, 1445
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
+ ret void
+}
+
+; 188-byte memset of byte -91 to a 4-byte-aligned pointer. With Xqcilsm this
+; becomes three qc.setwmi with counts 16, 15 and 16 at offsets 0, 64 and 124;
+; the base configuration calls memset.
+define void @test2a(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2a:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 165
+; RV32I-NEXT: li a2, 188
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test2a:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a1, 678490
+; RV32IXQCILSM-NEXT: addi a1, a1, 1445
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 15, 64(a0)
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 124(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false)
+ ret void
+}
+
+; 192-byte memset of byte -91 to a 4-byte-aligned pointer: both configurations
+; call memset, i.e. no qc.setwmi expansion is emitted at this size
+; (presumably it exceeds the inline-expansion limit -- confirm against the lowering).
+define void @test2b(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2b:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 165
+; RV32I-NEXT: li a2, 192
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test2b:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: li a1, 165
+; RV32IXQCILSM-NEXT: li a2, 192
+; RV32IXQCILSM-NEXT: tail memset
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false)
+ ret void
+}
+
+; 128-byte memset of byte -91 to a 4-byte-aligned pointer.
+; NOTE(review): the IR and the expected output are identical to test2 apart
+; from the function name -- consider varying the inputs or dropping this case.
+define void @test2c(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2c:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 165
+; RV32I-NEXT: li a2, 128
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test2c:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a1, 678490
+; RV32IXQCILSM-NEXT: addi a1, a1, 1445
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
+ ret void
+}
+
+; 11-byte memset of byte -91 to a 4-byte-aligned pointer: both configurations
+; produce the same scalar store sequence (two sw, one sh, one sb) and no
+; qc.setwmi is used.
+define void @test2d(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2d:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, -91
+; RV32I-NEXT: lui a2, 1048570
+; RV32I-NEXT: lui a3, 678490
+; RV32I-NEXT: addi a2, a2, 1445
+; RV32I-NEXT: addi a3, a3, 1445
+; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sh a2, 8(a0)
+; RV32I-NEXT: sb a1, 10(a0)
+; RV32I-NEXT: ret
+;
+; RV32IXQCILSM-LABEL: test2d:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: li a1, -91
+; RV32IXQCILSM-NEXT: lui a2, 1048570
+; RV32IXQCILSM-NEXT: lui a3, 678490
+; RV32IXQCILSM-NEXT: addi a2, a2, 1445
+; RV32IXQCILSM-NEXT: addi a3, a3, 1445
+; RV32IXQCILSM-NEXT: sw a3, 0(a0)
+; RV32IXQCILSM-NEXT: sw a3, 4(a0)
+; RV32IXQCILSM-NEXT: sh a2, 8(a0)
+; RV32IXQCILSM-NEXT: sb a1, 10(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false)
+ ret void
+}
+
+
+; 256-byte zero memset to a 4-byte-aligned pointer, with the function returning
+; %p: both configurations emit a tail call to memset.
+define ptr @test3(ptr %p) nounwind {
+; RV32I-LABEL: test3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a2, 256
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test3:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: li a2, 256
+; RV32IXQCILSM-NEXT: li a1, 0
+; RV32IXQCILSM-NEXT: tail memset
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false)
+ ret ptr %p
+}
+
+; 128-byte zero memset to a 4-byte-aligned pointer, with the function returning
+; %p. With Xqcilsm the zero register is stored directly by two qc.setwmi
+; (count 16 at offsets 0 and 64); the base configuration calls memset.
+define ptr @test3a(ptr %p) nounwind {
+; RV32I-LABEL: test3a:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a2, 128
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test3a:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 64(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false)
+ ret ptr %p
+}
+
+; 64-byte zero memset of the global @struct1 (align 4). With Xqcilsm a single
+; qc.setwmi with count 16 covers it; the base configuration calls memset.
+define void @test4() nounwind {
+; RV32I-LABEL: test4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, %hi(struct1)
+; RV32I-NEXT: addi a0, a0, %lo(struct1)
+; RV32I-NEXT: li a2, 64
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test4:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a0, %hi(struct1)
+; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct1)
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false)
+ ret void
+}
+
+; 64-byte memset of byte -90 (0xA6) to a 4-byte-aligned pointer. With Xqcilsm
+; the byte is splatted into a1 (lui/addi) and stored by one qc.setwmi with
+; count 16; the base configuration calls memset.
+define void @test4a(ptr nocapture %s) nounwind {
+; RV32I-LABEL: test4a:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 166
+; RV32I-NEXT: li a2, 64
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test4a:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a1, 682602
+; RV32IXQCILSM-NEXT: addi a1, a1, 1702
+; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT: ret
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false)
+ ret void
+}
+
+; Lifetime marker intrinsics; in the visible part of this file they are only
+; called from test6a.
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+
+; Two back-to-back zero memsets: 188 bytes to @struct4b and 192 bytes to
+; @struct4b1. The base configuration makes two memset calls (spilling ra
+; around the first). With Xqcilsm the 188-byte one is expanded to three
+; qc.setwmi (counts 16, 15, 16) and only the 192-byte one remains a tail call
+; to memset, so no stack frame is needed.
+define void @test4b() nounwind {
+; RV32I-LABEL: test4b:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a0, %hi(struct4b)
+; RV32I-NEXT: addi a0, a0, %lo(struct4b)
+; RV32I-NEXT: li a2, 188
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call memset
+; RV32I-NEXT: lui a0, %hi(struct4b1)
+; RV32I-NEXT: addi a0, a0, %lo(struct4b1)
+; RV32I-NEXT: li a2, 192
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test4b:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a1, %hi(struct4b)
+; RV32IXQCILSM-NEXT: addi a1, a1, %lo(struct4b)
+; RV32IXQCILSM-NEXT: lui a0, %hi(struct4b1)
+; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct4b1)
+; RV32IXQCILSM-NEXT: li a2, 192
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a1)
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a1)
+; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 124(a1)
+; RV32IXQCILSM-NEXT: li a1, 0
+; RV32IXQCILSM-NEXT: tail memset
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false)
+ tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false)
+ ret void
+}
+
+; 64-byte zero memset of @struct2, which is only 1-byte aligned: both
+; configurations call memset and no qc.setwmi is emitted (presumably because
+; the destination is not word-aligned -- confirm against the lowering).
+define void @test5() nounwind {
+; RV32I-LABEL: test5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, %hi(struct2)
+; RV32I-NEXT: addi a0, a0, %lo(struct2)
+; RV32I-NEXT: li a2, 64
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: tail memset
+;
+; RV32IXQCILSM-LABEL: test5:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: lui a0, %hi(struct2)
+; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct2)
+; RV32IXQCILSM-NEXT: li a2, 64
+; RV32IXQCILSM-NEXT: li a1, 0
+; RV32IXQCILSM-NEXT: tail memset
+entry:
+ tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 4, i1 false)
+ ret void
+}
+
+; 4-byte zero memset of a stack slot followed by a load of it: both
+; configurations emit a single `sw zero` and return the constant 0.
+define i32 @test6() nounwind {
+; RV32I-LABEL: test6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw zero, 12(sp)
+; RV32I-NEXT: li a0, 0
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IXQCILSM-LABEL: test6:
+; RV32IXQCILSM: # %bb.0: # %entry
+; RV32IXQCILSM-NEXT: addi sp, sp, -16
+; RV32IXQCILSM-NEXT: sw zero, 12(sp)
+; RV32IXQCILSM-NEXT: li a0, 0
+; RV32IXQCILSM-NEXT: addi sp, sp, 16
+; RV32IXQCILSM-NEXT: ret
+entry:
+ %x = alloca i32, align 4
+ call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false)
+ %0 = load i32, ptr %x, align 4
+ ret i32 %0
+}
+
+define i32 @test6a() nounwind {
+; RV32I-LABEL: test6a:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw zero, 12(sp)
+; RV32I-NEXT: lw a0, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IXQCILSM-LABEL: test6a:
+; RV32IXQCILSM: # %bb.0:
+; RV32IXQCILSM-NEXT: addi sp, sp, -16
+; RV32IXQCILSM-NEXT: sw zero, 12(sp)
+; RV32IXQCILSM-NEXT: lw a0, 12(sp)
+; RV32IXQCILSM-NEXT: addi sp, sp, 16
+; RV32IXQCILSM-NEXT: ret
+ %x = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+ store i32 0, ptr %x, align 4
+ %x.0.x.0. = load volatile i32, ptr %x, align 4
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
+ ret i32 %x.0.x.0.
----------------
lenary wrote:
This doesn't seem like a relevant testcase?
https://github.com/llvm/llvm-project/pull/151555
More information about the llvm-commits
mailing list