[llvm] [RISCV] Implement EmitTargetCodeForMemset for Xqcilsm (PR #151555)

Sudharsan Veeravalli via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 3 23:12:14 PDT 2025


https://github.com/svs-quic updated https://github.com/llvm/llvm-project/pull/151555

>From 54bd095e51ede4c9c0437fef2d4275db22ae8fb4 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Thu, 31 Jul 2025 21:42:32 +0530
Subject: [PATCH 1/9] [RISCV] Implement EmitTargetCodeForMemset for Xqcilsm

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |   9 +
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td   |   8 +
 .../Target/RISCV/RISCVSelectionDAGInfo.cpp    | 101 ++
 llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h |   6 +
 llvm/test/CodeGen/RISCV/xqcilsm-memset.ll     | 929 ++++++++++++++++++
 5 files changed, 1053 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/xqcilsm-memset.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f223fdbef4359..b778c33083685 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1845,6 +1845,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     CurDAG->RemoveDeadNode(Node);
     return;
   }
+  case RISCVISD::QC_SETWMI: {
+    SDValue Chain = Node->getOperand(0);
+    SDVTList VTs = Node->getVTList();
+    SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2),
+                     Node->getOperand(3), Node->getOperand(4), Chain};
+    MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops);
+    ReplaceNode(Node, New);
+    return;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = Node->getConstantOperandVal(0);
     switch (IntNo) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 52656134b7774..2479ced164927 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -14,6 +14,14 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
+def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+                                             SDTCisSameAs<1, 3>,
+                                             SDTCisPtrTy<2>,
+                                             SDTCisVT<3, XLenVT>]>;
+
+def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple,
+                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 def uimm5nonzero : RISCVOp<XLenVT>,
                    ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<5, "NonZero">;
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 6ecddad72c078..edfa2992711a0 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVSelectionDAGInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 
 #define GET_SDNODE_DESC
 #include "RISCVGenSDNodeInfo.inc"
@@ -62,3 +64,102 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
   }
 #endif
 }
+
+SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo) const {
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  // We currently do this only for Xqcilsm
+  if (!Subtarget.hasVendorXqcilsm())
+    return SDValue();
+
+  // Do this only if we know the size at compile time.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+
+  uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
+
+  // Do this only if it is word aligned and we write a multiple of 4 bytes.
+  if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0))
+    return SDValue();
+
+  SmallVector<SDValue, 8> OutChains;
+  SDValue SizeWords, OffsetSetwmi;
+  SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+  int NumberOfWords = NumberOfBytesToWrite / 4;
+
+  // Helper for constructing the QC_SETWMI instruction
+  auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue {
+    SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi};
+    return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops);
+  };
+
+  bool IsZeroVal =
+      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+
+  // If the value is an i8 that is not known to be a constant zero.
+  if ((Src.getValueType() == MVT::i8) && !IsZeroVal)
+    // Replicate byte to word by multiplication with 0x01010101.
+    SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
+                                     DAG.getConstant(16843009, dl, MVT::i32));
+
+  // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
+  // So for 1-16 words we use a single QC_SETWMI:
+  //
+  // QC_SETWMI reg1, N, 0(reg2)
+  //
+  // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the
+  // second for the remainder:
+  //
+  // QC_SETWMI reg1, 16, 0(reg2)
+  // QC_SETWMI reg1, N-16, 64(reg2)
+  //
+  // For 33-48 words, we would like to use (16, 16, n), but that means the last
+  // QC_SETWMI needs an offset of 128 which the instruction doesn't support.
+  // So in this case we use a length of 15 for the second instruction and we do
+  // the rest with the third instruction.
+  // This means the maximum inlined number of words is 47 (for now):
+  //
+  // QC_SETWMI reg1, 16, 0(reg2)
+  // QC_SETWMI reg1, 15, 64(reg2)
+  // QC_SETWMI reg1, N-31, 124(reg2)
+  //
+  // For 48 words or more, call the target-independent memset.
+  if (NumberOfWords <= 16) {
+    // 1 - 16 words
+    SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32);
+    SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
+    return getSetwmiNode(SizeWords, OffsetSetwmi);
+  } else if (NumberOfWords <= 47) {
+    if (NumberOfWords <= 32) {
+      // 17 - 32 words
+      SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32);
+      OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
+      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+      SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
+      OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
+      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+    } else {
+      // 33 - 47 words
+      SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32);
+      OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32);
+      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+      SizeWords = DAG.getTargetConstant(15, dl, MVT::i32);
+      OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
+      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+      SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
+      OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
+      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+    }
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+  }
+
+  // >= 48 words. Call target independent memset.
+  return SDValue();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
index 641189f8661c1..08c8d11f2b108 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h
@@ -34,6 +34,12 @@ class RISCVSelectionDAGInfo : public SelectionDAGGenTargetInfo {
   void verifyTargetNode(const SelectionDAG &DAG,
                         const SDNode *N) const override;
 
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Dst, SDValue Src,
+                                  SDValue Size, Align Alignment,
+                                  bool isVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo) const override;
+
   bool hasPassthruOp(unsigned Opcode) const {
     return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask;
   }
diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
new file mode 100644
index 0000000000000..b0107cc1a4e03
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
@@ -0,0 +1,929 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32I
+
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS
+
+%struct.anon = type { [16 x i32] }
+%struct.anon.0 = type { [47 x i32] }
+%struct.anon.1 = type { [48 x i32] }
+%struct.anon.2 = type { [64 x i8] }
+%struct.struct1_t = type { [16 x i32] }
+
+@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4
+@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 zeroinitializer, align 4
+@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4
+@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1
+@arr1 = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 4
+@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4
+
+define void @test1(ptr nocapture %p, i32 %n) nounwind {
+; RV32I-LABEL: test1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test1:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    mv a2, a1
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1)
+
+define void @test2(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, 165
+; RV32I-NEXT:    li a2, 128
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test2:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a1, 678490
+; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
+  ret void
+}
+
+define void @test2a(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, 165
+; RV32I-NEXT:    li a2, 188
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test2a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a1, 678490
+; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 124(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false)
+  ret void
+}
+
+define void @test2b(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2b:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, 165
+; RV32I-NEXT:    li a2, 192
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test2b:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    li a1, 165
+; RV32IXQCISLS-NEXT:    li a2, 192
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false)
+  ret void
+}
+
+define void @test2c(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2c:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, 165
+; RV32I-NEXT:    li a2, 128
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test2c:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a1, 678490
+; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
+  ret void
+}
+
+define void @test2d(ptr nocapture %p) nounwind {
+; RV32I-LABEL: test2d:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, -91
+; RV32I-NEXT:    lui a2, 1048570
+; RV32I-NEXT:    lui a3, 678490
+; RV32I-NEXT:    addi a2, a2, 1445
+; RV32I-NEXT:    addi a3, a3, 1445
+; RV32I-NEXT:    sw a3, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sh a2, 8(a0)
+; RV32I-NEXT:    sb a1, 10(a0)
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test2d:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    li a1, -91
+; RV32IXQCISLS-NEXT:    lui a2, 1048570
+; RV32IXQCISLS-NEXT:    lui a3, 678490
+; RV32IXQCISLS-NEXT:    addi a2, a2, 1445
+; RV32IXQCISLS-NEXT:    addi a3, a3, 1445
+; RV32IXQCISLS-NEXT:    sw a3, 0(a0)
+; RV32IXQCISLS-NEXT:    sw a3, 4(a0)
+; RV32IXQCISLS-NEXT:    sh a2, 8(a0)
+; RV32IXQCISLS-NEXT:    sb a1, 10(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false)
+  ret void
+}
+
+
+define ptr @test3(ptr %p) nounwind {
+; RV32I-LABEL: test3:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a2, 256
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test3:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    li a2, 256
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false)
+  ret ptr %p
+}
+
+define ptr @test3a(ptr %p) nounwind {
+; RV32I-LABEL: test3a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a2, 128
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test3a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false)
+  ret ptr %p
+}
+
+define void @test4() nounwind {
+; RV32I-LABEL: test4:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(struct1)
+; RV32I-NEXT:    addi a0, a0, %lo(struct1)
+; RV32I-NEXT:    li a2, 64
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test4:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(struct1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false)
+  ret void
+}
+
+define void @test4a(ptr nocapture %s) nounwind {
+; RV32I-LABEL: test4a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    li a1, 166
+; RV32I-NEXT:    li a2, 64
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test4a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a1, 682602
+; RV32IXQCISLS-NEXT:    addi a1, a1, 1702
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+
+define void @test4b() nounwind {
+; RV32I-LABEL: test4b:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, %hi(struct4b)
+; RV32I-NEXT:    addi a0, a0, %lo(struct4b)
+; RV32I-NEXT:    li a2, 188
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    call memset
+; RV32I-NEXT:    lui a0, %hi(struct4b1)
+; RV32I-NEXT:    addi a0, a0, %lo(struct4b1)
+; RV32I-NEXT:    li a2, 192
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test4b:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a1, %hi(struct4b)
+; RV32IXQCISLS-NEXT:    addi a1, a1, %lo(struct4b)
+; RV32IXQCISLS-NEXT:    lui a0, %hi(struct4b1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct4b1)
+; RV32IXQCISLS-NEXT:    li a2, 192
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 124(a1)
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false)
+  tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false)
+  ret void
+}
+
+define void @test5() nounwind {
+; RV32I-LABEL: test5:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(struct2)
+; RV32I-NEXT:    addi a0, a0, %lo(struct2)
+; RV32I-NEXT:    li a2, 64
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test5:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(struct2)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct2)
+; RV32IXQCISLS-NEXT:    li a2, 64
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false)
+  ret void
+}
+
+define i32 @test6() nounwind {
+; RV32I-LABEL: test6:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    li a0, 0
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i32, align 4
+  call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false)
+  %0 = load i32, ptr %x, align 4
+  ret i32 %0
+}
+
+define i32 @test6a() nounwind {
+; RV32I-LABEL: test6a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    lw a0, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    lw a0, 12(sp)
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+  store i32 0, ptr %x, align 4
+  %x.0.x.0. = load volatile i32, ptr %x, align 4
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
+  ret i32 %x.0.x.0.
+}
+
+define zeroext i8 @test6b_c() nounwind {
+; RV32I-LABEL: test6b_c:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sb zero, 12(sp)
+; RV32I-NEXT:    lbu a0, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6b_c:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sb zero, 12(sp)
+; RV32IXQCISLS-NEXT:    lbu a0, 12(sp)
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i8, align 4
+  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x)
+  call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false)
+  %x.0.x.0. = load volatile i8, ptr %x, align 4
+  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %x)
+  ret i8 %x.0.x.0.
+}
+
+define signext i16 @test6b_s() nounwind {
+; RV32I-LABEL: test6b_s:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sh zero, 12(sp)
+; RV32I-NEXT:    lh a0, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6b_s:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sh zero, 12(sp)
+; RV32IXQCISLS-NEXT:    lh a0, 12(sp)
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i16, align 4
+  call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x)
+  store i16 0, ptr %x, align 4
+  %x.0.x.0. = load volatile i16, ptr %x, align 4
+  call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %x)
+  ret i16 %x.0.x.0.
+}
+
+define i32 @test6b_l() nounwind {
+; RV32I-LABEL: test6b_l:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    lw a0, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6b_l:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    lw a0, 12(sp)
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+  store i32 0, ptr %x, align 4
+  %x.0.x.0. = load volatile i32, ptr %x, align 4
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
+  ret i32 %x.0.x.0.
+}
+
+define i64 @test6b_ll() nounwind {
+; RV32I-LABEL: test6b_ll:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 8(sp)
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    lw a0, 8(sp)
+; RV32I-NEXT:    lw a1, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6b_ll:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 8(sp)
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    lw a0, 8(sp)
+; RV32IXQCISLS-NEXT:    lw a1, 12(sp)
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i64, align 8
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %x)
+  call void @llvm.memset.p0.i32(ptr nonnull align 8 %x, i8 0, i32 8, i1 false)
+  %x.0.x.0. = load volatile i64, ptr %x, align 8
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %x)
+  ret i64 %x.0.x.0.
+}
+
+define zeroext i8 @test6c_c() nounwind {
+; RV32I-LABEL: test6c_c:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sb zero, 15(sp)
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6c_c:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sb zero, 15(sp)
+; RV32IXQCISLS-NEXT:    li a0, 0
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i8
+  call void @llvm.memset.p0.i32(ptr align 1 %x, i8 0, i32 1, i1 false)
+  %0 = load i8, ptr %x, align 1
+  ret i8 %0
+}
+
+define signext i16 @test6c_s() nounwind {
+; RV32I-LABEL: test6c_s:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sh zero, 14(sp)
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6c_s:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sh zero, 14(sp)
+; RV32IXQCISLS-NEXT:    li a0, 0
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i16
+  call void @llvm.memset.p0.i32(ptr align 2 %x, i8 0, i32 2, i1 false)
+  %0 = load i16, ptr %x, align 2
+  ret i16 %0
+}
+
+define i32 @test6c_l() nounwind {
+; RV32I-LABEL: test6c_l:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6c_l:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    li a0, 0
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i32, align 4
+  call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false)
+  %0 = load i32, ptr %x, align 4
+  ret i32 %0
+}
+
+define i64 @test6c_ll() nounwind {
+; RV32I-LABEL: test6c_ll:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw zero, 8(sp)
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test6c_ll:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    addi sp, sp, -16
+; RV32IXQCISLS-NEXT:    sw zero, 8(sp)
+; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
+; RV32IXQCISLS-NEXT:    li a0, 0
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    addi sp, sp, 16
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  %x = alloca i64, align 8
+  call void @llvm.memset.p0.i32(ptr align 8 %x, i8 0, i32 8, i1 false)
+  %0 = load i64, ptr %x, align 8
+  ret i64 %0
+}
+
+define void @test7() nounwind {
+; RV32I-LABEL: test7:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    sw zero, 4(a0)
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test7:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 8, i1 false)
+  ret void
+}
+
+define void @test7a() nounwind {
+; RV32I-LABEL: test7a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test7a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 0, i1 false)
+  ret void
+}
+
+define void @test7a_unalign() nounwind {
+; RV32I-LABEL: test7a_unalign:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    li a1, -1
+; RV32I-NEXT:    sw a1, %lo(arr1)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sb a1, 16(a0)
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test7a_unalign:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    li a1, -1
+; RV32IXQCISLS-NEXT:    sw a1, %lo(arr1)(a0)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    sw a1, 4(a0)
+; RV32IXQCISLS-NEXT:    sw a1, 8(a0)
+; RV32IXQCISLS-NEXT:    sw a1, 12(a0)
+; RV32IXQCISLS-NEXT:    sb a1, 16(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 17, i1 false)
+  ret void
+}
+
+define void @test7b() nounwind {
+; RV32I-LABEL: test7b:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a1, 255
+; RV32I-NEXT:    li a2, 68
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test7b:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    li a1, -1
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 1, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 68, i1 false)
+  ret void
+}
+
+define void @test7c() nounwind {
+; RV32I-LABEL: test7c:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a1, 128
+; RV32I-NEXT:    li a2, 128
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test7c:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    lui a1, 526344
+; RV32IXQCISLS-NEXT:    addi a1, a1, 128
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -128, i32 128, i1 false)
+  ret void
+}
+
+define void @test7d() nounwind {
+; RV32I-LABEL: test7d:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a1, 13
+; RV32I-NEXT:    li a2, 148
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test7d:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    lui a1, 53457
+; RV32IXQCISLS-NEXT:    addi a1, a1, -755
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 6, 124(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 13, i32 148, i1 false)
+  ret void
+}
+
+define void @test7e() nounwind {
+; RV32I-LABEL: test7e:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a1, 239
+; RV32I-NEXT:    li a2, 100
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test7e:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    lui a1, 982783
+; RV32IXQCISLS-NEXT:    addi a1, a1, -17
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi a1, 9, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -17, i32 100, i1 false)
+  ret void
+}
+
+define void @test8() nounwind {
+; RV32I-LABEL: test8:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    sw zero, 4(a0)
+; RV32I-NEXT:    sw zero, 8(a0)
+; RV32I-NEXT:    sw zero, 12(a0)
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test8:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 8(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 12(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 16, i1 false)
+  ret void
+}
+
+define void @test9() nounwind {
+; RV32I-LABEL: test9:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    sw zero, 20(a0)
+; RV32I-NEXT:    sw zero, 24(a0)
+; RV32I-NEXT:    sw zero, 28(a0)
+; RV32I-NEXT:    sw zero, 4(a0)
+; RV32I-NEXT:    sw zero, 8(a0)
+; RV32I-NEXT:    sw zero, 12(a0)
+; RV32I-NEXT:    sw zero, 16(a0)
+; RV32I-NEXT:    ret
+;
+; RV32IXQCISLS-LABEL: test9:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    sw zero, 20(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 24(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 28(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 8(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 12(a0)
+; RV32IXQCISLS-NEXT:    sw zero, 16(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 32, i1 false)
+  ret void
+}
+
+define void @test10() nounwind {
+; RV32I-LABEL: test10:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 60
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test10:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 0(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 60, i1 false)
+  ret void
+}
+
+define void @test11() nounwind {
+; RV32I-LABEL: test11:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 64
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test11:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 64, i1 false)
+  ret void
+}
+
+define void @test12() nounwind {
+; RV32I-LABEL: test12:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 120
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test12:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 14, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 120, i1 false)
+  ret void
+}
+
+define void @test13() nounwind {
+; RV32I-LABEL: test13:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 124
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test13:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 124, i1 false)
+  ret void
+}
+
+define void @test14() nounwind {
+; RV32I-LABEL: test14:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 180
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test14:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 14, 124(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 180, i1 false)
+  ret void
+}
+
+define void @test15() nounwind {
+; RV32I-LABEL: test15:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 184
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test15:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 124(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 184, i1 false)
+  ret void
+}
+
+define void @test15a() nounwind {
+; RV32I-LABEL: test15a:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a1, 165
+; RV32I-NEXT:    li a2, 192
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test15a:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    li a1, 165
+; RV32IXQCISLS-NEXT:    li a2, 192
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -91, i32 192, i1 false)
+  ret void
+}
+
+define void @test15b() nounwind {
+; RV32I-LABEL: test15b:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 188
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test15b:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 124(a0)
+; RV32IXQCISLS-NEXT:    ret
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 188, i1 false)
+  ret void
+}
+
+define void @test15c() nounwind {
+; RV32I-LABEL: test15c:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a0, %hi(arr1)
+; RV32I-NEXT:    addi a0, a0, %lo(arr1)
+; RV32I-NEXT:    li a2, 192
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    tail memset
+;
+; RV32IXQCISLS-LABEL: test15c:
+; RV32IXQCISLS:       # %bb.0: # %entry
+; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCISLS-NEXT:    li a2, 192
+; RV32IXQCISLS-NEXT:    li a1, 0
+; RV32IXQCISLS-NEXT:    tail memset
+entry:
+  tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 192, i1 false)
+  ret void
+}

>From a84d52a30866f295749e7786864b64a8a79e6760 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Thu, 31 Jul 2025 22:44:08 +0530
Subject: [PATCH 2/9] Fix check prefix and comment

---
 .../Target/RISCV/RISCVSelectionDAGInfo.cpp    |   2 +-
 llvm/test/CodeGen/RISCV/xqcilsm-memset.ll     | 618 +++++++++---------
 2 files changed, 308 insertions(+), 312 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index edfa2992711a0..ce1e3a72c4e34 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -115,7 +115,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   // second for the remainder:
   //
   // QC_SETWMI reg1, 16, 0(reg2)
-  // QC_SETWMI reg1, 32-N, 64(reg2)
+  // QC_SETWMI reg1, N, 64(reg2)
   //
   // For 33-48 words, we would like to use (16, 16, n), but that means the last
   // QC_SETWMI needs an offset of 128 which the instruction doesnt support.
diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
index b0107cc1a4e03..3496438fde5b6 100644
--- a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
+++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s -check-prefixes=RV32I
-
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \
-; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS
+; RUN: | FileCheck %s -check-prefixes=RV32IXQCILSM
 
 %struct.anon = type { [16 x i32] }
 %struct.anon.0 = type { [47 x i32] }
@@ -25,11 +24,11 @@ define void @test1(ptr nocapture %p, i32 %n) nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test1:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    mv a2, a1
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test1:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    mv a2, a1
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false)
   ret void
@@ -44,13 +43,13 @@ define void @test2(ptr nocapture %p) nounwind {
 ; RV32I-NEXT:    li a2, 128
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test2:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a1, 678490
-; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test2:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a1, 678490
+; RV32IXQCILSM-NEXT:    addi a1, a1, 1445
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
   ret void
@@ -63,14 +62,14 @@ define void @test2a(ptr nocapture %p) nounwind {
 ; RV32I-NEXT:    li a2, 188
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test2a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a1, 678490
-; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 124(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test2a:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a1, 678490
+; RV32IXQCILSM-NEXT:    addi a1, a1, 1445
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 124(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false)
   ret void
@@ -83,11 +82,11 @@ define void @test2b(ptr nocapture %p) nounwind {
 ; RV32I-NEXT:    li a2, 192
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test2b:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    li a1, 165
-; RV32IXQCISLS-NEXT:    li a2, 192
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test2b:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    li a1, 165
+; RV32IXQCILSM-NEXT:    li a2, 192
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false)
   ret void
@@ -100,13 +99,13 @@ define void @test2c(ptr nocapture %p) nounwind {
 ; RV32I-NEXT:    li a2, 128
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test2c:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a1, 678490
-; RV32IXQCISLS-NEXT:    addi a1, a1, 1445
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test2c:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a1, 678490
+; RV32IXQCILSM-NEXT:    addi a1, a1, 1445
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false)
   ret void
@@ -126,18 +125,18 @@ define void @test2d(ptr nocapture %p) nounwind {
 ; RV32I-NEXT:    sb a1, 10(a0)
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test2d:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    li a1, -91
-; RV32IXQCISLS-NEXT:    lui a2, 1048570
-; RV32IXQCISLS-NEXT:    lui a3, 678490
-; RV32IXQCISLS-NEXT:    addi a2, a2, 1445
-; RV32IXQCISLS-NEXT:    addi a3, a3, 1445
-; RV32IXQCISLS-NEXT:    sw a3, 0(a0)
-; RV32IXQCISLS-NEXT:    sw a3, 4(a0)
-; RV32IXQCISLS-NEXT:    sh a2, 8(a0)
-; RV32IXQCISLS-NEXT:    sb a1, 10(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test2d:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    li a1, -91
+; RV32IXQCILSM-NEXT:    lui a2, 1048570
+; RV32IXQCILSM-NEXT:    lui a3, 678490
+; RV32IXQCILSM-NEXT:    addi a2, a2, 1445
+; RV32IXQCILSM-NEXT:    addi a3, a3, 1445
+; RV32IXQCILSM-NEXT:    sw a3, 0(a0)
+; RV32IXQCILSM-NEXT:    sw a3, 4(a0)
+; RV32IXQCILSM-NEXT:    sh a2, 8(a0)
+; RV32IXQCILSM-NEXT:    sb a1, 10(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false)
   ret void
@@ -151,11 +150,11 @@ define ptr @test3(ptr %p) nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test3:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    li a2, 256
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test3:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    li a2, 256
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false)
   ret ptr %p
@@ -168,11 +167,11 @@ define ptr @test3a(ptr %p) nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test3a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test3a:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false)
   ret ptr %p
@@ -187,12 +186,12 @@ define void @test4() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test4:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(struct1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test4:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(struct1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(struct1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false)
   ret void
@@ -205,12 +204,12 @@ define void @test4a(ptr nocapture %s) nounwind {
 ; RV32I-NEXT:    li a2, 64
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test4a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a1, 682602
-; RV32IXQCISLS-NEXT:    addi a1, a1, 1702
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test4a:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a1, 682602
+; RV32IXQCILSM-NEXT:    addi a1, a1, 1702
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false)
   ret void
@@ -238,18 +237,18 @@ define void @test4b() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test4b:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a1, %hi(struct4b)
-; RV32IXQCISLS-NEXT:    addi a1, a1, %lo(struct4b)
-; RV32IXQCISLS-NEXT:    lui a0, %hi(struct4b1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct4b1)
-; RV32IXQCISLS-NEXT:    li a2, 192
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 124(a1)
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test4b:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a1, %hi(struct4b)
+; RV32IXQCILSM-NEXT:    addi a1, a1, %lo(struct4b)
+; RV32IXQCILSM-NEXT:    lui a0, %hi(struct4b1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(struct4b1)
+; RV32IXQCILSM-NEXT:    li a2, 192
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 64(a1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 124(a1)
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false)
   tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false)
@@ -265,13 +264,13 @@ define void @test5() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test5:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(struct2)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(struct2)
-; RV32IXQCISLS-NEXT:    li a2, 64
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test5:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(struct2)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(struct2)
+; RV32IXQCILSM-NEXT:    li a2, 64
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false)
   ret void
@@ -286,13 +285,13 @@ define i32 @test6() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    li a0, 0
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    li a0, 0
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i32, align 4
   call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false)
@@ -302,21 +301,20 @@ entry:
 
 define i32 @test6a() nounwind {
 ; RV32I-LABEL: test6a:
-; RV32I:       # %bb.0: # %entry
+; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw zero, 12(sp)
 ; RV32I-NEXT:    lw a0, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    lw a0, 12(sp)
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
-entry:
+; RV32IXQCILSM-LABEL: test6a:
+; RV32IXQCILSM:       # %bb.0:
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    lw a0, 12(sp)
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
   %x = alloca i32, align 4
   call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
   store i32 0, ptr %x, align 4
@@ -327,21 +325,20 @@ entry:
 
 define zeroext i8 @test6b_c() nounwind {
 ; RV32I-LABEL: test6b_c:
-; RV32I:       # %bb.0: # %entry
+; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sb zero, 12(sp)
 ; RV32I-NEXT:    lbu a0, 12(sp)
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6b_c:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sb zero, 12(sp)
-; RV32IXQCISLS-NEXT:    lbu a0, 12(sp)
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
-entry:
+; RV32IXQCILSM-LABEL: test6b_c:
+; RV32IXQCILSM:       # %bb.0:
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sb zero, 12(sp)
+; RV32IXQCILSM-NEXT:    lbu a0, 12(sp)
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
   %x = alloca i8, align 4
   call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x)
   call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false)
@@ -359,13 +356,13 @@ define signext i16 @test6b_s() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6b_s:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sh zero, 12(sp)
-; RV32IXQCISLS-NEXT:    lh a0, 12(sp)
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6b_s:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sh zero, 12(sp)
+; RV32IXQCILSM-NEXT:    lh a0, 12(sp)
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i16, align 4
   call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x)
@@ -384,13 +381,14 @@ define i32 @test6b_l() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
+; RV32IXQCILSM-LABEL: test6b_l:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    lw a0, 12(sp)
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 ; RV32IXQCISLS-LABEL: test6b_l:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    lw a0, 12(sp)
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
 entry:
   %x = alloca i32, align 4
   call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
@@ -411,15 +409,15 @@ define i64 @test6b_ll() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6b_ll:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 8(sp)
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    lw a0, 8(sp)
-; RV32IXQCISLS-NEXT:    lw a1, 12(sp)
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6b_ll:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 8(sp)
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    lw a0, 8(sp)
+; RV32IXQCILSM-NEXT:    lw a1, 12(sp)
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i64, align 8
   call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %x)
@@ -438,13 +436,13 @@ define zeroext i8 @test6c_c() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6c_c:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sb zero, 15(sp)
-; RV32IXQCISLS-NEXT:    li a0, 0
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6c_c:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sb zero, 15(sp)
+; RV32IXQCILSM-NEXT:    li a0, 0
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i8
   call void @llvm.memset.p0.i32(ptr align 1 %x, i8 0, i32 1, i1 false)
@@ -461,13 +459,13 @@ define signext i16 @test6c_s() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6c_s:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sh zero, 14(sp)
-; RV32IXQCISLS-NEXT:    li a0, 0
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6c_s:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sh zero, 14(sp)
+; RV32IXQCILSM-NEXT:    li a0, 0
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i16
   call void @llvm.memset.p0.i32(ptr align 2 %x, i8 0, i32 2, i1 false)
@@ -484,13 +482,13 @@ define i32 @test6c_l() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6c_l:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    li a0, 0
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6c_l:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    li a0, 0
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i32, align 4
   call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false)
@@ -509,15 +507,15 @@ define i64 @test6c_ll() nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test6c_ll:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    addi sp, sp, -16
-; RV32IXQCISLS-NEXT:    sw zero, 8(sp)
-; RV32IXQCISLS-NEXT:    sw zero, 12(sp)
-; RV32IXQCISLS-NEXT:    li a0, 0
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    addi sp, sp, 16
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test6c_ll:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    addi sp, sp, -16
+; RV32IXQCILSM-NEXT:    sw zero, 8(sp)
+; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
+; RV32IXQCILSM-NEXT:    li a0, 0
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    addi sp, sp, 16
+; RV32IXQCILSM-NEXT:    ret
 entry:
   %x = alloca i64, align 8
   call void @llvm.memset.p0.i32(ptr align 8 %x, i8 0, i32 8, i1 false)
@@ -527,21 +525,20 @@ entry:
 
 define void @test7() nounwind {
 ; RV32I-LABEL: test7:
-; RV32I:       # %bb.0: # %entry
+; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a0, %hi(arr1)
 ; RV32I-NEXT:    sw zero, %lo(arr1)(a0)
 ; RV32I-NEXT:    addi a0, a0, %lo(arr1)
 ; RV32I-NEXT:    sw zero, 4(a0)
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test7:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
-; RV32IXQCISLS-NEXT:    ret
-entry:
+; RV32IXQCILSM-LABEL: test7:
+; RV32IXQCILSM:       # %bb.0:
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, 4(a0)
+; RV32IXQCILSM-NEXT:    ret
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 8, i1 false)
   ret void
 }
@@ -551,9 +548,9 @@ define void @test7a() nounwind {
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test7a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7a:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    ret
 entry:
   call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 0, i1 false)
   ret void
@@ -572,17 +569,17 @@ define void @test7a_unalign() nounwind {
 ; RV32I-NEXT:    sb a1, 16(a0)
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test7a_unalign:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    li a1, -1
-; RV32IXQCISLS-NEXT:    sw a1, %lo(arr1)(a0)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    sw a1, 4(a0)
-; RV32IXQCISLS-NEXT:    sw a1, 8(a0)
-; RV32IXQCISLS-NEXT:    sw a1, 12(a0)
-; RV32IXQCISLS-NEXT:    sb a1, 16(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7a_unalign:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    li a1, -1
+; RV32IXQCILSM-NEXT:    sw a1, %lo(arr1)(a0)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    sw a1, 4(a0)
+; RV32IXQCILSM-NEXT:    sw a1, 8(a0)
+; RV32IXQCILSM-NEXT:    sw a1, 12(a0)
+; RV32IXQCILSM-NEXT:    sb a1, 16(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 17, i1 false)
   ret void
@@ -597,14 +594,14 @@ define void @test7b() nounwind {
 ; RV32I-NEXT:    li a2, 68
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test7b:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    li a1, -1
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 1, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7b:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    li a1, -1
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 1, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 68, i1 false)
   ret void
@@ -619,15 +616,15 @@ define void @test7c() nounwind {
 ; RV32I-NEXT:    li a2, 128
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test7c:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    lui a1, 526344
-; RV32IXQCISLS-NEXT:    addi a1, a1, 128
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7c:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    lui a1, 526344
+; RV32IXQCILSM-NEXT:    addi a1, a1, 128
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -128, i32 128, i1 false)
   ret void
@@ -642,16 +639,16 @@ define void @test7d() nounwind {
 ; RV32I-NEXT:    li a2, 148
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test7d:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    lui a1, 53457
-; RV32IXQCISLS-NEXT:    addi a1, a1, -755
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 6, 124(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7d:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    lui a1, 53457
+; RV32IXQCILSM-NEXT:    addi a1, a1, -755
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 6, 124(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 13, i32 148, i1 false)
   ret void
@@ -666,15 +663,15 @@ define void @test7e() nounwind {
 ; RV32I-NEXT:    li a2, 100
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test7e:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    lui a1, 982783
-; RV32IXQCISLS-NEXT:    addi a1, a1, -17
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi a1, 9, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test7e:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    lui a1, 982783
+; RV32IXQCILSM-NEXT:    addi a1, a1, -17
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi a1, 9, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -17, i32 100, i1 false)
   ret void
@@ -691,15 +688,15 @@ define void @test8() nounwind {
 ; RV32I-NEXT:    sw zero, 12(a0)
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test8:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 8(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 12(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test8:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, 4(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 8(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 12(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 16, i1 false)
   ret void
@@ -720,19 +717,19 @@ define void @test9() nounwind {
 ; RV32I-NEXT:    sw zero, 16(a0)
 ; RV32I-NEXT:    ret
 ;
-; RV32IXQCISLS-LABEL: test9:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, %lo(arr1)(a0)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    sw zero, 20(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 24(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 28(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 4(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 8(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 12(a0)
-; RV32IXQCISLS-NEXT:    sw zero, 16(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test9:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, %lo(arr1)(a0)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    sw zero, 20(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 24(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 28(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 4(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 8(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 12(a0)
+; RV32IXQCILSM-NEXT:    sw zero, 16(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 32, i1 false)
   ret void
@@ -747,12 +744,12 @@ define void @test10() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test10:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 0(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test10:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 0(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 60, i1 false)
   ret void
@@ -767,12 +764,12 @@ define void @test11() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test11:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test11:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 64, i1 false)
   ret void
@@ -787,13 +784,13 @@ define void @test12() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test12:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 14, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test12:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 14, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 120, i1 false)
   ret void
@@ -808,13 +805,13 @@ define void @test13() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test13:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test13:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 124, i1 false)
   ret void
@@ -829,14 +826,14 @@ define void @test14() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test14:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 14, 124(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test14:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 14, 124(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 180, i1 false)
   ret void
@@ -851,14 +848,14 @@ define void @test15() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test15:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 124(a0)
-; RV32IXQCISLS-NEXT:    ret
+; RV32IXQCILSM-LABEL: test15:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 124(a0)
+; RV32IXQCILSM-NEXT:    ret
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 184, i1 false)
   ret void
@@ -873,13 +870,13 @@ define void @test15a() nounwind {
 ; RV32I-NEXT:    li a2, 192
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test15a:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    li a1, 165
-; RV32IXQCISLS-NEXT:    li a2, 192
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test15a:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    li a1, 165
+; RV32IXQCILSM-NEXT:    li a2, 192
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -91, i32 192, i1 false)
   ret void
@@ -887,22 +884,21 @@ entry:
 
 define void @test15b() nounwind {
 ; RV32I-LABEL: test15b:
-; RV32I:       # %bb.0: # %entry
+; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a0, %hi(arr1)
 ; RV32I-NEXT:    addi a0, a0, %lo(arr1)
 ; RV32I-NEXT:    li a2, 188
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test15b:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 0(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 15, 64(a0)
-; RV32IXQCISLS-NEXT:    qc.setwmi zero, 16, 124(a0)
-; RV32IXQCISLS-NEXT:    ret
-entry:
+; RV32IXQCILSM-LABEL: test15b:
+; RV32IXQCILSM:       # %bb.0:
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 0(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 15, 64(a0)
+; RV32IXQCILSM-NEXT:    qc.setwmi zero, 16, 124(a0)
+; RV32IXQCILSM-NEXT:    ret
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 188, i1 false)
   ret void
 }
@@ -916,13 +912,13 @@ define void @test15c() nounwind {
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    tail memset
 ;
-; RV32IXQCISLS-LABEL: test15c:
-; RV32IXQCISLS:       # %bb.0: # %entry
-; RV32IXQCISLS-NEXT:    lui a0, %hi(arr1)
-; RV32IXQCISLS-NEXT:    addi a0, a0, %lo(arr1)
-; RV32IXQCISLS-NEXT:    li a2, 192
-; RV32IXQCISLS-NEXT:    li a1, 0
-; RV32IXQCISLS-NEXT:    tail memset
+; RV32IXQCILSM-LABEL: test15c:
+; RV32IXQCILSM:       # %bb.0: # %entry
+; RV32IXQCILSM-NEXT:    lui a0, %hi(arr1)
+; RV32IXQCILSM-NEXT:    addi a0, a0, %lo(arr1)
+; RV32IXQCILSM-NEXT:    li a2, 192
+; RV32IXQCILSM-NEXT:    li a1, 0
+; RV32IXQCILSM-NEXT:    tail memset
 entry:
   tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 192, i1 false)
   ret void

>From 44a01d195a25cf3dca3da3ec017332ffd8eeaa95 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 1 Aug 2025 08:33:32 +0530
Subject: [PATCH 3/9] Address comments

---
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td   | 10 +--
 .../Target/RISCV/RISCVSelectionDAGInfo.cpp    | 61 +++++++++----------
 llvm/test/CodeGen/RISCV/xqcilsm-memset.ll     | 25 --------
 3 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 2479ced164927..1b0041b5ca7e3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -14,12 +14,12 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
-def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
-                                             SDTCisSameAs<1, 3>,
-                                             SDTCisPtrTy<2>,
-                                             SDTCisVT<3, XLenVT>]>;
+def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+                                           SDTCisSameAs<1, 3>,
+                                           SDTCisPtrTy<2>,
+                                           SDTCisVT<3, XLenVT>]>;
 
-def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple,
+def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def uimm5nonzero : RISCVOp<XLenVT>,
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index ce1e3a72c4e34..be0a16e3748e0 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -69,8 +69,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo) const {
-  const RISCVSubtarget &Subtarget =
-      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  const auto &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
   // We currently do this only for Xqcilsm
   if (!Subtarget.hasVendorXqcilsm())
     return SDValue();
@@ -83,7 +82,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
 
   // Do this only if it is word aligned and we write multiple of 4 bytes.
-  if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0))
+  if (!(Alignment.value() >= 4) || !((NumberOfBytesToWrite & 3) == 0))
     return SDValue();
 
   SmallVector<SDValue, 8> OutChains;
@@ -104,7 +103,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   if ((Src.getValueType() == MVT::i8) && !IsZeroVal)
     // Replicate byte to word by multiplication with 0x01010101.
     SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
-                                     DAG.getConstant(16843009, dl, MVT::i32));
+                                     DAG.getConstant(0x01010101ul, dl, MVT::i32));
 
   // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
   // So for 1-16 words we use a single QC_SETWMI:
@@ -128,38 +127,38 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   // QC_SETWMI R2, R0, N, 124
   //
   // For 48 words or more, call the target independent memset
+  if ( NumberOfWords >= 48)
+    return SDValue();
+
   if (NumberOfWords <= 16) {
     // 1 - 16 words
     SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32);
     SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
     return getSetwmiNode(SizeWords, OffsetSetwmi);
-  } else if (NumberOfWords <= 47) {
-    if (NumberOfWords <= 32) {
-      // 17 - 32 words
-      SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32);
-      OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
-      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-      SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
-      OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
-      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-    } else {
-      // 33 - 47 words
-      SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32);
-      OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32);
-      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-      SizeWords = DAG.getTargetConstant(15, dl, MVT::i32);
-      OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
-      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-      SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
-      OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
-      OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-    }
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
   }
 
-  // >= 48 words. Call target independent memset.
-  return SDValue();
+  if (NumberOfWords <= 32) {
+    // 17 - 32 words
+    SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32);
+    OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
+    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+    SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
+    OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
+    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+  } else {
+    // 33 - 47 words
+    SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32);
+    OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32);
+    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+    SizeWords = DAG.getTargetConstant(15, dl, MVT::i32);
+    OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
+    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+
+    SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
+    OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
+    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+  }
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }
diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
index 3496438fde5b6..988bb6ffb8915 100644
--- a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
+++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll
@@ -299,30 +299,6 @@ entry:
   ret i32 %0
 }
 
-define i32 @test6a() nounwind {
-; RV32I-LABEL: test6a:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -16
-; RV32I-NEXT:    sw zero, 12(sp)
-; RV32I-NEXT:    lw a0, 12(sp)
-; RV32I-NEXT:    addi sp, sp, 16
-; RV32I-NEXT:    ret
-;
-; RV32IXQCILSM-LABEL: test6a:
-; RV32IXQCILSM:       # %bb.0:
-; RV32IXQCILSM-NEXT:    addi sp, sp, -16
-; RV32IXQCILSM-NEXT:    sw zero, 12(sp)
-; RV32IXQCILSM-NEXT:    lw a0, 12(sp)
-; RV32IXQCILSM-NEXT:    addi sp, sp, 16
-; RV32IXQCILSM-NEXT:    ret
-  %x = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
-  store i32 0, ptr %x, align 4
-  %x.0.x.0. = load volatile i32, ptr %x, align 4
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
-  ret i32 %x.0.x.0.
-}
-
 define zeroext i8 @test6b_c() nounwind {
 ; RV32I-LABEL: test6b_c:
 ; RV32I:       # %bb.0:
@@ -388,7 +364,6 @@ define i32 @test6b_l() nounwind {
 ; RV32IXQCILSM-NEXT:    lw a0, 12(sp)
 ; RV32IXQCILSM-NEXT:    addi sp, sp, 16
 ; RV32IXQCILSM-NEXT:    ret
-; RV32IXQCISLS-LABEL: test6b_l:
 entry:
   %x = alloca i32, align 4
   call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)

>From fd143e9a6b88a0da0b86adda75b66a20ed3c492b Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 1 Aug 2025 08:39:48 +0530
Subject: [PATCH 4/9] Clang format

---
 llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index be0a16e3748e0..d151e12151697 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -102,8 +102,9 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   // If i8 type and constant non-zero value.
   if ((Src.getValueType() == MVT::i8) && !IsZeroVal)
     // Replicate byte to word by multiplication with 0x01010101.
-    SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
-                                     DAG.getConstant(0x01010101ul, dl, MVT::i32));
+    SrcValueReplicated =
+        DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
+                    DAG.getConstant(0x01010101ul, dl, MVT::i32));
 
   // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
   // So for 1-16 words we use a single QC_SETWMI:
@@ -127,7 +128,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   // QC_SETWMI R2, R0, N, 124
   //
   // For 48 words or more, call the target independent memset
-  if ( NumberOfWords >= 48)
+  if (NumberOfWords >= 48)
     return SDValue();
 
   if (NumberOfWords <= 16) {

>From 18ee0a91124924f78aa87f77ccecd8f66663dbd1 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 1 Aug 2025 17:00:23 +0530
Subject: [PATCH 5/9] Add memoperands

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  1 +
 .../Target/RISCV/RISCVSelectionDAGInfo.cpp    | 47 +++++++------------
 2 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b778c33083685..efe34f6d5ffc2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1851,6 +1851,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2),
                      Node->getOperand(3), Node->getOperand(4), Chain};
     MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops);
+    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
     ReplaceNode(Node, New);
     return;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index d151e12151697..935a2982c2776 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -86,21 +86,24 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
     return SDValue();
 
   SmallVector<SDValue, 8> OutChains;
-  SDValue SizeWords, OffsetSetwmi;
   SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
   int NumberOfWords = NumberOfBytesToWrite / 4;
+  MachineFunction &MF = DAG.getMachineFunction();
 
   // Helper for constructing the QC_SETWMI instruction
-  auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue {
-    SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi};
-    return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops);
+  auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue {
+    SDValue Ops[] = {Chain, SrcValueReplicated, Dst,
+                     DAG.getTargetConstant(SizeWords, dl, MVT::i32),
+                     DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
+    MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+        DstPtrInfo, MachineMemOperand::MOStore, SizeWords * 4, Align(4));
+    return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
+                                   DAG.getVTList(MVT::Other), Ops, MVT::i32,
+                                   BaseMemOperand);
   };
 
-  bool IsZeroVal =
-      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
-
   // If i8 type and constant non-zero value.
-  if ((Src.getValueType() == MVT::i8) && !IsZeroVal)
+  if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src))
     // Replicate byte to word by multiplication with 0x01010101.
     SrcValueReplicated =
         DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
@@ -133,33 +136,19 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
 
   if (NumberOfWords <= 16) {
     // 1 - 16 words
-    SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32);
-    SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
-    return getSetwmiNode(SizeWords, OffsetSetwmi);
+    return getSetwmiNode(NumberOfWords, 0);
   }
 
   if (NumberOfWords <= 32) {
     // 17 - 32 words
-    SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32);
-    OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
-    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-    SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
-    OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
-    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+    OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64));
+    OutChains.push_back(getSetwmiNode(16, 0));
   } else {
     // 33 - 47 words
-    SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32);
-    OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32);
-    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-    SizeWords = DAG.getTargetConstant(15, dl, MVT::i32);
-    OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32);
-    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
-
-    SizeWords = DAG.getTargetConstant(16, dl, MVT::i32);
-    OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32);
-    OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi));
+    OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124));
+    OutChains.push_back(getSetwmiNode(15, 64));
+    OutChains.push_back(getSetwmiNode(16, 0));
   }
+
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }

>From c7ab9f76929a4a3cdb039708948e635aa5ad06be Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 1 Aug 2025 21:29:38 +0530
Subject: [PATCH 6/9] Use tuimm

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp     | 10 ----------
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td     |  9 +++++++++
 llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp |  2 +-
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index efe34f6d5ffc2..f223fdbef4359 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1845,16 +1845,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     CurDAG->RemoveDeadNode(Node);
     return;
   }
-  case RISCVISD::QC_SETWMI: {
-    SDValue Chain = Node->getOperand(0);
-    SDVTList VTs = Node->getVTList();
-    SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2),
-                     Node->getOperand(3), Node->getOperand(4), Chain};
-    MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops);
-    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
-    ReplaceNode(Node, New);
-    return;
-  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = Node->getConstantOperandVal(0);
     switch (IntNo) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 1b0041b5ca7e3..399fb2c9092ed 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -35,6 +35,8 @@ def uimm5nonzero : RISCVOp<XLenVT>,
   }];
 }
 
+def tuimm5nonzero : TImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]>;
+
 def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
   [{return (Imm > 3) && isUInt<5>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<5, "GT3">;
@@ -100,6 +102,8 @@ def uimm5slist : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
   }];
 }
 
+def tuimm7_lsb00 : TImmLeaf<XLenVT,[{return isShiftedUInt<5, 2>(Imm);}]>;
+
 def uimm10 : RISCVUImmLeafOp<10>;
 
 def uimm11 : RISCVUImmLeafOp<11>;
@@ -1574,6 +1578,11 @@ def : QCISELECTIICCPat <SETEQ,  QC_SELECTIIEQ>;
 def : QCISELECTIICCPat <SETNE,  QC_SELECTIINE>;
 } // Predicates = [HasVendorXqcics, IsRV32]
 
+let Predicates = [HasVendorXqcilsm, IsRV32] in {
+def : Pat<(qc_setwmi i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
+          (QC_SETWMI i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
+} // Predicates = [HasVendorXqcilsm, IsRV32]
+
 //===----------------------------------------------------------------------===/i
 // Compress Instruction tablegen backend.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 935a2982c2776..bc3213b797166 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -82,7 +82,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
 
   // Do this only if it is word aligned and we write multiple of 4 bytes.
-  if (!(Alignment.value() >= 4) || !((NumberOfBytesToWrite & 3) == 0))
+  if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0))
     return SDValue();
 
   SmallVector<SDValue, 8> OutChains;

>From be7306a1668763b99dbd442390c7063797fe3ecd Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 1 Aug 2025 22:35:16 +0530
Subject: [PATCH 7/9] Use GPR

---
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 399fb2c9092ed..44a8245dc2a75 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1579,8 +1579,8 @@ def : QCISELECTIICCPat <SETNE,  QC_SELECTIINE>;
 } // Predicates = [HasVendorXqcics, IsRV32]
 
 let Predicates = [HasVendorXqcilsm, IsRV32] in {
-def : Pat<(qc_setwmi i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
-          (QC_SETWMI i32:$rd, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
+def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
+          (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
 } // Predicates = [HasVendorXqcilsm, IsRV32]
 
 //===----------------------------------------------------------------------===/i

>From b3881c2b553083b91fe7483db353aa22cfb3c44e Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Sat, 2 Aug 2025 06:50:10 +0530
Subject: [PATCH 8/9] MachinePointerInfo offset

---
 llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index bc3213b797166..cd94307fb6a27 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -96,7 +96,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
                      DAG.getTargetConstant(SizeWords, dl, MVT::i32),
                      DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
     MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
-        DstPtrInfo, MachineMemOperand::MOStore, SizeWords * 4, Align(4));
+        DstPtrInfo.getWithOffset(OffsetSetwmi), MachineMemOperand::MOStore,
+        SizeWords * 4, Align(4));
     return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
                                    DAG.getVTList(MVT::Other), Ops, MVT::i32,
                                    BaseMemOperand);

>From aebf23cea3393695c9ec5e36efe7180778ea79a3 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 4 Aug 2025 11:41:45 +0530
Subject: [PATCH 9/9] Add volatile memop and fix typos

---
 llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index cd94307fb6a27..041dd07b48bf0 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -81,7 +81,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
 
   uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();
 
-  // Do this only if it is word aligned and we write multiple of 4 bytes.
+  // Do this only if it is word aligned and we write a multiple of 4 bytes.
   if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0))
     return SDValue();
 
@@ -89,6 +89,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
   int NumberOfWords = NumberOfBytesToWrite / 4;
   MachineFunction &MF = DAG.getMachineFunction();
+  auto Volatile =
+      isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
 
   // Helper for constructing the QC_SETWMI instruction
   auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue {
@@ -96,8 +98,8 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
                      DAG.getTargetConstant(SizeWords, dl, MVT::i32),
                      DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
     MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
-        DstPtrInfo.getWithOffset(OffsetSetwmi), MachineMemOperand::MOStore,
-        SizeWords * 4, Align(4));
+        DstPtrInfo.getWithOffset(OffsetSetwmi),
+        MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4));
     return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
                                    DAG.getVTList(MVT::Other), Ops, MVT::i32,
                                    BaseMemOperand);
@@ -122,7 +124,7 @@ SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
   // QC_SETWMI reg1, N, 64(reg2)
   //
   // For 33-48 words, we would like to use (16, 16, n), but that means the last
-  // QC_SETWMI needs an offset of 128 which the instruction doesnt support.
+  // QC_SETWMI needs an offset of 128 which the instruction doesn't support.
   // So in this case we use a length of 15 for the second instruction and we do
   // the rest with the third instruction.
   // This means the maximum inlined number of words is 47 (for now):



More information about the llvm-commits mailing list