[clang] [clang][RISCV] Fix crash on VLS calling convention (PR #145489)

Brandon Wu via cfe-commits cfe-commits at lists.llvm.org
Sat Jul 12 00:43:47 PDT 2025


https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/145489

>From e51b061b7231ccb990e74a313f7cea900faf34c5 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Sun, 6 Jul 2025 00:42:02 -0700
Subject: [PATCH] [clang][RISCV] Fix crash on VLS calling convention

This patch handles structs of fixed-length vectors and structs of arrays of
fixed-length vectors correctly for the VLS calling convention in
EmitFunctionProlog, EmitFunctionEpilog, and EmitCall.
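
For reference, a minimal reproducer sketch (mirroring the new tests in
clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c; the type and
function names here are illustrative) that exercises the previously
crashing path:

  typedef int __attribute__((vector_size(16))) int32x4_t;

  struct st_i32x4 {
    int32x4_t i;  // struct holding a single fixed-length vector
  };

  // With ABI_VLEN = 128 the struct is coerced to <vscale x 2 x i32>;
  // passing or returning it by value under the VLS calling convention
  // used to crash in EmitFunctionProlog/EmitFunctionEpilog.
  struct st_i32x4 __attribute__((riscv_vls_cc)) identity(struct st_i32x4 arg) {
    return arg;
  }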
---
 clang/include/clang/CodeGen/CGFunctionInfo.h  |  46 ++++-
 clang/lib/CodeGen/ABIInfo.cpp                 |   9 +
 clang/lib/CodeGen/ABIInfo.h                   |   6 +
 clang/lib/CodeGen/CGCall.cpp                  |  71 +++++--
 clang/lib/CodeGen/TargetInfo.cpp              |   7 +
 clang/lib/CodeGen/Targets/RISCV.cpp           | 181 +++++++++++++++++-
 clang/lib/CodeGen/Targets/Sparc.cpp           |   1 +
 clang/lib/CodeGen/Targets/X86.cpp             |   1 +
 clang/lib/CodeGen/Targets/XCore.cpp           |   1 +
 .../RISCV/riscv-vector-callingconv-llvm-ir.c  |  93 +++++++--
 .../riscv-vector-callingconv-llvm-ir.cpp      |  32 ++--
 11 files changed, 394 insertions(+), 54 deletions(-)
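
As a condensed illustration (lines taken verbatim from the updated test
file below, not part of the patch itself): with ABI_VLEN = 128, the prolog
for a struct containing a single fixed-length vector now unpacks the
coerced scalable-vector argument back into the in-memory struct:

  %0 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %arg.target_coerce, i64 0)
  store <4 x i32> %0, ptr %arg, align 16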

diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h
index 50be51769f1a8..d469efc8b431d 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -77,6 +77,10 @@ class ABIArgInfo {
     /// Array elements in the type are assumed to be padding and skipped.
     CoerceAndExpand,
 
+    /// TargetSpecific - Some argument types are passed as target-specific types,
+    /// such as RISC-V's vector tuple type; these need to be handled by the target hooks.
+    TargetSpecific,
+
     /// InAlloca - Pass the argument directly using the LLVM inalloca attribute.
     /// This is similar to indirect with byval, except it only applies to
     /// arguments stored in memory and forbids any implicit copies.  When
@@ -120,7 +124,7 @@ class ABIArgInfo {
 
   bool canHavePaddingType() const {
     return isDirect() || isExtend() || isIndirect() || isIndirectAliased() ||
-           isExpand();
+           isExpand() || isTargetSpecific();
   }
   void setPaddingType(llvm::Type *T) {
     assert(canHavePaddingType());
@@ -291,6 +295,20 @@ class ABIArgInfo {
     return AI;
   }
 
+  static ABIArgInfo getTargetSpecific(llvm::Type *T = nullptr,
+                                      unsigned Offset = 0,
+                                      llvm::Type *Padding = nullptr,
+                                      bool CanBeFlattened = true,
+                                      unsigned Align = 0) {
+    auto AI = ABIArgInfo(TargetSpecific);
+    AI.setCoerceToType(T);
+    AI.setPaddingType(Padding);
+    AI.setDirectOffset(Offset);
+    AI.setDirectAlign(Align);
+    AI.setCanBeFlattened(CanBeFlattened);
+    return AI;
+  }
+
   static bool isPaddingForCoerceAndExpand(llvm::Type *eltType) {
     return eltType->isArrayTy() &&
            eltType->getArrayElementType()->isIntegerTy(8);
@@ -305,27 +323,33 @@ class ABIArgInfo {
   bool isIndirectAliased() const { return TheKind == IndirectAliased; }
   bool isExpand() const { return TheKind == Expand; }
   bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; }
+  bool isTargetSpecific() const { return TheKind == TargetSpecific; }
 
   bool canHaveCoerceToType() const {
-    return isDirect() || isExtend() || isCoerceAndExpand();
+    return isDirect() || isExtend() || isCoerceAndExpand() ||
+           isTargetSpecific();
   }
 
   // Direct/Extend accessors
   unsigned getDirectOffset() const {
-    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    assert((isDirect() || isExtend() || isTargetSpecific()) &&
+           "Not a direct or extend or target specific kind");
     return DirectAttr.Offset;
   }
   void setDirectOffset(unsigned Offset) {
-    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    assert((isDirect() || isExtend() || isTargetSpecific()) &&
+           "Not a direct or extend or target specific kind");
     DirectAttr.Offset = Offset;
   }
 
   unsigned getDirectAlign() const {
-    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    assert((isDirect() || isExtend() || isTargetSpecific()) &&
+           "Not a direct or extend or target specific kind");
     return DirectAttr.Align;
   }
   void setDirectAlign(unsigned Align) {
-    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    assert((isDirect() || isExtend() || isTargetSpecific()) &&
+           "Not a direct or extend or target specific kind");
     DirectAttr.Align = Align;
   }
 
@@ -394,12 +418,14 @@ class ABIArgInfo {
   }
 
   bool getInReg() const {
-    assert((isDirect() || isExtend() || isIndirect()) && "Invalid kind!");
+    assert((isDirect() || isExtend() || isIndirect() || isTargetSpecific()) &&
+           "Invalid kind!");
     return InReg;
   }
 
   void setInReg(bool IR) {
-    assert((isDirect() || isExtend() || isIndirect()) && "Invalid kind!");
+    assert((isDirect() || isExtend() || isIndirect() || isTargetSpecific()) &&
+           "Invalid kind!");
     InReg = IR;
   }
 
@@ -481,12 +507,12 @@ class ABIArgInfo {
   }
 
   bool getCanBeFlattened() const {
-    assert(isDirect() && "Invalid kind!");
+    assert((isDirect() || isTargetSpecific()) && "Invalid kind!");
     return CanBeFlattened;
   }
 
   void setCanBeFlattened(bool Flatten) {
-    assert(isDirect() && "Invalid kind!");
+    assert((isDirect() || isTargetSpecific()) && "Invalid kind!");
     CanBeFlattened = Flatten;
   }
 
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index d981d69913632..5073e1426e097 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -244,6 +244,15 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
   return T;
 }
 
+llvm::Value *ABIInfo::CreateCoercedLoad(Address SrcAddr, const ABIArgInfo &AI,
+                                        CodeGenFunction &CGF) const {
+  return nullptr;
+}
+void ABIInfo::CreateCoercedStore(llvm::Value *Val, Address DstAddr,
+                                 const ABIArgInfo &AI, bool DestIsVolatile,
+                                 CodeGenFunction &CGF) const {
+  return;
+}
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
 
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 9c7029c99bd44..368ccd8f43192 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -132,6 +132,12 @@ class ABIInfo {
   virtual llvm::FixedVectorType *
   getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                              const LangOptions &Opt) const;
+
+  virtual llvm::Value *CreateCoercedLoad(Address SrcAddr, const ABIArgInfo &AI,
+                                         CodeGenFunction &CGF) const;
+  virtual void CreateCoercedStore(llvm::Value *Val, Address DstAddr,
+                                  const ABIArgInfo &AI, bool DestIsVolatile,
+                                  CodeGenFunction &CGF) const;
 };
 
 /// Target specific hooks for defining how a type should be passed or returned
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index c8c3d6b20c496..2cbb4d739683a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1602,6 +1602,7 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
       IRArgs.PaddingArgIndex = IRArgNo++;
 
     switch (AI.getKind()) {
+    case ABIArgInfo::TargetSpecific:
     case ABIArgInfo::Extend:
     case ABIArgInfo::Direct: {
       // FIXME: handle sseregparm someday...
@@ -1712,6 +1713,7 @@ llvm::FunctionType *CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
   case ABIArgInfo::IndirectAliased:
     llvm_unreachable("Invalid ABI kind for return argument");
 
+  case ABIArgInfo::TargetSpecific:
   case ABIArgInfo::Extend:
   case ABIArgInfo::Direct:
     resultType = retAI.getCoerceToType();
@@ -1784,6 +1786,7 @@ llvm::FunctionType *CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
       ArgTypes[FirstIRArg] = llvm::PointerType::get(
           getLLVMContext(), ArgInfo.getIndirectAddrSpace());
       break;
+    case ABIArgInfo::TargetSpecific:
     case ABIArgInfo::Extend:
     case ABIArgInfo::Direct: {
       // Fast-isel and the optimizer generally like scalar values better than
@@ -2697,6 +2700,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     else
       RetAttrs.addAttribute(llvm::Attribute::NoExt);
     [[fallthrough]];
+  case ABIArgInfo::TargetSpecific:
   case ABIArgInfo::Direct:
     if (RetAI.getInReg())
       RetAttrs.addAttribute(llvm::Attribute::InReg);
@@ -2838,6 +2842,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       else
         Attrs.addAttribute(llvm::Attribute::NoExt);
       [[fallthrough]];
+    case ABIArgInfo::TargetSpecific:
     case ABIArgInfo::Direct:
       if (ArgNo == 0 && FI.isChainCall())
         Attrs.addAttribute(llvm::Attribute::Nest);
@@ -3335,17 +3340,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         }
       }
 
-      // Struct of fixed-length vectors and struct of array of fixed-length
-      // vector in VLS calling convention are coerced to vector tuple
-      // type(represented as TargetExtType) and scalable vector type
-      // respectively, they're no longer handled as struct.
-      if (ArgI.isDirect() && isa<llvm::StructType>(ConvertType(Ty)) &&
-          (isa<llvm::TargetExtType>(ArgI.getCoerceToType()) ||
-           isa<llvm::ScalableVectorType>(ArgI.getCoerceToType()))) {
-        ArgVals.push_back(ParamValue::forDirect(AI));
-        break;
-      }
-
       llvm::StructType *STy =
           dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
       Address Alloca =
@@ -3486,6 +3480,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       break;
     }
 
+    case ABIArgInfo::TargetSpecific: {
+      auto *AI = Fn->getArg(FirstIRArg);
+      AI->setName(Arg->getName() + ".target_coerce");
+      Address Alloca =
+          CreateMemTemp(Ty, getContext().getDeclAlign(Arg), Arg->getName());
+      Address Ptr = emitAddressAtOffset(*this, Alloca, ArgI);
+      CGM.getABIInfo().CreateCoercedStore(AI, Ptr, ArgI, false, *this);
+      if (CodeGenFunction::hasScalarEvaluationKind(Ty)) {
+        llvm::Value *V =
+            EmitLoadOfScalar(Alloca, false, Ty, Arg->getBeginLoc());
+        if (isPromoted) {
+          V = emitArgumentDemotion(*this, Arg, V);
+        }
+        ArgVals.push_back(ParamValue::forDirect(V));
+      } else {
+        ArgVals.push_back(ParamValue::forIndirect(Alloca));
+      }
+      break;
+    }
     case ABIArgInfo::Ignore:
       assert(NumIRArgs == 0);
       // Initialize the local variable appropriately.
@@ -4114,6 +4127,11 @@ void CodeGenFunction::EmitFunctionEpilog(
     }
     break;
   }
+  case ABIArgInfo::TargetSpecific: {
+    Address V = emitAddressAtOffset(*this, ReturnValue, RetAI);
+    RV = CGM.getABIInfo().CreateCoercedLoad(V, RetAI, *this);
+    break;
+  }
   case ABIArgInfo::Expand:
   case ABIArgInfo::IndirectAliased:
     llvm_unreachable("Invalid ABI kind for return argument");
@@ -5691,6 +5709,24 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       assert(IRArgPos == FirstIRArg + NumIRArgs);
       break;
     }
+
+    case ABIArgInfo::TargetSpecific: {
+      Address Src = Address::invalid();
+      if (!I->isAggregate()) {
+        Src = CreateMemTemp(I->Ty, "target_coerce");
+        I->copyInto(*this, Src);
+      } else {
+        Src = I->hasLValue() ? I->getKnownLValue().getAddress()
+                             : I->getKnownRValue().getAggregateAddress();
+      }
+
+      // If the value is offset in memory, apply the offset now.
+      Src = emitAddressAtOffset(*this, Src, ArgInfo);
+      llvm::Value *Load =
+          CGM.getABIInfo().CreateCoercedLoad(Src, ArgInfo, *this);
+      IRCallArgs[FirstIRArg] = Load;
+      break;
+    }
     }
   }
 
@@ -6177,6 +6213,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         return convertTempToRValue(DestPtr, RetTy, SourceLocation());
       }
 
+      case ABIArgInfo::TargetSpecific: {
+        Address DestPtr = ReturnValue.getValue();
+        Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
+        bool DestIsVolatile = ReturnValue.isVolatile();
+        if (!DestPtr.isValid()) {
+          DestPtr = CreateMemTemp(RetTy, "target_coerce");
+          DestIsVolatile = false;
+        }
+        CGM.getABIInfo().CreateCoercedStore(CI, StorePtr, RetAI, DestIsVolatile,
+                                            *this);
+        return convertTempToRValue(DestPtr, RetTy, SourceLocation());
+      }
+
       case ABIArgInfo::Expand:
       case ABIArgInfo::IndirectAliased:
         llvm_unreachable("Invalid ABI kind for return argument");
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 277d69daf493c..1e58c3f217812 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -63,6 +63,13 @@ LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
     OS << "CoerceAndExpand Type=";
     getCoerceAndExpandType()->print(OS);
     break;
+  case TargetSpecific:
+    OS << "TargetSpecific Type=";
+    if (llvm::Type *Ty = getCoerceToType())
+      Ty->print(OS);
+    else
+      OS << "null";
+    break;
   }
   OS << ")\n";
 }
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index e3232b61a693c..3305832d00ff6 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -8,6 +8,7 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 
 using namespace clang;
@@ -73,6 +74,11 @@ class RISCVABIInfo : public DefaultABIInfo {
                                raw_ostream &Out) const override;
   void appendAttributeMangling(StringRef AttrStr,
                                raw_ostream &Out) const override;
+  llvm::Value *CreateCoercedLoad(Address SrcAddr, const ABIArgInfo &AI,
+                                 CodeGenFunction &CGF) const override;
+  void CreateCoercedStore(llvm::Value *Val, Address DstAddr,
+                          const ABIArgInfo &AI, bool DestIsVolatile,
+                          CodeGenFunction &CGF) const override;
 };
 } // end anonymous namespace
 
@@ -648,7 +654,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
   if (IsFixed && Ty->isStructureOrClassType()) {
     llvm::Type *VLSType = nullptr;
     if (detectVLSCCEligibleStruct(Ty, ABIVLen, VLSType))
-      return ABIArgInfo::getDirect(VLSType);
+      return ABIArgInfo::getTargetSpecific(VLSType);
   }
 
   uint64_t NeededAlign = getContext().getTypeAlign(Ty);
@@ -780,6 +786,179 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty, llvm::Type *CoerceTy) const {
   return ABIArgInfo::getExtend(Ty, CoerceTy);
 }
 
+llvm::Value *RISCVABIInfo::CreateCoercedLoad(Address Src, const ABIArgInfo &AI,
+                                             CodeGenFunction &CGF) const {
+  llvm::Type *Ty = AI.getCoerceToType();
+  llvm::Type *SrcTy = Src.getElementType();
+  llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy);
+  assert(SrcSTy && "Source should be struct type");
+  assert((Ty->isScalableTy() || Ty->isTargetExtTy()) &&
+         "Only scalable vector type and vector tuple type are allowed for load "
+         "type.");
+  if (llvm::TargetExtType *TupTy = dyn_cast<llvm::TargetExtType>(Ty)) {
+    // In the RISC-V VLS calling convention, a struct of fixed-length vectors,
+    // or a struct of an array of more than one fixed-length vector, may be
+    // lowered to a vector tuple type; we treat it as a valid load, e.g.
+    // struct i32x4x2 {
+    //     __attribute__((vector_size(16))) int i;
+    //     __attribute__((vector_size(16))) int j;
+    // };
+    // or
+    // struct i32x4_arr2 {
+    //     __attribute__((vector_size(16))) int i[2];
+    // };
+    // is lowered to target("riscv.vector.tuple", <vscale x 8 x i8>, 2)
+    // when ABI_VLEN = 128 bits; please check out
+    // clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
+    // for more information.
+    assert(TupTy->getName() == "riscv.vector.tuple");
+    llvm::Type *EltTy = TupTy->getTypeParameter(0);
+    unsigned NumElts = TupTy->getIntParameter(0);
+
+    if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(SrcSTy->getElementType(0)))
+      Src = Src.withElementType(ArrayTy);
+
+    // Load the struct, then repack each element into the vector tuple
+    llvm::Value *PoisonTuple = llvm::PoisonValue::get(Ty);
+    auto *Load = CGF.Builder.CreateLoad(Src);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      // Extract from struct
+      llvm::Value *ExtractFromLoad = CGF.Builder.CreateExtractValue(Load, i);
+      // The element type in a vector tuple is always i8, so we need to cast
+      // back to its original element type.
+      EltTy =
+          cast<llvm::ScalableVectorType>(llvm::VectorType::getWithSizeAndScalar(
+              cast<llvm::VectorType>(EltTy), ExtractFromLoad->getType()));
+      llvm::Value *PoisonVec = llvm::PoisonValue::get(EltTy);
+      // Insert to scalable vector
+      PoisonVec = CGF.Builder.CreateInsertVector(
+          EltTy, PoisonVec, ExtractFromLoad, uint64_t(0), "cast.scalable");
+      // Insert scalable vector to vector tuple
+      llvm::Value *Idx = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), i);
+      PoisonTuple = CGF.Builder.CreateIntrinsic(
+          llvm::Intrinsic::riscv_tuple_insert, {Ty, EltTy},
+          {PoisonTuple, PoisonVec, Idx});
+    }
+    return PoisonTuple;
+  }
+
+  // In the RISC-V VLS calling convention, a struct of a single fixed-length
+  // vector, or a struct of an array of one fixed-length vector, may be
+  // lowered to a scalable vector type; we treat it as a valid load, e.g.
+  // struct i32x4 {
+  //     __attribute__((vector_size(16))) int i;
+  // };
+  // or
+  // struct i32x4_arr1 {
+  //     __attribute__((vector_size(16))) int i[1];
+  // };
+  // is lowered to <vscale x 2 x i32>
+  // when ABI_VLEN = 128 bits; please check out
+  // clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
+  // for more information.
+  auto *ScalableDstTy = cast<llvm::ScalableVectorType>(Ty);
+  SrcTy = SrcSTy->getElementType(0);
+  if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(SrcTy))
+    SrcTy = ArrayTy->getElementType();
+  Src = Src.withElementType(SrcTy);
+  auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
+  assert(FixedSrcTy);
+  assert(ScalableDstTy->getElementType() == FixedSrcTy->getElementType());
+  auto *Load = CGF.Builder.CreateLoad(Src);
+  auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
+  llvm::Value *Result = CGF.Builder.CreateInsertVector(
+      ScalableDstTy, PoisonVec, Load, uint64_t(0), "cast.scalable");
+  return Result;
+}
+
+void RISCVABIInfo::CreateCoercedStore(llvm::Value *Val, Address Dst,
+                                      const ABIArgInfo &AI, bool DestIsVolatile,
+                                      CodeGenFunction &CGF) const {
+  llvm::Type *SrcTy = Val->getType();
+  llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(Dst.getElementType());
+  assert(DstSTy && "Destination should be struct type");
+  assert((SrcTy->isScalableTy() || SrcTy->isTargetExtTy()) &&
+         "Only scalable vector type and vector tuple type are allowed for "
+         "store value.");
+  if (llvm::TargetExtType *TupTy = dyn_cast<llvm::TargetExtType>(SrcTy)) {
+    // In the RISC-V VLS calling convention, a struct of fixed-length vectors,
+    // or a struct of an array of more than one fixed-length vector, may be
+    // lowered to a vector tuple type; we treat it as a valid store, e.g.
+    // struct i32x4x2 {
+    //     __attribute__((vector_size(16))) int i;
+    //     __attribute__((vector_size(16))) int j;
+    // };
+    // or
+    // struct i32x4_arr2 {
+    //     __attribute__((vector_size(16))) int i[2];
+    // };
+    // is lowered to target("riscv.vector.tuple", <vscale x 8 x i8>, 2)
+    // when ABI_VLEN = 128 bits; please check out
+    // clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
+    // for more information.
+    assert(TupTy->getName() == "riscv.vector.tuple");
+    llvm::Type *EltTy = TupTy->getTypeParameter(0);
+    unsigned NumElts = TupTy->getIntParameter(0);
+
+    llvm::Type *FixedVecTy = DstSTy->getElementType(0);
+    if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(DstSTy->getElementType(0))) {
+      Dst = Dst.withElementType(ArrayTy);
+      FixedVecTy = ArrayTy->getArrayElementType();
+    }
+
+    // Extract each element from the vector tuple and store it to the struct
+    for (unsigned i = 0; i < NumElts; ++i) {
+      // The element type in a vector tuple is always i8, so we need to cast
+      // back to its original element type.
+      EltTy =
+          cast<llvm::ScalableVectorType>(llvm::VectorType::getWithSizeAndScalar(
+              cast<llvm::VectorType>(EltTy), FixedVecTy));
+      // Extract scalable vector from tuple
+      llvm::Value *Idx = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), i);
+      auto *TupleElement = CGF.Builder.CreateIntrinsic(
+          llvm::Intrinsic::riscv_tuple_extract, {EltTy, TupTy}, {Val, Idx});
+
+      // Extract fixed vector from scalable vector
+      auto *ExtractVec = CGF.Builder.CreateExtractVector(
+          FixedVecTy, TupleElement, uint64_t(0));
+      // Store fixed vector to corresponding address
+      Address EltPtr = Address::invalid();
+      if (Dst.getElementType()->isStructTy())
+        EltPtr = CGF.Builder.CreateStructGEP(Dst, i);
+      else
+        EltPtr = CGF.Builder.CreateConstArrayGEP(Dst, i);
+      auto *I = CGF.Builder.CreateStore(ExtractVec, EltPtr, DestIsVolatile);
+      CGF.addInstToCurrentSourceAtom(I, ExtractVec);
+    }
+    return;
+  }
+
+  // In the RISC-V VLS calling convention, a struct of a single fixed-length
+  // vector, or a struct of an array of one fixed-length vector, may be
+  // lowered to a scalable vector type; we treat it as a valid store, e.g.
+  // struct i32x4 {
+  //     __attribute__((vector_size(16))) int i;
+  // };
+  // or
+  // struct i32x4_arr1 {
+  //     __attribute__((vector_size(16))) int i[1];
+  // };
+  // is lowered to <vscale x 2 x i32>
+  // when ABI_VLEN = 128 bits; please check out
+  // clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
+  // for more information.
+  llvm::Type *EltTy = DstSTy->getElementType(0);
+  if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(EltTy)) {
+    assert(ArrayTy->getNumElements() == 1);
+    EltTy = ArrayTy->getElementType();
+  }
+  auto *Coerced = CGF.Builder.CreateExtractVector(
+      cast<llvm::FixedVectorType>(EltTy), Val, uint64_t(0));
+  auto *I = CGF.Builder.CreateStore(Coerced, Dst, DestIsVolatile);
+  CGF.addInstToCurrentSourceAtom(I, Val);
+  return;
+}
+
 namespace {
 class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp
index 9642196b78c63..7ba22c6b8b9ad 100644
--- a/clang/lib/CodeGen/Targets/Sparc.cpp
+++ b/clang/lib/CodeGen/Targets/Sparc.cpp
@@ -303,6 +303,7 @@ RValue SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   case ABIArgInfo::Expand:
   case ABIArgInfo::CoerceAndExpand:
   case ABIArgInfo::InAlloca:
+  case ABIArgInfo::TargetSpecific:
     llvm_unreachable("Unsupported ABI kind for va_arg");
 
   case ABIArgInfo::Extend: {
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 0f59caac2323e..9f530b702e113 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1008,6 +1008,7 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
     return true;
   case ABIArgInfo::Ignore:
   case ABIArgInfo::IndirectAliased:
+  case ABIArgInfo::TargetSpecific:
     return false;
   case ABIArgInfo::Indirect:
   case ABIArgInfo::Direct:
diff --git a/clang/lib/CodeGen/Targets/XCore.cpp b/clang/lib/CodeGen/Targets/XCore.cpp
index b7824bde5f55a..14a4e11000336 100644
--- a/clang/lib/CodeGen/Targets/XCore.cpp
+++ b/clang/lib/CodeGen/Targets/XCore.cpp
@@ -157,6 +157,7 @@ RValue XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   case ABIArgInfo::Expand:
   case ABIArgInfo::CoerceAndExpand:
   case ABIArgInfo::InAlloca:
+  case ABIArgInfo::TargetSpecific:
     llvm_unreachable("Unsupported ABI kind for va_arg");
   case ABIArgInfo::Ignore:
     Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign);
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
index 82e43fff0c3aa..9febd47feb90c 100644
--- a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
@@ -143,34 +143,34 @@ void __attribute__((riscv_vls_cc)) test_too_large(int32x64_t arg) {}
 // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_too_large_256(<vscale x 16 x i32> noundef %arg.coerce)
 void __attribute__((riscv_vls_cc(256))) test_too_large_256(int32x64_t arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4(<vscale x 2 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4(<vscale x 2 x i32> %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4(struct st_i32x4 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_256(<vscale x 1 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_256(<vscale x 1 x i32> %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4_256(struct st_i32x4 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr1(<vscale x 2 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr1(<vscale x 2 x i32> %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4_arr1(struct st_i32x4_arr1 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr1_256(<vscale x 1 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr1_256(<vscale x 1 x i32> %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr1_256(struct st_i32x4_arr1 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4_arr4(struct st_i32x4_arr4 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr4_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr4_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr4_256(struct st_i32x4_arr4 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4_arr8(struct st_i32x4_arr8 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr8_256(struct st_i32x4_arr8 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4x2(struct st_i32x4x2 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x2_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x2_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4x2_256(struct st_i32x4x2 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x8x2(struct st_i32x8x2 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x8x2_256(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x8x2_256(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x8x2_256(struct st_i32x8x2 arg) {}
 
 // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x64x2(ptr noundef %arg)
@@ -178,17 +178,78 @@ void __attribute__((riscv_vls_cc)) test_st_i32x64x2(struct st_i32x64x2 arg) {}
 // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x64x2_256(ptr noundef %arg)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x64x2_256(struct st_i32x64x2 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4x3(struct st_i32x4x3 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x3_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x3_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4x3_256(struct st_i32x4x3 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.target_coerce)
 void __attribute__((riscv_vls_cc)) test_st_i32x4x8(struct st_i32x4x8 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.target_coerce)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4x8_256(struct st_i32x4x8 arg) {}
 
 // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x9(ptr noundef %arg)
 void __attribute__((riscv_vls_cc)) test_st_i32x4x9(struct st_i32x4x9 arg) {}
 // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x9_256(ptr noundef %arg)
 void __attribute__((riscv_vls_cc(256))) test_st_i32x4x9_256(struct st_i32x4x9 arg) {}
+
+// CHECK-LLVM-LABEL: define dso_local riscv_vls_cc(128) target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_function_prolog_epilog(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce) #0 {
+// CHECK-LLVM-NEXT: entry:
+// CHECK-LLVM-NEXT:   %retval = alloca %struct.st_i32x4_arr4, align 16
+// CHECK-LLVM-NEXT:   %arg = alloca %struct.st_i32x4_arr4, align 16
+// CHECK-LLVM-NEXT:   %0 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce, i32 0)
+// CHECK-LLVM-NEXT:   %1 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %0, i64 0)
+// CHECK-LLVM-NEXT:   %2 = getelementptr inbounds [4 x <4 x i32>], ptr %arg, i64 0, i64 0
+// CHECK-LLVM-NEXT:   store <4 x i32> %1, ptr %2, align 16
+// CHECK-LLVM-NEXT:   %3 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce, i32 1)
+// CHECK-LLVM-NEXT:   %4 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %3, i64 0)
+// CHECK-LLVM-NEXT:   %5 = getelementptr inbounds [4 x <4 x i32>], ptr %arg, i64 0, i64 1
+// CHECK-LLVM-NEXT:   store <4 x i32> %4, ptr %5, align 16
+// CHECK-LLVM-NEXT:   %6 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce, i32 2)
+// CHECK-LLVM-NEXT:   %7 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %6, i64 0)
+// CHECK-LLVM-NEXT:   %8 = getelementptr inbounds [4 x <4 x i32>], ptr %arg, i64 0, i64 2
+// CHECK-LLVM-NEXT:   store <4 x i32> %7, ptr %8, align 16
+// CHECK-LLVM-NEXT:   %9 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce, i32 3)
+// CHECK-LLVM-NEXT:   %10 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %9, i64 0)
+// CHECK-LLVM-NEXT:   %11 = getelementptr inbounds [4 x <4 x i32>], ptr %arg, i64 0, i64 3
+// CHECK-LLVM-NEXT:   store <4 x i32> %10, ptr %11, align 16
+// CHECK-LLVM-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval, ptr align 16 %arg, i64 64, i1 false)
+// CHECK-LLVM-NEXT:   %12 = load [4 x <4 x i32>], ptr %retval, align 16
+// CHECK-LLVM-NEXT:   %13 = extractvalue [4 x <4 x i32>] %12, 0
+// CHECK-LLVM-NEXT:   %cast.scalable = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %13, i64 0)
+// CHECK-LLVM-NEXT:   %14 = call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) poison, <vscale x 2 x i32> %cast.scalable, i32 0)
+// CHECK-LLVM-NEXT:   %15 = extractvalue [4 x <4 x i32>] %12, 1
+// CHECK-LLVM-NEXT:   %cast.scalable1 = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %15, i64 0)
+// CHECK-LLVM-NEXT:   %16 = call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %14, <vscale x 2 x i32> %cast.scalable1, i32 1)
+// CHECK-LLVM-NEXT:   %17 = extractvalue [4 x <4 x i32>] %12, 2
+// CHECK-LLVM-NEXT:   %cast.scalable2 = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %17, i64 0)
+// CHECK-LLVM-NEXT:   %18 = call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %16, <vscale x 2 x i32> %cast.scalable2, i32 2)
+// CHECK-LLVM-NEXT:   %19 = extractvalue [4 x <4 x i32>] %12, 3
+// CHECK-LLVM-NEXT:   %cast.scalable3 = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %19, i64 0)
+// CHECK-LLVM-NEXT:   %20 = call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %18, <vscale x 2 x i32> %cast.scalable3, i32 3)
+// CHECK-LLVM-NEXT:   ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %20
+// CHECK-LLVM-NEXT: }
+struct st_i32x4_arr4 __attribute__((riscv_vls_cc)) test_function_prolog_epilog(struct st_i32x4_arr4 arg) {
+  return arg;
+}
+
+struct st_i32x4 __attribute__((riscv_vls_cc)) dummy(struct st_i32x4);
+// CHECK-LLVM-LABEL: define dso_local riscv_vls_cc(128) <vscale x 2 x i32> @test_call(<vscale x 2 x i32> %arg.target_coerce) #0 {
+// CHECK-LLVM-NEXT: entry:
+// CHECK-LLVM-NEXT:   %retval = alloca %struct.st_i32x4, align 16
+// CHECK-LLVM-NEXT:   %arg = alloca %struct.st_i32x4, align 16
+// CHECK-LLVM-NEXT:   %0 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %arg.target_coerce, i64 0)
+// CHECK-LLVM-NEXT:   store <4 x i32> %0, ptr %arg, align 16
+// CHECK-LLVM-NEXT:   %1 = load <4 x i32>, ptr %arg, align 16
+// CHECK-LLVM-NEXT:   %cast.scalable = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %1, i64 0)
+// CHECK-LLVM-NEXT:   %call = call riscv_vls_cc(128) <vscale x 2 x i32> @dummy(<vscale x 2 x i32> %cast.scalable)
+// CHECK-LLVM-NEXT:   %2 = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %call, i64 0)
+// CHECK-LLVM-NEXT:   store <4 x i32> %2, ptr %retval, align 16
+// CHECK-LLVM-NEXT:   %3 = load <4 x i32>, ptr %retval, align 16
+// CHECK-LLVM-NEXT:   %cast.scalable1 = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %3, i64 0)
+// CHECK-LLVM-NEXT:   ret <vscale x 2 x i32> %cast.scalable1
+// CHECK-LLVM-NEXT: }
+struct st_i32x4 __attribute__((riscv_vls_cc)) test_call(struct st_i32x4 arg) {
+  struct st_i32x4 abc = dummy(arg);
+  return abc;
+}
diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
index 5f6539796c20d..bd56760ff60c8 100644
--- a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
+++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
@@ -123,34 +123,34 @@ typedef int __attribute__((vector_size(256))) int32x64_t;
 // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z18test_too_large_256Dv64_i(<vscale x 16 x i32> noundef %arg.coerce)
 [[riscv::vls_cc(256)]] void test_too_large_256(int32x64_t arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z13test_st_i32x48st_i32x4(<vscale x 2 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z13test_st_i32x48st_i32x4(<vscale x 2 x i32> %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4(struct st_i32x4 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z17test_st_i32x4_2568st_i32x4(<vscale x 1 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z17test_st_i32x4_2568st_i32x4(<vscale x 1 x i32> %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4_256(struct st_i32x4 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr113st_i32x4_arr1(<vscale x 2 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr113st_i32x4_arr1(<vscale x 2 x i32> %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4_arr1(struct st_i32x4_arr1 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr1_25613st_i32x4_arr1(<vscale x 1 x i32> %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr1_25613st_i32x4_arr1(<vscale x 1 x i32> %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4_arr1_256(struct st_i32x4_arr1 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr413st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr413st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4_arr4(struct st_i32x4_arr4 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr4_25613st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr4_25613st_i32x4_arr4(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4_arr4_256(struct st_i32x4_arr4 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr813st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr813st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4_arr8(struct st_i32x4_arr8 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr8_25613st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr8_25613st_i32x4_arr8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4_arr8_256(struct st_i32x4_arr8 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x210st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x210st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4x2(struct st_i32x4x2 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x2_25610st_i32x4x2(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x2_25610st_i32x4x2(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4x2_256(struct st_i32x4x2 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x8x210st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x8x210st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x8x2(struct st_i32x8x2 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x8x2_25610st_i32x8x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x8x2_25610st_i32x8x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x8x2_256(struct st_i32x8x2 arg) {}
 
 // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z16test_st_i32x64x211st_i32x64x2(ptr noundef %arg)
@@ -158,14 +158,14 @@ typedef int __attribute__((vector_size(256))) int32x64_t;
 // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z20test_st_i32x64x2_25611st_i32x64x2(ptr noundef %arg)
 [[riscv::vls_cc(256)]] void test_st_i32x64x2_256(struct st_i32x64x2 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x310st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x310st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4x3(struct st_i32x4x3 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x3_25610st_i32x4x3(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x3_25610st_i32x4x3(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4x3_256(struct st_i32x4x3 arg) {}
 
-// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x810st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x810st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.target_coerce)
 [[riscv::vls_cc]] void test_st_i32x4x8(struct st_i32x4x8 arg) {}
-// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x8_25610st_i32x4x8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
+// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x8_25610st_i32x4x8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.target_coerce)
 [[riscv::vls_cc(256)]] void test_st_i32x4x8_256(struct st_i32x4x8 arg) {}
 
 // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x910st_i32x4x9(ptr noundef %arg)


