[llvm] c5b11a7 - [NVPTX] support immediate values in st.param instructions (#91523)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 18 10:33:09 PDT 2024


Author: Alex MacLean
Date: 2024-05-18T10:33:05-07:00
New Revision: c5b11a710e01304908e3f320e40bc9da9f6a8de4

URL: https://github.com/llvm/llvm-project/commit/c5b11a710e01304908e3f320e40bc9da9f6a8de4
DIFF: https://github.com/llvm/llvm-project/commit/c5b11a710e01304908e3f320e40bc9da9f6a8de4.diff

LOG: [NVPTX] support immediate values in st.param instructions (#91523)

Add support for generating `st.param` instructions with direct use of
immediates. This eliminates the need for a `mov` instruction prior to
the `st.param`, resulting in more concise emitted PTX.

Added: 
    llvm/test/CodeGen/NVPTX/st-param-imm.ll

Modified: 
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 595395bb1b4b4..2713b6859ff3b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2182,6 +2182,100 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
   return true;
 }
 
+// Paste type + per-operand i/r into an opcode (ex: NVPTX::StoreParamV4F32_iiri)
+#define getOpcV2H(ty, opKind0, opKind1)                                        \
+  NVPTX::StoreParamV2##ty##_##opKind0##opKind1
+
+#define getOpcV2H1(ty, opKind0, isImm1)                                        \
+  (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)
+
+#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
+  (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])
+
+#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3)                      \
+  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3
+
+#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3)                      \
+  (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)                       \
+           : getOpcV4H(ty, opKind0, opKind1, opKind2, r)
+
+#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3)                       \
+  (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3)                       \
+           : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)
+
+#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)                        \
+  (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)                        \
+           : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)
+
+#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
+  (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3])                 \
+             : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])
+
+#define getOpcodeForVectorStParam(n, ty, isimm)                                \
+  (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                            \
+           : getOpcodeForVectorStParamV4(ty, isimm)
+
+static unsigned pickOpcodeForVectorStParam(SmallVector<SDValue, 8> &Ops,
+                                           unsigned NumElts,
+                                           MVT::SimpleValueType MemTy,
+                                           SelectionDAG *CurDAG, SDLoc DL) {
+  // Classify each operand as register or immediate, and replace every
+  // immediate in Ops with the equivalent target constant node.
+  SmallVector<bool, 4> IsImm(NumElts, false);
+  for (unsigned i = 0; i < NumElts; i++) {
+    IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
+    if (IsImm[i]) {
+      SDValue Imm = Ops[i];
+      if (MemTy == MVT::f32 || MemTy == MVT::f64) {
+        const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
+        const ConstantFP *CF = ConstImm->getConstantFPValue();
+        Imm = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
+      } else { // NOTE(review): FP imms with non-f32/f64 MemTy reach this cast
+        const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
+        const ConstantInt *CI = ConstImm->getConstantIntValue();
+        Imm = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
+      }
+      Ops[i] = Imm;
+    }
+  }
+
+  // Select the opcode for MemTy, NumElts, and the register/immediate pattern
+  switch (MemTy) {
+  case MVT::i8:
+    return getOpcodeForVectorStParam(NumElts, I8, IsImm);
+  case MVT::i16:
+    return getOpcodeForVectorStParam(NumElts, I16, IsImm);
+  case MVT::i32:
+    return getOpcodeForVectorStParam(NumElts, I32, IsImm);
+  case MVT::i64:
+    assert(NumElts == 2 && "MVT too large for NumElts > 2");
+    return getOpcodeForVectorStParamV2(I64, IsImm);
+  case MVT::f32:
+    return getOpcodeForVectorStParam(NumElts, F32, IsImm);
+  case MVT::f64:
+    assert(NumElts == 2 && "MVT too large for NumElts > 2");
+    return getOpcodeForVectorStParamV2(F64, IsImm);
+
+  // These cases don't support immediates, just use the all register version
+  // and generate moves. NOTE(review): Ops consts were already rewritten above.
+  case MVT::i1:
+    return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
+                          : NVPTX::StoreParamV4I8_rrrr;
+  case MVT::f16:
+  case MVT::bf16:
+    return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
+                          : NVPTX::StoreParamV4I16_rrrr;
+  case MVT::v2f16:
+  case MVT::v2bf16:
+  case MVT::v2i16:
+  case MVT::v4i8:
+    return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
+                          : NVPTX::StoreParamV4I32_rrrr;
+  default:
+    llvm_unreachable("Cannot select st.param for unknown MemTy");
+  }
+}
+
 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
   SDLoc DL(N);
   SDValue Chain = N->getOperand(0);
@@ -2193,10 +2287,10 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
   SDValue Glue = N->getOperand(N->getNumOperands() - 1);
 
   // How many elements do we have?
-  unsigned NumElts = 1;
+  unsigned NumElts;
   switch (N->getOpcode()) {
   default:
-    return false;
+    llvm_unreachable("Unexpected opcode");
   case NVPTXISD::StoreParamU32:
   case NVPTXISD::StoreParamS32:
   case NVPTXISD::StoreParam:
@@ -2222,18 +2316,40 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
   // Determine target opcode
   // If we have an i1, use an 8-bit store. The lowering code in
   // NVPTXISelLowering will have already emitted an upcast.
-  std::optional<unsigned> Opcode = 0;
+  std::optional<unsigned> Opcode;
   switch (N->getOpcode()) {
   default:
     switch (NumElts) {
     default:
-      return false;
-    case 1:
-      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
-                               NVPTX::StoreParamI8, NVPTX::StoreParamI16,
-                               NVPTX::StoreParamI32, NVPTX::StoreParamI64,
-                               NVPTX::StoreParamF32, NVPTX::StoreParamF64);
-      if (Opcode == NVPTX::StoreParamI8) {
+      llvm_unreachable("Unexpected NumElts");
+    case 1: {
+      MVT::SimpleValueType MemTy = Mem->getMemoryVT().getSimpleVT().SimpleTy;
+      SDValue Imm = Ops[0];
+      if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
+          (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
+        // Convert immediate to target constant
+        if (MemTy == MVT::f32 || MemTy == MVT::f64) {
+          const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
+          const ConstantFP *CF = ConstImm->getConstantFPValue();
+          Imm = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
+        } else {
+          const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
+          const ConstantInt *CI = ConstImm->getConstantIntValue();
+          Imm = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
+        }
+        Ops[0] = Imm;
+        // Use immediate version of store param
+        Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_i,
+                                 NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
+                                 NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
+                                 NVPTX::StoreParamF64_i);
+      } else
+        Opcode =
+            pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+                            NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
+                            NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
+                            NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
+      if (Opcode == NVPTX::StoreParamI8_r) {
         // Fine tune the opcode depending on the size of the operand.
         // This helps to avoid creating redundant COPY instructions in
         // InstrEmitter::AddRegisterOperand().
@@ -2241,35 +2357,28 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
         default:
           break;
         case MVT::i32:
-          Opcode = NVPTX::StoreParamI8TruncI32;
+          Opcode = NVPTX::StoreParamI8TruncI32_r;
           break;
         case MVT::i64:
-          Opcode = NVPTX::StoreParamI8TruncI64;
+          Opcode = NVPTX::StoreParamI8TruncI64_r;
           break;
         }
       }
       break;
+    }
     case 2:
-      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
-                               NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
-                               NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
-                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
-      break;
-    case 4:
-      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
-                               NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
-                               NVPTX::StoreParamV4I32, std::nullopt,
-                               NVPTX::StoreParamV4F32, std::nullopt);
+    case 4: {
+      MVT::SimpleValueType MemTy = Mem->getMemoryVT().getSimpleVT().SimpleTy;
+      Opcode = pickOpcodeForVectorStParam(Ops, NumElts, MemTy, CurDAG, DL);
       break;
     }
-    if (!Opcode)
-      return false;
+    }
     break;
   // Special case: if we have a sign-extend/zero-extend node, insert the
   // conversion instruction first, and use that as the value operand to
   // the selected StoreParam node.
   case NVPTXISD::StoreParamU32: {
-    Opcode = NVPTX::StoreParamI32;
+    Opcode = NVPTX::StoreParamI32_r;
     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                 MVT::i32);
     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
@@ -2278,7 +2387,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
     break;
   }
   case NVPTXISD::StoreParamS32: {
-    Opcode = NVPTX::StoreParamI32;
+    Opcode = NVPTX::StoreParamI32_r;
     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
                                                 MVT::i32);
     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,

diff  --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 393fa29ff0516..c4c35a1f74ba9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2637,25 +2637,46 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
                 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
 
 let mayStore = true in {
-  class StoreParamInst<NVPTXRegClass regclass, string opstr> :
-        NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
-                  !strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
-                  []>;
 
-  class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
-        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
-                               i32imm:$a, i32imm:$b),
-                  !strconcat("st.param.v2", opstr,
-                             " \t[param$a+$b], {{$val, $val2}};"),
-                  []>;
+  multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
+    foreach op = [IMMType, regclass] in // one candidate def per operand kind
+      if !or(support_imm, !isa<NVPTXRegClass>(op)) then // imm form only when support_imm is set
+        def _ # !if(!isa<NVPTXRegClass>(op), "r", "i") // suffix: _r = register, _i = immediate
+          : NVPTXInst<(outs),
+                      (ins op:$val, i32imm:$a, i32imm:$b),
+                      "st.param" # opstr # " \t[param$a+$b], $val;",
+                      []>;
+  }
 
-  class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
-        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
-                               regclass:$val4, i32imm:$a,
-                               i32imm:$b),
-                  !strconcat("st.param.v4", opstr,
-                             " \t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
-                  []>;
+  multiclass StoreParamV2Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
+    foreach op1 = [IMMType, regclass] in // all four imm/reg combinations of the two values
+      foreach op2 = [IMMType, regclass] in
+        def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i") // first suffix letter describes $val1
+              # !if(!isa<NVPTXRegClass>(op2), "r", "i") // second suffix letter describes $val2
+          : NVPTXInst<(outs),
+                      (ins op1:$val1, op2:$val2,
+                           i32imm:$a, i32imm:$b),
+                      "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
+                      []>;
+  }
+
+  multiclass StoreParamV4Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
+    foreach op1 = [IMMType, regclass] in // all sixteen imm/reg combinations of the four values
+      foreach op2 = [IMMType, regclass] in
+        foreach op3 = [IMMType, regclass] in
+          foreach op4 = [IMMType, regclass] in
+            def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i") // one suffix letter per value, in operand order
+                  # !if(!isa<NVPTXRegClass>(op2), "r", "i")
+                  # !if(!isa<NVPTXRegClass>(op3), "r", "i")
+                  # !if(!isa<NVPTXRegClass>(op4), "r", "i")
+
+              : NVPTXInst<(outs),
+                          (ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
+                               i32imm:$a, i32imm:$b),
+                          "st.param.v4" # opstr #
+                          " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
+                          []>;
+  }
 
   class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
         NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
@@ -2735,27 +2756,30 @@ def LoadParamMemV2F32  : LoadParamV2MemInst<Float32Regs, ".f32">;
 def LoadParamMemV2F64  : LoadParamV2MemInst<Float64Regs, ".f64">;
 def LoadParamMemV4F32  : LoadParamV4MemInst<Float32Regs, ".f32">;
 
-def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
-def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
-
-def StoreParamI16    : StoreParamInst<Int16Regs, ".b16">;
-def StoreParamI8     : StoreParamInst<Int16Regs, ".b8">;
-def StoreParamI8TruncI32 : StoreParamInst<Int32Regs, ".b8">;
-def StoreParamI8TruncI64 : StoreParamInst<Int64Regs, ".b8">;
-def StoreParamV2I64  : StoreParamV2Inst<Int64Regs, ".b64">;
-def StoreParamV2I32  : StoreParamV2Inst<Int32Regs, ".b32">;
-def StoreParamV2I16  : StoreParamV2Inst<Int16Regs, ".b16">;
-def StoreParamV2I8   : StoreParamV2Inst<Int16Regs, ".b8">;
-
-def StoreParamV4I32  : StoreParamV4Inst<Int32Regs, ".b32">;
-def StoreParamV4I16  : StoreParamV4Inst<Int16Regs, ".b16">;
-def StoreParamV4I8   : StoreParamV4Inst<Int16Regs, ".b8">;
-
-def StoreParamF32      : StoreParamInst<Float32Regs, ".f32">;
-def StoreParamF64      : StoreParamInst<Float64Regs, ".f64">;
-def StoreParamV2F32    : StoreParamV2Inst<Float32Regs, ".f32">;
-def StoreParamV2F64    : StoreParamV2Inst<Float64Regs, ".f64">;
-def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
+defm StoreParamI64    : StoreParamInst<Int64Regs, i64imm, ".b64">;
+defm StoreParamI32    : StoreParamInst<Int32Regs, i32imm, ".b32">;
+defm StoreParamI16    : StoreParamInst<Int16Regs, i16imm, ".b16">;
+defm StoreParamI8     : StoreParamInst<Int16Regs, i8imm,  ".b8">;
+
+defm StoreParamI8TruncI32 : StoreParamInst<Int32Regs, i8imm, ".b8", /* support_imm */ false>;
+defm StoreParamI8TruncI64 : StoreParamInst<Int64Regs, i8imm, ".b8", /* support_imm */ false>;
+
+defm StoreParamV2I64  : StoreParamV2Inst<Int64Regs, i64imm, ".b64">;
+defm StoreParamV2I32  : StoreParamV2Inst<Int32Regs, i32imm, ".b32">;
+defm StoreParamV2I16  : StoreParamV2Inst<Int16Regs, i16imm, ".b16">;
+defm StoreParamV2I8   : StoreParamV2Inst<Int16Regs, i8imm,  ".b8">;
+
+defm StoreParamV4I32  : StoreParamV4Inst<Int32Regs, i32imm, ".b32">;
+defm StoreParamV4I16  : StoreParamV4Inst<Int16Regs, i16imm, ".b16">;
+defm StoreParamV4I8   : StoreParamV4Inst<Int16Regs, i8imm,  ".b8">;
+
+defm StoreParamF32    : StoreParamInst<Float32Regs, f32imm, ".f32">;
+defm StoreParamF64    : StoreParamInst<Float64Regs, f64imm, ".f64">;
+
+defm StoreParamV2F32  : StoreParamV2Inst<Float32Regs, f32imm, ".f32">;
+defm StoreParamV2F64  : StoreParamV2Inst<Float64Regs, f64imm, ".f64">;
+
+defm StoreParamV4F32  : StoreParamV4Inst<Float32Regs, f32imm, ".f32">;
 
 def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
 def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;

diff  --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
new file mode 100644
index 0000000000000..d9e0057192381
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -0,0 +1,2002 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: llc < %s -march=nvptx | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -verify-machineinstrs | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.A = type { i8, i16 }
+%struct.char2 = type { i8, i8 }
+%struct.char4 = type { i8, i8, i8, i8 }
+%struct.short2 = type { i16, i16 }
+%struct.short4 = type { i16, i16, i16, i16 }
+%struct.int2 = type { i32, i32 }
+%struct.int4 = type { i32, i32, i32, i32 }
+%struct.longlong2 = type { i64, i64 }
+%struct.float2 = type { float, float }
+%struct.float4 = type { float, float, float, float }
+%struct.double2 = type { double, double }
+
+define void @st_param_i8_i16() {
+; CHECK-LABEL: st_param_i8_i16(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[4];
+; CHECK-NEXT:    st.param.b8 [param0+0], 1;
+; CHECK-NEXT:    st.param.b16 [param0+2], 2;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i8_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    ret;
+  call void @call_i8_i16(%struct.A { i8 1, i16 2 })
+  ret void
+}
+
+define void @st_param_i32() {
+; CHECK-LABEL: st_param_i32(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .b32 param0;
+; CHECK-NEXT:    st.param.b32 [param0+0], 3;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 1
+; CHECK-NEXT:    ret;
+  call void @call_i32(i32 3)
+  ret void
+}
+
+define void @st_param_i64() {
+; CHECK-LABEL: st_param_i64(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 2, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.b64 [param0+0], 4;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 2
+; CHECK-NEXT:    ret;
+  call void @call_i64(i64 4)
+  ret void
+}
+
+define void @st_param_f32() {
+; CHECK-LABEL: st_param_f32(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 3, 0
+; CHECK-NEXT:    .param .b32 param0;
+; CHECK-NEXT:    st.param.f32 [param0+0], 0f40A00000;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 3
+; CHECK-NEXT:    ret;
+  call void @call_f32(float 5.0)
+  ret void
+}
+
+define void @st_param_f64() {
+; CHECK-LABEL: st_param_f64(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 4, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.f64 [param0+0], 0d4018000000000000;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 4
+; CHECK-NEXT:    ret;
+  call void @call_f64(double 6.0)
+  ret void
+}
+
+declare void @call_i8_i16(%struct.A)
+declare void @call_i32(i32)
+declare void @call_i64(i64)
+declare void @call_f32(float)
+declare void @call_f64(double)
+
+define void @st_param_v2_i8_ii() {
+; CHECK-LABEL: st_param_v2_i8_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 5, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 5
+; CHECK-NEXT:    ret;
+  call void @call_v2_i8(%struct.char2 { i8 1, i8 2 })
+  ret void
+}
+define void @st_param_v2_i8_ir(i8 %val) {
+; CHECK-LABEL: st_param_v2_i8_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v2_i8_ir_param_0];
+; CHECK-NEXT:    { // callseq 6, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {1, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 6
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.char2 poison, i8 1, 0
+  %struct.ir1 = insertvalue %struct.char2 %struct.ir0, i8 %val, 1
+  call void @call_v2_i8(%struct.char2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_i8_ri(i8 %val) {
+; CHECK-LABEL: st_param_v2_i8_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v2_i8_ri_param_0];
+; CHECK-NEXT:    { // callseq 7, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {%rs1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 7
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.char2 poison, i8 %val, 0
+  %struct.ri1 = insertvalue %struct.char2 %struct.ri0, i8 2, 1
+  call void @call_v2_i8(%struct.char2 %struct.ri1)
+  ret void
+}
+
+define void @st_param_v2_i16_ii() {
+; CHECK-LABEL: st_param_v2_i16_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 8, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 8
+; CHECK-NEXT:    ret;
+  call void @call_v2_i16(%struct.short2 { i16 1, i16 2 })
+  ret void
+}
+define void @st_param_v2_i16_ir(i16 %val) {
+; CHECK-LABEL: st_param_v2_i16_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v2_i16_ir_param_0];
+; CHECK-NEXT:    { // callseq 9, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {1, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 9
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.short2 poison, i16 1, 0
+  %struct.ir1 = insertvalue %struct.short2 %struct.ir0, i16 %val, 1
+  call void @call_v2_i16(%struct.short2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_i16_ri(i16 %val) {
+; CHECK-LABEL: st_param_v2_i16_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v2_i16_ri_param_0];
+; CHECK-NEXT:    { // callseq 10, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {%rs1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 10
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.short2 poison, i16 %val, 0
+  %struct.ri1 = insertvalue %struct.short2 %struct.ri0, i16 2, 1
+  call void @call_v2_i16(%struct.short2 %struct.ri1)
+  ret void
+}
+
+define void @st_param_v2_i32_ii() {
+; CHECK-LABEL: st_param_v2_i32_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 11, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 11
+; CHECK-NEXT:    ret;
+  call void @call_v2_i32(%struct.int2 { i32 1, i32 2 })
+  ret void
+}
+define void @st_param_v2_i32_ir(i32 %val) {
+; CHECK-LABEL: st_param_v2_i32_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v2_i32_ir_param_0];
+; CHECK-NEXT:    { // callseq 12, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {1, %r1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 12
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.int2 poison, i32 1, 0
+  %struct.ir1 = insertvalue %struct.int2 %struct.ir0, i32 %val, 1
+  call void @call_v2_i32(%struct.int2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_i32_ri(i32 %val) {
+; CHECK-LABEL: st_param_v2_i32_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v2_i32_ri_param_0];
+; CHECK-NEXT:    { // callseq 13, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {%r1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 13
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.int2 poison, i32 %val, 0
+  %struct.ri1 = insertvalue %struct.int2 %struct.ri0, i32 2, 1
+  call void @call_v2_i32(%struct.int2 %struct.ri1)
+  ret void
+}
+
+define void @st_param_v2_i64_ii() {
+; CHECK-LABEL: st_param_v2_i64_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 14, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 14
+; CHECK-NEXT:    ret;
+  call void @call_v2_i64(%struct.longlong2 { i64 1, i64 2 })
+  ret void
+}
+define void @st_param_v2_i64_ir(i64 %val) {
+; CHECK-LABEL: st_param_v2_i64_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [st_param_v2_i64_ir_param_0];
+; CHECK-NEXT:    { // callseq 15, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {1, %rd1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 15
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.longlong2 poison, i64 1, 0
+  %struct.ir1 = insertvalue %struct.longlong2 %struct.ir0, i64 %val, 1
+  call void @call_v2_i64(%struct.longlong2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_i64_ri(i64 %val) {
+; CHECK-LABEL: st_param_v2_i64_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [st_param_v2_i64_ri_param_0];
+; CHECK-NEXT:    { // callseq 16, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {%rd1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 16
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.longlong2 poison, i64 %val, 0
+  %struct.ri1 = insertvalue %struct.longlong2 %struct.ri0, i64 2, 1
+  call void @call_v2_i64(%struct.longlong2 %struct.ri1)
+  ret void
+}
+
+define void @st_param_v2_f32_ii(float %val) {
+; CHECK-LABEL: st_param_v2_f32_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 17, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {0f3F800000, 0f40000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 17
+; CHECK-NEXT:    ret;
+  call void @call_v2_f32(%struct.float2 { float 1.0, float 2.0 })
+  ret void
+}
+define void @st_param_v2_f32_ir(float %val) {
+; CHECK-LABEL: st_param_v2_f32_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v2_f32_ir_param_0];
+; CHECK-NEXT:    { // callseq 18, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {0f3F800000, %f1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 18
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.float2 poison, float 1.0, 0
+  %struct.ir1 = insertvalue %struct.float2 %struct.ir0, float %val, 1
+  call void @call_v2_f32(%struct.float2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_f32_ri(float %val) {
+; CHECK-LABEL: st_param_v2_f32_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v2_f32_ri_param_0];
+; CHECK-NEXT:    { // callseq 19, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {%f1, 0f40000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 19
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.float2 poison, float %val, 0
+  %struct.ri1 = insertvalue %struct.float2 %struct.ri0, float 2.0, 1
+  call void @call_v2_f32(%struct.float2 %struct.ri1)
+  ret void
+}
+
+define void @st_param_v2_f64_ii(double %val) {
+; CHECK-LABEL: st_param_v2_f64_ii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 20, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {0d3FF0000000000000, 0d4000000000000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 20
+; CHECK-NEXT:    ret;
+  call void @call_v2_f64(%struct.double2 { double 1.0, double 2.0 })
+  ret void
+}
+define void @st_param_v2_f64_ir(double %val) { ; elt 0 is the constant 1.0 (immediate), elt 1 is %val (register)
+; CHECK-LABEL: st_param_v2_f64_ir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f64 %fd1, [st_param_v2_f64_ir_param_0];
+; CHECK-NEXT:    { // callseq 21, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {0d3FF0000000000000, %fd1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 21
+; CHECK-NEXT:    ret;
+  %struct.ir0 = insertvalue %struct.double2 poison, double 1.0, 0
+  %struct.ir1 = insertvalue %struct.double2 %struct.ir0, double %val, 1
+  call void @call_v2_f64(%struct.double2 %struct.ir1)
+  ret void
+}
+define void @st_param_v2_f64_ri(double %val) { ; elt 0 is %val (register), elt 1 is the constant 2.0 (immediate)
+; CHECK-LABEL: st_param_v2_f64_ri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f64 %fd1, [st_param_v2_f64_ri_param_0];
+; CHECK-NEXT:    { // callseq 22, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {%fd1, 0d4000000000000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 22
+; CHECK-NEXT:    ret;
+  %struct.ri0 = insertvalue %struct.double2 poison, double %val, 0
+  %struct.ri1 = insertvalue %struct.double2 %struct.ri0, double 2.0, 1
+  call void @call_v2_f64(%struct.double2 %struct.ri1)
+  ret void
+}
+
+declare void @call_v2_i8(%struct.char2 alignstack(2)) ; external call targets; each takes a single struct argument by value
+declare void @call_v2_i16(%struct.short2 alignstack(4))
+declare void @call_v2_i32(%struct.int2 alignstack(8))
+declare void @call_v2_i64(%struct.longlong2 alignstack(16))
+declare void @call_v2_f32(%struct.float2 alignstack(8))
+declare void @call_v2_f64(%struct.double2 alignstack(16))
+
+define void @st_param_v4_i8_iiii() { ; all four elts are the constants 1, 2, 3, 4 (immediates)
+; CHECK-LABEL: st_param_v4_i8_iiii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 23, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 23
+; CHECK-NEXT:    ret;
+  call void @call_v4_i8(%struct.char4 { i8 1, i8 2, i8 3, i8 4 })
+  ret void
+}
+define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { ; elt 0 is the constant 1 (immediate); elts 1-3 are %b, %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_irrr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_irrr_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_irrr_param_1];
+; CHECK-NEXT:    ld.param.u8 %rs3, [st_param_v4_i8_irrr_param_2];
+; CHECK-NEXT:    { // callseq 24, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs1, %rs2, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 24
+; CHECK-NEXT:    ret;
+  %struct.irrr0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.irrr1 = insertvalue %struct.char4 %struct.irrr0, i8 %b, 1
+  %struct.irrr2 = insertvalue %struct.char4 %struct.irrr1, i8 %c, 2
+  %struct.irrr3 = insertvalue %struct.char4 %struct.irrr2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.irrr3)
+  ret void
+}
+define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { ; elt 1 is the constant 2 (immediate); elts 0, 2, 3 are %a, %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_rirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_rirr_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_rirr_param_1];
+; CHECK-NEXT:    ld.param.u8 %rs3, [st_param_v4_i8_rirr_param_2];
+; CHECK-NEXT:    { // callseq 25, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, %rs2, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 25
+; CHECK-NEXT:    ret;
+  %struct.rirr0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.rirr1 = insertvalue %struct.char4 %struct.rirr0, i8 2, 1
+  %struct.rirr2 = insertvalue %struct.char4 %struct.rirr1, i8 %c, 2
+  %struct.rirr3 = insertvalue %struct.char4 %struct.rirr2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.rirr3)
+  ret void
+}
+define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { ; elt 2 is the constant 3 (immediate); elts 0, 1, 3 are %a, %b, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_rrir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_rrir_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_rrir_param_1];
+; CHECK-NEXT:    ld.param.u8 %rs3, [st_param_v4_i8_rrir_param_2];
+; CHECK-NEXT:    { // callseq 26, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 26
+; CHECK-NEXT:    ret;
+  %struct.rrir0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.rrir1 = insertvalue %struct.char4 %struct.rrir0, i8 %b, 1
+  %struct.rrir2 = insertvalue %struct.char4 %struct.rrir1, i8 3, 2
+  %struct.rrir3 = insertvalue %struct.char4 %struct.rrir2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.rrir3)
+  ret void
+}
+define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { ; elt 3 is the constant 4 (immediate); elts 0-2 are %a, %b, %c (registers)
+; CHECK-LABEL: st_param_v4_i8_rrri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_rrri_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_rrri_param_1];
+; CHECK-NEXT:    ld.param.u8 %rs3, [st_param_v4_i8_rrri_param_2];
+; CHECK-NEXT:    { // callseq 27, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 27
+; CHECK-NEXT:    ret;
+  %struct.rrri0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.rrri1 = insertvalue %struct.char4 %struct.rrri0, i8 %b, 1
+  %struct.rrri2 = insertvalue %struct.char4 %struct.rrri1, i8 %c, 2
+  %struct.rrri3 = insertvalue %struct.char4 %struct.rrri2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.rrri3)
+  ret void
+}
+define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { ; elts 0-1 are the constants 1, 2 (immediates); elts 2-3 are %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_iirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_iirr_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_iirr_param_1];
+; CHECK-NEXT:    { // callseq 28, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 28
+; CHECK-NEXT:    ret;
+  %struct.iirr0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.iirr1 = insertvalue %struct.char4 %struct.iirr0, i8 2, 1
+  %struct.iirr2 = insertvalue %struct.char4 %struct.iirr1, i8 %c, 2
+  %struct.iirr3 = insertvalue %struct.char4 %struct.iirr2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.iirr3)
+  ret void
+}
+define void @st_param_v4_i8_irir(i8 %b, i8 %d) { ; elts 0, 2 are the constants 1, 3 (immediates); elts 1, 3 are %b, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_irir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_irir_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_irir_param_1];
+; CHECK-NEXT:    { // callseq 29, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs1, 3, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 29
+; CHECK-NEXT:    ret;
+  %struct.irir0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.irir1 = insertvalue %struct.char4 %struct.irir0, i8 %b, 1
+  %struct.irir2 = insertvalue %struct.char4 %struct.irir1, i8 3, 2
+  %struct.irir3 = insertvalue %struct.char4 %struct.irir2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.irir3)
+  ret void
+}
+define void @st_param_v4_i8_irri(i8 %b, i8 %c) { ; elts 0, 3 are the constants 1, 4 (immediates); elts 1-2 are %b, %c (registers)
+; CHECK-LABEL: st_param_v4_i8_irri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_irri_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_irri_param_1];
+; CHECK-NEXT:    { // callseq 30, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs1, %rs2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 30
+; CHECK-NEXT:    ret;
+  %struct.irri0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.irri1 = insertvalue %struct.char4 %struct.irri0, i8 %b, 1
+  %struct.irri2 = insertvalue %struct.char4 %struct.irri1, i8 %c, 2
+  %struct.irri3 = insertvalue %struct.char4 %struct.irri2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.irri3)
+  ret void
+}
+define void @st_param_v4_i8_riir(i8 %a, i8 %d) { ; elts 1-2 are the constants 2, 3 (immediates); elts 0, 3 are %a, %d (registers)
+; CHECK-LABEL: st_param_v4_i8_riir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_riir_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_riir_param_1];
+; CHECK-NEXT:    { // callseq 31, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, 3, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 31
+; CHECK-NEXT:    ret;
+  %struct.riir0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.riir1 = insertvalue %struct.char4 %struct.riir0, i8 2, 1
+  %struct.riir2 = insertvalue %struct.char4 %struct.riir1, i8 3, 2
+  %struct.riir3 = insertvalue %struct.char4 %struct.riir2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.riir3)
+  ret void
+}
+define void @st_param_v4_i8_riri(i8 %a, i8 %c) { ; elts 1, 3 are the constants 2, 4 (immediates); elts 0, 2 are %a, %c (registers)
+; CHECK-LABEL: st_param_v4_i8_riri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_riri_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_riri_param_1];
+; CHECK-NEXT:    { // callseq 32, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, %rs2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 32
+; CHECK-NEXT:    ret;
+  %struct.riri0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.riri1 = insertvalue %struct.char4 %struct.riri0, i8 2, 1
+  %struct.riri2 = insertvalue %struct.char4 %struct.riri1, i8 %c, 2
+  %struct.riri3 = insertvalue %struct.char4 %struct.riri2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.riri3)
+  ret void
+}
+define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { ; elts 2-3 are the constants 3, 4 (immediates); elts 0-1 are %a, %b (registers)
+; CHECK-LABEL: st_param_v4_i8_rrii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_rrii_param_0];
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_rrii_param_1];
+; CHECK-NEXT:    { // callseq 33, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 33
+; CHECK-NEXT:    ret;
+  %struct.rrii0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.rrii1 = insertvalue %struct.char4 %struct.rrii0, i8 %b, 1
+  %struct.rrii2 = insertvalue %struct.char4 %struct.rrii1, i8 3, 2
+  %struct.rrii3 = insertvalue %struct.char4 %struct.rrii2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.rrii3)
+  ret void
+}
+define void @st_param_v4_i8_iiir(i8 %d) { ; elts 0-2 are the constants 1, 2, 3 (immediates); elt 3 is %d (register)
+; CHECK-LABEL: st_param_v4_i8_iiir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_iiir_param_0];
+; CHECK-NEXT:    { // callseq 34, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, 3, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 34
+; CHECK-NEXT:    ret;
+  %struct.iiir0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.iiir1 = insertvalue %struct.char4 %struct.iiir0, i8 2, 1
+  %struct.iiir2 = insertvalue %struct.char4 %struct.iiir1, i8 3, 2
+  %struct.iiir3 = insertvalue %struct.char4 %struct.iiir2, i8 %d, 3
+  call void @call_v4_i8(%struct.char4 %struct.iiir3)
+  ret void
+}
+define void @st_param_v4_i8_iiri(i8 %c) { ; elts 0, 1, 3 are the constants 1, 2, 4 (immediates); elt 2 is %c (register)
+; CHECK-LABEL: st_param_v4_i8_iiri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_iiri_param_0];
+; CHECK-NEXT:    { // callseq 35, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, %rs1, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 35
+; CHECK-NEXT:    ret;
+  %struct.iiri0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.iiri1 = insertvalue %struct.char4 %struct.iiri0, i8 2, 1
+  %struct.iiri2 = insertvalue %struct.char4 %struct.iiri1, i8 %c, 2
+  %struct.iiri3 = insertvalue %struct.char4 %struct.iiri2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.iiri3)
+  ret void
+}
+define void @st_param_v4_i8_irii(i8 %b) { ; elts 0, 2, 3 are the constants 1, 3, 4 (immediates); elt 1 is %b (register)
+; CHECK-LABEL: st_param_v4_i8_irii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_irii_param_0];
+; CHECK-NEXT:    { // callseq 36, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs1, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 36
+; CHECK-NEXT:    ret;
+  %struct.irii0 = insertvalue %struct.char4 poison, i8 1, 0
+  %struct.irii1 = insertvalue %struct.char4 %struct.irii0, i8 %b, 1
+  %struct.irii2 = insertvalue %struct.char4 %struct.irii1, i8 3, 2
+  %struct.irii3 = insertvalue %struct.char4 %struct.irii2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.irii3)
+  ret void
+}
+define void @st_param_v4_i8_riii(i8 %a) { ; elts 1-3 are the constants 2, 3, 4 (immediates); elt 0 is %a (register)
+; CHECK-LABEL: st_param_v4_i8_riii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_riii_param_0];
+; CHECK-NEXT:    { // callseq 37, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 37
+; CHECK-NEXT:    ret;
+  %struct.riii0 = insertvalue %struct.char4 poison, i8 %a, 0
+  %struct.riii1 = insertvalue %struct.char4 %struct.riii0, i8 2, 1
+  %struct.riii2 = insertvalue %struct.char4 %struct.riii1, i8 3, 2
+  %struct.riii3 = insertvalue %struct.char4 %struct.riii2, i8 4, 3
+  call void @call_v4_i8(%struct.char4 %struct.riii3)
+  ret void
+}
+
+define void @st_param_v4_i16_iiii() { ; all four elts are the constants 1, 2, 3, 4 (immediates)
+; CHECK-LABEL: st_param_v4_i16_iiii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 38, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 38
+; CHECK-NEXT:    ret;
+  call void @call_v4_i16(%struct.short4 { i16 1, i16 2, i16 3, i16 4 })
+  ret void
+}
+define void @st_param_v4_i16_irrr(i16 %b, i16 %c, i16 %d) { ; elt 0 is the constant 1 (immediate); elts 1-3 are %b, %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_irrr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_irrr_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_irrr_param_1];
+; CHECK-NEXT:    ld.param.u16 %rs3, [st_param_v4_i16_irrr_param_2];
+; CHECK-NEXT:    { // callseq 39, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, %rs1, %rs2, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 39
+; CHECK-NEXT:    ret;
+  %struct.irrr0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.irrr1 = insertvalue %struct.short4 %struct.irrr0, i16 %b, 1
+  %struct.irrr2 = insertvalue %struct.short4 %struct.irrr1, i16 %c, 2
+  %struct.irrr3 = insertvalue %struct.short4 %struct.irrr2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.irrr3)
+  ret void
+}
+define void @st_param_v4_i16_rirr(i16 %a, i16 %c, i16 %d) { ; elt 1 is the constant 2 (immediate); elts 0, 2, 3 are %a, %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_rirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_rirr_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_rirr_param_1];
+; CHECK-NEXT:    ld.param.u16 %rs3, [st_param_v4_i16_rirr_param_2];
+; CHECK-NEXT:    { // callseq 40, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, 2, %rs2, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 40
+; CHECK-NEXT:    ret;
+  %struct.rirr0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.rirr1 = insertvalue %struct.short4 %struct.rirr0, i16 2, 1
+  %struct.rirr2 = insertvalue %struct.short4 %struct.rirr1, i16 %c, 2
+  %struct.rirr3 = insertvalue %struct.short4 %struct.rirr2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.rirr3)
+  ret void
+}
+define void @st_param_v4_i16_rrir(i16 %a, i16 %b, i16 %d) { ; elt 2 is the constant 3 (immediate); elts 0, 1, 3 are %a, %b, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_rrir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_rrir_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_rrir_param_1];
+; CHECK-NEXT:    ld.param.u16 %rs3, [st_param_v4_i16_rrir_param_2];
+; CHECK-NEXT:    { // callseq 41, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, %rs2, 3, %rs3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 41
+; CHECK-NEXT:    ret;
+  %struct.rrir0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.rrir1 = insertvalue %struct.short4 %struct.rrir0, i16 %b, 1
+  %struct.rrir2 = insertvalue %struct.short4 %struct.rrir1, i16 3, 2
+  %struct.rrir3 = insertvalue %struct.short4 %struct.rrir2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.rrir3)
+  ret void
+}
+define void @st_param_v4_i16_rrri(i16 %a, i16 %b, i16 %c) { ; elt 3 is the constant 4 (immediate); elts 0-2 are %a, %b, %c (registers)
+; CHECK-LABEL: st_param_v4_i16_rrri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_rrri_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_rrri_param_1];
+; CHECK-NEXT:    ld.param.u16 %rs3, [st_param_v4_i16_rrri_param_2];
+; CHECK-NEXT:    { // callseq 42, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, %rs2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 42
+; CHECK-NEXT:    ret;
+  %struct.rrri0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.rrri1 = insertvalue %struct.short4 %struct.rrri0, i16 %b, 1
+  %struct.rrri2 = insertvalue %struct.short4 %struct.rrri1, i16 %c, 2
+  %struct.rrri3 = insertvalue %struct.short4 %struct.rrri2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.rrri3)
+  ret void
+}
+define void @st_param_v4_i16_iirr(i16 %c, i16 %d) { ; elts 0-1 are the constants 1, 2 (immediates); elts 2-3 are %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_iirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_iirr_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_iirr_param_1];
+; CHECK-NEXT:    { // callseq 43, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 43
+; CHECK-NEXT:    ret;
+  %struct.iirr0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.iirr1 = insertvalue %struct.short4 %struct.iirr0, i16 2, 1
+  %struct.iirr2 = insertvalue %struct.short4 %struct.iirr1, i16 %c, 2
+  %struct.iirr3 = insertvalue %struct.short4 %struct.iirr2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.iirr3)
+  ret void
+}
+define void @st_param_v4_i16_irir(i16 %b, i16 %d) { ; elts 0, 2 are the constants 1, 3 (immediates); elts 1, 3 are %b, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_irir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_irir_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_irir_param_1];
+; CHECK-NEXT:    { // callseq 44, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, %rs1, 3, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 44
+; CHECK-NEXT:    ret;
+  %struct.irir0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.irir1 = insertvalue %struct.short4 %struct.irir0, i16 %b, 1
+  %struct.irir2 = insertvalue %struct.short4 %struct.irir1, i16 3, 2
+  %struct.irir3 = insertvalue %struct.short4 %struct.irir2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.irir3)
+  ret void
+}
+define void @st_param_v4_i16_irri(i16 %b, i16 %c) { ; elts 0, 3 are the constants 1, 4 (immediates); elts 1-2 are %b, %c (registers)
+; CHECK-LABEL: st_param_v4_i16_irri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_irri_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_irri_param_1];
+; CHECK-NEXT:    { // callseq 45, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, %rs1, %rs2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 45
+; CHECK-NEXT:    ret;
+  %struct.irri0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.irri1 = insertvalue %struct.short4 %struct.irri0, i16 %b, 1
+  %struct.irri2 = insertvalue %struct.short4 %struct.irri1, i16 %c, 2
+  %struct.irri3 = insertvalue %struct.short4 %struct.irri2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.irri3)
+  ret void
+}
+define void @st_param_v4_i16_riir(i16 %a, i16 %d) { ; elts 1-2 are the constants 2, 3 (immediates); elts 0, 3 are %a, %d (registers)
+; CHECK-LABEL: st_param_v4_i16_riir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_riir_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_riir_param_1];
+; CHECK-NEXT:    { // callseq 46, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, 2, 3, %rs2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 46
+; CHECK-NEXT:    ret;
+  %struct.riir0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.riir1 = insertvalue %struct.short4 %struct.riir0, i16 2, 1
+  %struct.riir2 = insertvalue %struct.short4 %struct.riir1, i16 3, 2
+  %struct.riir3 = insertvalue %struct.short4 %struct.riir2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.riir3)
+  ret void
+}
+define void @st_param_v4_i16_riri(i16 %a, i16 %c) { ; elts 1, 3 are the constants 2, 4 (immediates); elts 0, 2 are %a, %c (registers)
+; CHECK-LABEL: st_param_v4_i16_riri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_riri_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_riri_param_1];
+; CHECK-NEXT:    { // callseq 47, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, 2, %rs2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 47
+; CHECK-NEXT:    ret;
+  %struct.riri0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.riri1 = insertvalue %struct.short4 %struct.riri0, i16 2, 1
+  %struct.riri2 = insertvalue %struct.short4 %struct.riri1, i16 %c, 2
+  %struct.riri3 = insertvalue %struct.short4 %struct.riri2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.riri3)
+  ret void
+}
+define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { ; elts 2-3 are the constants 3, 4 (immediates); elts 0-1 are %a, %b (registers)
+; CHECK-LABEL: st_param_v4_i16_rrii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_rrii_param_0];
+; CHECK-NEXT:    ld.param.u16 %rs2, [st_param_v4_i16_rrii_param_1];
+; CHECK-NEXT:    { // callseq 48, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 48
+; CHECK-NEXT:    ret;
+  %struct.rrii0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.rrii1 = insertvalue %struct.short4 %struct.rrii0, i16 %b, 1
+  %struct.rrii2 = insertvalue %struct.short4 %struct.rrii1, i16 3, 2
+  %struct.rrii3 = insertvalue %struct.short4 %struct.rrii2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.rrii3)
+  ret void
+}
+define void @st_param_v4_i16_iiir(i16 %d) { ; elts 0-2 are the constants 1, 2, 3 (immediates); elt 3 is %d (register)
+; CHECK-LABEL: st_param_v4_i16_iiir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_iiir_param_0];
+; CHECK-NEXT:    { // callseq 49, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, 2, 3, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 49
+; CHECK-NEXT:    ret;
+  %struct.iiir0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.iiir1 = insertvalue %struct.short4 %struct.iiir0, i16 2, 1
+  %struct.iiir2 = insertvalue %struct.short4 %struct.iiir1, i16 3, 2
+  %struct.iiir3 = insertvalue %struct.short4 %struct.iiir2, i16 %d, 3
+  call void @call_v4_i16(%struct.short4 %struct.iiir3)
+  ret void
+}
+define void @st_param_v4_i16_iiri(i16 %c) { ; elts 0, 1, 3 are the constants 1, 2, 4 (immediates); elt 2 is %c (register)
+; CHECK-LABEL: st_param_v4_i16_iiri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_iiri_param_0];
+; CHECK-NEXT:    { // callseq 50, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, 2, %rs1, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 50
+; CHECK-NEXT:    ret;
+  %struct.iiri0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.iiri1 = insertvalue %struct.short4 %struct.iiri0, i16 2, 1
+  %struct.iiri2 = insertvalue %struct.short4 %struct.iiri1, i16 %c, 2
+  %struct.iiri3 = insertvalue %struct.short4 %struct.iiri2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.iiri3)
+  ret void
+}
+define void @st_param_v4_i16_irii(i16 %b) { ; elts 0, 2, 3 are the constants 1, 3, 4 (immediates); elt 1 is %b (register)
+; CHECK-LABEL: st_param_v4_i16_irii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_irii_param_0];
+; CHECK-NEXT:    { // callseq 51, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {1, %rs1, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 51
+; CHECK-NEXT:    ret;
+  %struct.irii0 = insertvalue %struct.short4 poison, i16 1, 0
+  %struct.irii1 = insertvalue %struct.short4 %struct.irii0, i16 %b, 1
+  %struct.irii2 = insertvalue %struct.short4 %struct.irii1, i16 3, 2
+  %struct.irii3 = insertvalue %struct.short4 %struct.irii2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.irii3)
+  ret void
+}
+define void @st_param_v4_i16_riii(i16 %a) { ; elts 1-3 are the constants 2, 3, 4 (immediates); elt 0 is %a (register)
+; CHECK-LABEL: st_param_v4_i16_riii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v4_i16_riii_param_0];
+; CHECK-NEXT:    { // callseq 52, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v4.b16 [param0+0], {%rs1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 52
+; CHECK-NEXT:    ret;
+  %struct.riii0 = insertvalue %struct.short4 poison, i16 %a, 0
+  %struct.riii1 = insertvalue %struct.short4 %struct.riii0, i16 2, 1
+  %struct.riii2 = insertvalue %struct.short4 %struct.riii1, i16 3, 2
+  %struct.riii3 = insertvalue %struct.short4 %struct.riii2, i16 4, 3
+  call void @call_v4_i16(%struct.short4 %struct.riii3)
+  ret void
+}
+
+define void @st_param_v4_i32_iiii() { ; all four elts are the constants 1, 2, 3, 4 (immediates)
+; CHECK-LABEL: st_param_v4_i32_iiii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 53, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 53
+; CHECK-NEXT:    ret;
+  call void @call_v4_i32(%struct.int4 { i32 1, i32 2, i32 3, i32 4 })
+  ret void
+}
+define void @st_param_v4_i32_irrr(i32 %b, i32 %c, i32 %d) { ; elt 0 is the constant 1 (immediate); elts 1-3 are %b, %c, %d (registers)
+; CHECK-LABEL: st_param_v4_i32_irrr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_irrr_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_irrr_param_1];
+; CHECK-NEXT:    ld.param.u32 %r3, [st_param_v4_i32_irrr_param_2];
+; CHECK-NEXT:    { // callseq 54, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, %r1, %r2, %r3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 54
+; CHECK-NEXT:    ret;
+  %struct.irrr0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.irrr1 = insertvalue %struct.int4 %struct.irrr0, i32 %b, 1
+  %struct.irrr2 = insertvalue %struct.int4 %struct.irrr1, i32 %c, 2
+  %struct.irrr3 = insertvalue %struct.int4 %struct.irrr2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.irrr3)
+  ret void
+}
+; v4 i32, operand kinds r,i,r,r: slot 1 is the constant 2; slots 0, 2, 3 are
+; %a, %c, %d. The expected st.param.v4.b32 takes the constant as an inline
+; literal (no instruction materializes it first).
+define void @st_param_v4_i32_rirr(i32 %a, i32 %c, i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_rirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_rirr_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_rirr_param_1];
+; CHECK-NEXT:    ld.param.u32 %r3, [st_param_v4_i32_rirr_param_2];
+; CHECK-NEXT:    { // callseq 55, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, 2, %r2, %r3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 55
+; CHECK-NEXT:    ret;
+  %struct.rirr0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.rirr1 = insertvalue %struct.int4 %struct.rirr0, i32 2, 1
+  %struct.rirr2 = insertvalue %struct.int4 %struct.rirr1, i32 %c, 2
+  %struct.rirr3 = insertvalue %struct.int4 %struct.rirr2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.rirr3)
+  ret void
+}
+; v4 i32, operand kinds r,r,i,r: slot 2 is the constant 3; slots 0, 1, 3 are
+; %a, %b, %d. The expected st.param.v4.b32 takes the constant as an inline
+; literal (no instruction materializes it first).
+define void @st_param_v4_i32_rrir(i32 %a, i32 %b, i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_rrir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_rrir_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_rrir_param_1];
+; CHECK-NEXT:    ld.param.u32 %r3, [st_param_v4_i32_rrir_param_2];
+; CHECK-NEXT:    { // callseq 56, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, %r2, 3, %r3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 56
+; CHECK-NEXT:    ret;
+  %struct.rrir0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.rrir1 = insertvalue %struct.int4 %struct.rrir0, i32 %b, 1
+  %struct.rrir2 = insertvalue %struct.int4 %struct.rrir1, i32 3, 2
+  %struct.rrir3 = insertvalue %struct.int4 %struct.rrir2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.rrir3)
+  ret void
+}
+; v4 i32, operand kinds r,r,r,i: slot 3 is the constant 4; slots 0-2 are
+; %a, %b, %c. The expected st.param.v4.b32 takes the constant as an inline
+; literal (no instruction materializes it first).
+define void @st_param_v4_i32_rrri(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: st_param_v4_i32_rrri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_rrri_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_rrri_param_1];
+; CHECK-NEXT:    ld.param.u32 %r3, [st_param_v4_i32_rrri_param_2];
+; CHECK-NEXT:    { // callseq 57, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, %r2, %r3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 57
+; CHECK-NEXT:    ret;
+  %struct.rrri0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.rrri1 = insertvalue %struct.int4 %struct.rrri0, i32 %b, 1
+  %struct.rrri2 = insertvalue %struct.int4 %struct.rrri1, i32 %c, 2
+  %struct.rrri3 = insertvalue %struct.int4 %struct.rrri2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.rrri3)
+  ret void
+}
+; v4 i32, operand kinds i,i,r,r: slots 0-1 are the constants 1, 2; slots 2-3
+; are %c, %d. The expected st.param.v4.b32 takes both constants as inline
+; literals (no instruction materializes them first).
+define void @st_param_v4_i32_iirr(i32 %c, i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_iirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_iirr_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_iirr_param_1];
+; CHECK-NEXT:    { // callseq 58, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, 2, %r1, %r2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 58
+; CHECK-NEXT:    ret;
+  %struct.iirr0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.iirr1 = insertvalue %struct.int4 %struct.iirr0, i32 2, 1
+  %struct.iirr2 = insertvalue %struct.int4 %struct.iirr1, i32 %c, 2
+  %struct.iirr3 = insertvalue %struct.int4 %struct.iirr2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.iirr3)
+  ret void
+}
+; v4 i32, operand kinds i,r,i,r: slots 0 and 2 are the constants 1, 3; slots
+; 1 and 3 are %b, %d. The expected st.param.v4.b32 takes both constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_irir(i32 %b, i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_irir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_irir_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_irir_param_1];
+; CHECK-NEXT:    { // callseq 59, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, %r1, 3, %r2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 59
+; CHECK-NEXT:    ret;
+  %struct.irir0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.irir1 = insertvalue %struct.int4 %struct.irir0, i32 %b, 1
+  %struct.irir2 = insertvalue %struct.int4 %struct.irir1, i32 3, 2
+  %struct.irir3 = insertvalue %struct.int4 %struct.irir2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.irir3)
+  ret void
+}
+; v4 i32, operand kinds i,r,r,i: slots 0 and 3 are the constants 1, 4; slots
+; 1 and 2 are %b, %c. The expected st.param.v4.b32 takes both constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_irri(i32 %b, i32 %c) {
+; CHECK-LABEL: st_param_v4_i32_irri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_irri_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_irri_param_1];
+; CHECK-NEXT:    { // callseq 60, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, %r1, %r2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 60
+; CHECK-NEXT:    ret;
+  %struct.irri0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.irri1 = insertvalue %struct.int4 %struct.irri0, i32 %b, 1
+  %struct.irri2 = insertvalue %struct.int4 %struct.irri1, i32 %c, 2
+  %struct.irri3 = insertvalue %struct.int4 %struct.irri2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.irri3)
+  ret void
+}
+; v4 i32, operand kinds r,i,i,r: slots 1 and 2 are the constants 2, 3; slots
+; 0 and 3 are %a, %d. The expected st.param.v4.b32 takes both constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_riir(i32 %a, i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_riir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_riir_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_riir_param_1];
+; CHECK-NEXT:    { // callseq 61, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, 2, 3, %r2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 61
+; CHECK-NEXT:    ret;
+  %struct.riir0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.riir1 = insertvalue %struct.int4 %struct.riir0, i32 2, 1
+  %struct.riir2 = insertvalue %struct.int4 %struct.riir1, i32 3, 2
+  %struct.riir3 = insertvalue %struct.int4 %struct.riir2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.riir3)
+  ret void
+}
+; v4 i32, operand kinds r,i,r,i: slots 1 and 3 are the constants 2, 4; slots
+; 0 and 2 are %a, %c. The expected st.param.v4.b32 takes both constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_riri(i32 %a, i32 %c) {
+; CHECK-LABEL: st_param_v4_i32_riri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_riri_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_riri_param_1];
+; CHECK-NEXT:    { // callseq 62, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, 2, %r2, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 62
+; CHECK-NEXT:    ret;
+  %struct.riri0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.riri1 = insertvalue %struct.int4 %struct.riri0, i32 2, 1
+  %struct.riri2 = insertvalue %struct.int4 %struct.riri1, i32 %c, 2
+  %struct.riri3 = insertvalue %struct.int4 %struct.riri2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.riri3)
+  ret void
+}
+; v4 i32, operand kinds r,r,i,i: slots 2-3 are the constants 3, 4; slots 0-1
+; are %a, %b. The expected st.param.v4.b32 takes both constants as inline
+; literals (no instruction materializes them first).
+define void @st_param_v4_i32_rrii(i32 %a, i32 %b) {
+; CHECK-LABEL: st_param_v4_i32_rrii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_rrii_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [st_param_v4_i32_rrii_param_1];
+; CHECK-NEXT:    { // callseq 63, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, %r2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 63
+; CHECK-NEXT:    ret;
+  %struct.rrii0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.rrii1 = insertvalue %struct.int4 %struct.rrii0, i32 %b, 1
+  %struct.rrii2 = insertvalue %struct.int4 %struct.rrii1, i32 3, 2
+  %struct.rrii3 = insertvalue %struct.int4 %struct.rrii2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.rrii3)
+  ret void
+}
+; v4 i32, operand kinds i,i,i,r: slots 0-2 are the constants 1, 2, 3; slot 3
+; is %d. The expected st.param.v4.b32 takes the three constants as inline
+; literals (no instruction materializes them first).
+define void @st_param_v4_i32_iiir(i32 %d) {
+; CHECK-LABEL: st_param_v4_i32_iiir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_iiir_param_0];
+; CHECK-NEXT:    { // callseq 64, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, 2, 3, %r1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 64
+; CHECK-NEXT:    ret;
+  %struct.iiir0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.iiir1 = insertvalue %struct.int4 %struct.iiir0, i32 2, 1
+  %struct.iiir2 = insertvalue %struct.int4 %struct.iiir1, i32 3, 2
+  %struct.iiir3 = insertvalue %struct.int4 %struct.iiir2, i32 %d, 3
+  call void @call_v4_i32(%struct.int4 %struct.iiir3)
+  ret void
+}
+; v4 i32, operand kinds i,i,r,i: slots 0, 1, 3 are the constants 1, 2, 4;
+; slot 2 is %c. The expected st.param.v4.b32 takes the three constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_iiri(i32 %c) {
+; CHECK-LABEL: st_param_v4_i32_iiri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_iiri_param_0];
+; CHECK-NEXT:    { // callseq 65, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, 2, %r1, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 65
+; CHECK-NEXT:    ret;
+  %struct.iiri0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.iiri1 = insertvalue %struct.int4 %struct.iiri0, i32 2, 1
+  %struct.iiri2 = insertvalue %struct.int4 %struct.iiri1, i32 %c, 2
+  %struct.iiri3 = insertvalue %struct.int4 %struct.iiri2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.iiri3)
+  ret void
+}
+; v4 i32, operand kinds i,r,i,i: slots 0, 2, 3 are the constants 1, 3, 4;
+; slot 1 is %b. The expected st.param.v4.b32 takes the three constants as
+; inline literals (no instruction materializes them first).
+define void @st_param_v4_i32_irii(i32 %b) {
+; CHECK-LABEL: st_param_v4_i32_irii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_irii_param_0];
+; CHECK-NEXT:    { // callseq 66, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {1, %r1, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 66
+; CHECK-NEXT:    ret;
+  %struct.irii0 = insertvalue %struct.int4 poison, i32 1, 0
+  %struct.irii1 = insertvalue %struct.int4 %struct.irii0, i32 %b, 1
+  %struct.irii2 = insertvalue %struct.int4 %struct.irii1, i32 3, 2
+  %struct.irii3 = insertvalue %struct.int4 %struct.irii2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.irii3)
+  ret void
+}
+; v4 i32, operand kinds r,i,i,i: slots 1-3 are the constants 2, 3, 4; slot 0
+; is %a. The expected st.param.v4.b32 takes the three constants as inline
+; literals (no instruction materializes them first).
+define void @st_param_v4_i32_riii(i32 %a) {
+; CHECK-LABEL: st_param_v4_i32_riii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v4_i32_riii_param_0];
+; CHECK-NEXT:    { // callseq 67, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.b32 [param0+0], {%r1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 67
+; CHECK-NEXT:    ret;
+  %struct.riii0 = insertvalue %struct.int4 poison, i32 %a, 0
+  %struct.riii1 = insertvalue %struct.int4 %struct.riii0, i32 2, 1
+  %struct.riii2 = insertvalue %struct.int4 %struct.riii1, i32 3, 2
+  %struct.riii3 = insertvalue %struct.int4 %struct.riii2, i32 4, 3
+  call void @call_v4_i32(%struct.int4 %struct.riii3)
+  ret void
+}
+
+; v4 f32, operand kinds i,i,i,i: all four slots are the constants 1.0, 2.0,
+; 3.0, 4.0. The expected output declares no registers and the st.param.v4.f32
+; takes them inline as 0f3F800000, 0f40000000, 0f40400000, 0f40800000.
+define void @st_param_v4_f32_iiii() {
+; CHECK-LABEL: st_param_v4_f32_iiii(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 68, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 68
+; CHECK-NEXT:    ret;
+  call void @call_v4_f32(%struct.float4 { float 1.0, float 2.0, float 3.0, float 4.0 })
+  ret void
+}
+; v4 f32, operand kinds i,r,r,r: slot 0 is the constant 1.0; slots 1-3 are
+; %b, %c, %d. The expected st.param.v4.f32 takes the constant inline as
+; 0f3F800000 (no instruction materializes it first).
+define void @st_param_v4_f32_irrr(float %b, float %c, float %d) {
+; CHECK-LABEL: st_param_v4_f32_irrr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_irrr_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_irrr_param_1];
+; CHECK-NEXT:    ld.param.f32 %f3, [st_param_v4_f32_irrr_param_2];
+; CHECK-NEXT:    { // callseq 69, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, %f1, %f2, %f3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 69
+; CHECK-NEXT:    ret;
+  %struct.irrr0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.irrr1 = insertvalue %struct.float4 %struct.irrr0, float %b, 1
+  %struct.irrr2 = insertvalue %struct.float4 %struct.irrr1, float %c, 2
+  %struct.irrr3 = insertvalue %struct.float4 %struct.irrr2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.irrr3)
+  ret void
+}
+; v4 f32, operand kinds r,i,r,r: slot 1 is the constant 2.0; slots 0, 2, 3
+; are %a, %c, %d. The expected st.param.v4.f32 takes the constant inline as
+; 0f40000000 (no instruction materializes it first).
+define void @st_param_v4_f32_rirr(float %a, float %c, float %d) {
+; CHECK-LABEL: st_param_v4_f32_rirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_rirr_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_rirr_param_1];
+; CHECK-NEXT:    ld.param.f32 %f3, [st_param_v4_f32_rirr_param_2];
+; CHECK-NEXT:    { // callseq 70, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, 0f40000000, %f2, %f3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 70
+; CHECK-NEXT:    ret;
+  %struct.rirr0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.rirr1 = insertvalue %struct.float4 %struct.rirr0, float 2.0, 1
+  %struct.rirr2 = insertvalue %struct.float4 %struct.rirr1, float %c, 2
+  %struct.rirr3 = insertvalue %struct.float4 %struct.rirr2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.rirr3)
+  ret void
+}
+; v4 f32, operand kinds r,r,i,r: slot 2 is the constant 3.0; slots 0, 1, 3
+; are %a, %b, %d. The expected st.param.v4.f32 takes the constant inline as
+; 0f40400000 (no instruction materializes it first).
+define void @st_param_v4_f32_rrir(float %a, float %b, float %d) {
+; CHECK-LABEL: st_param_v4_f32_rrir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_rrir_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_rrir_param_1];
+; CHECK-NEXT:    ld.param.f32 %f3, [st_param_v4_f32_rrir_param_2];
+; CHECK-NEXT:    { // callseq 71, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, %f2, 0f40400000, %f3};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 71
+; CHECK-NEXT:    ret;
+  %struct.rrir0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.rrir1 = insertvalue %struct.float4 %struct.rrir0, float %b, 1
+  %struct.rrir2 = insertvalue %struct.float4 %struct.rrir1, float 3.0, 2
+  %struct.rrir3 = insertvalue %struct.float4 %struct.rrir2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.rrir3)
+  ret void
+}
+; v4 f32, operand kinds r,r,r,i: slot 3 is the constant 4.0; slots 0-2 are
+; %a, %b, %c. The expected st.param.v4.f32 takes the constant inline as
+; 0f40800000 (no instruction materializes it first).
+define void @st_param_v4_f32_rrri(float %a, float %b, float %c) {
+; CHECK-LABEL: st_param_v4_f32_rrri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_rrri_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_rrri_param_1];
+; CHECK-NEXT:    ld.param.f32 %f3, [st_param_v4_f32_rrri_param_2];
+; CHECK-NEXT:    { // callseq 72, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, %f2, %f3, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 72
+; CHECK-NEXT:    ret;
+  %struct.rrri0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.rrri1 = insertvalue %struct.float4 %struct.rrri0, float %b, 1
+  %struct.rrri2 = insertvalue %struct.float4 %struct.rrri1, float %c, 2
+  %struct.rrri3 = insertvalue %struct.float4 %struct.rrri2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.rrri3)
+  ret void
+}
+; v4 f32, operand kinds i,i,r,r: slots 0-1 are the constants 1.0, 2.0; slots
+; 2-3 are %c, %d. The expected st.param.v4.f32 takes the constants inline as
+; 0f3F800000 and 0f40000000 (no instruction materializes them first).
+define void @st_param_v4_f32_iirr(float %c, float %d) {
+; CHECK-LABEL: st_param_v4_f32_iirr(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_iirr_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_iirr_param_1];
+; CHECK-NEXT:    { // callseq 73, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, %f1, %f2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 73
+; CHECK-NEXT:    ret;
+  %struct.iirr0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.iirr1 = insertvalue %struct.float4 %struct.iirr0, float 2.0, 1
+  %struct.iirr2 = insertvalue %struct.float4 %struct.iirr1, float %c, 2
+  %struct.iirr3 = insertvalue %struct.float4 %struct.iirr2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.iirr3)
+  ret void
+}
+; v4 f32, operand kinds i,r,i,r: slots 0 and 2 are the constants 1.0, 3.0;
+; slots 1 and 3 are %b, %d. The expected st.param.v4.f32 takes the constants
+; inline as 0f3F800000 and 0f40400000 (no instruction materializes them first).
+define void @st_param_v4_f32_irir(float %b, float %d) {
+; CHECK-LABEL: st_param_v4_f32_irir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_irir_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_irir_param_1];
+; CHECK-NEXT:    { // callseq 74, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, %f1, 0f40400000, %f2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 74
+; CHECK-NEXT:    ret;
+  %struct.irir0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.irir1 = insertvalue %struct.float4 %struct.irir0, float %b, 1
+  %struct.irir2 = insertvalue %struct.float4 %struct.irir1, float 3.0, 2
+  %struct.irir3 = insertvalue %struct.float4 %struct.irir2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.irir3)
+  ret void
+}
+; v4 f32, operand kinds i,r,r,i: slots 0 and 3 are the constants 1.0, 4.0;
+; slots 1 and 2 are %b, %c. The expected st.param.v4.f32 takes the constants
+; inline as 0f3F800000 and 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_irri(float %b, float %c) {
+; CHECK-LABEL: st_param_v4_f32_irri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_irri_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_irri_param_1];
+; CHECK-NEXT:    { // callseq 75, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, %f1, %f2, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 75
+; CHECK-NEXT:    ret;
+  %struct.irri0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.irri1 = insertvalue %struct.float4 %struct.irri0, float %b, 1
+  %struct.irri2 = insertvalue %struct.float4 %struct.irri1, float %c, 2
+  %struct.irri3 = insertvalue %struct.float4 %struct.irri2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.irri3)
+  ret void
+}
+; v4 f32, operand kinds r,i,i,r: slots 1 and 2 are the constants 2.0, 3.0;
+; slots 0 and 3 are %a, %d. The expected st.param.v4.f32 takes the constants
+; inline as 0f40000000 and 0f40400000 (no instruction materializes them first).
+define void @st_param_v4_f32_riir(float %a, float %d) {
+; CHECK-LABEL: st_param_v4_f32_riir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_riir_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_riir_param_1];
+; CHECK-NEXT:    { // callseq 76, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, 0f40000000, 0f40400000, %f2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 76
+; CHECK-NEXT:    ret;
+  %struct.riir0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.riir1 = insertvalue %struct.float4 %struct.riir0, float 2.0, 1
+  %struct.riir2 = insertvalue %struct.float4 %struct.riir1, float 3.0, 2
+  %struct.riir3 = insertvalue %struct.float4 %struct.riir2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.riir3)
+  ret void
+}
+; v4 f32, operand kinds r,i,r,i: slots 1 and 3 are the constants 2.0, 4.0;
+; slots 0 and 2 are %a, %c. The expected st.param.v4.f32 takes the constants
+; inline as 0f40000000 and 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_riri(float %a, float %c) {
+; CHECK-LABEL: st_param_v4_f32_riri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_riri_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_riri_param_1];
+; CHECK-NEXT:    { // callseq 77, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, 0f40000000, %f2, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 77
+; CHECK-NEXT:    ret;
+  %struct.riri0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.riri1 = insertvalue %struct.float4 %struct.riri0, float 2.0, 1
+  %struct.riri2 = insertvalue %struct.float4 %struct.riri1, float %c, 2
+  %struct.riri3 = insertvalue %struct.float4 %struct.riri2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.riri3)
+  ret void
+}
+; v4 f32, operand kinds r,r,i,i: slots 2-3 are the constants 3.0, 4.0; slots
+; 0-1 are %a, %b. The expected st.param.v4.f32 takes the constants inline as
+; 0f40400000 and 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_rrii(float %a, float %b) {
+; CHECK-LABEL: st_param_v4_f32_rrii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_rrii_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [st_param_v4_f32_rrii_param_1];
+; CHECK-NEXT:    { // callseq 78, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, %f2, 0f40400000, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 78
+; CHECK-NEXT:    ret;
+  %struct.rrii0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.rrii1 = insertvalue %struct.float4 %struct.rrii0, float %b, 1
+  %struct.rrii2 = insertvalue %struct.float4 %struct.rrii1, float 3.0, 2
+  %struct.rrii3 = insertvalue %struct.float4 %struct.rrii2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.rrii3)
+  ret void
+}
+; v4 f32, operand kinds i,i,i,r: slots 0-2 are the constants 1.0, 2.0, 3.0;
+; slot 3 is %d. The expected st.param.v4.f32 takes the constants inline as
+; 0f3F800000, 0f40000000, 0f40400000 (no instruction materializes them first).
+define void @st_param_v4_f32_iiir(float %d) {
+; CHECK-LABEL: st_param_v4_f32_iiir(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_iiir_param_0];
+; CHECK-NEXT:    { // callseq 79, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, 0f40400000, %f1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 79
+; CHECK-NEXT:    ret;
+  %struct.iiir0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.iiir1 = insertvalue %struct.float4 %struct.iiir0, float 2.0, 1
+  %struct.iiir2 = insertvalue %struct.float4 %struct.iiir1, float 3.0, 2
+  %struct.iiir3 = insertvalue %struct.float4 %struct.iiir2, float %d, 3
+  call void @call_v4_f32(%struct.float4 %struct.iiir3)
+  ret void
+}
+; v4 f32, operand kinds i,i,r,i: slots 0, 1, 3 are the constants 1.0, 2.0,
+; 4.0; slot 2 is %c. The expected st.param.v4.f32 takes the constants inline as
+; 0f3F800000, 0f40000000, 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_iiri(float %c) {
+; CHECK-LABEL: st_param_v4_f32_iiri(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_iiri_param_0];
+; CHECK-NEXT:    { // callseq 80, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, %f1, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 80
+; CHECK-NEXT:    ret;
+  %struct.iiri0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.iiri1 = insertvalue %struct.float4 %struct.iiri0, float 2.0, 1
+  %struct.iiri2 = insertvalue %struct.float4 %struct.iiri1, float %c, 2
+  %struct.iiri3 = insertvalue %struct.float4 %struct.iiri2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.iiri3)
+  ret void
+}
+; v4 f32, operand kinds i,r,i,i: slots 0, 2, 3 are the constants 1.0, 3.0,
+; 4.0; slot 1 is %b. The expected st.param.v4.f32 takes the constants inline as
+; 0f3F800000, 0f40400000, 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_irii(float %b) {
+; CHECK-LABEL: st_param_v4_f32_irii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_irii_param_0];
+; CHECK-NEXT:    { // callseq 81, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {0f3F800000, %f1, 0f40400000, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 81
+; CHECK-NEXT:    ret;
+  %struct.irii0 = insertvalue %struct.float4 poison, float 1.0, 0
+  %struct.irii1 = insertvalue %struct.float4 %struct.irii0, float %b, 1
+  %struct.irii2 = insertvalue %struct.float4 %struct.irii1, float 3.0, 2
+  %struct.irii3 = insertvalue %struct.float4 %struct.irii2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.irii3)
+  ret void
+}
+; v4 f32, operand kinds r,i,i,i: slots 1-3 are the constants 2.0, 3.0, 4.0;
+; slot 0 is %a. The expected st.param.v4.f32 takes the constants inline as
+; 0f40000000, 0f40400000, 0f40800000 (no instruction materializes them first).
+define void @st_param_v4_f32_riii(float %a) {
+; CHECK-LABEL: st_param_v4_f32_riii(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v4_f32_riii_param_0];
+; CHECK-NEXT:    { // callseq 82, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v4.f32 [param0+0], {%f1, 0f40000000, 0f40400000, 0f40800000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 82
+; CHECK-NEXT:    ret;
+  %struct.riii0 = insertvalue %struct.float4 poison, float %a, 0
+  %struct.riii1 = insertvalue %struct.float4 %struct.riii0, float 2.0, 1
+  %struct.riii2 = insertvalue %struct.float4 %struct.riii1, float 3.0, 2
+  %struct.riii3 = insertvalue %struct.float4 %struct.riii2, float 4.0, 3
+  call void @call_v4_f32(%struct.float4 %struct.riii3)
+  ret void
+}
+
+; External callees for the four-element struct tests. Each takes its struct
+; by value with an alignstack attribute; in the i16/i32/f32 tests the expected
+; `.param .align` value (8, 16, 16) equals the alignstack amount given here.
+; NOTE(review): presumably the same holds for the i8 callee (alignstack 4).
+declare void @call_v4_i8(%struct.char4 alignstack(4))
+declare void @call_v4_i16(%struct.short4 alignstack(8))
+declare void @call_v4_i32(%struct.int4 alignstack(16))
+declare void @call_v4_f32(%struct.float4 alignstack(16))


        


More information about the llvm-commits mailing list