[llvm] f03782d - [NVPTX] Fixup v2i8 parameter and return lowering (#145585)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 09:26:14 PDT 2025
Author: Alex MacLean
Date: 2025-06-27T09:26:10-07:00
New Revision: f03782dd67d3e1909a447aff25926d445cde5f3a
URL: https://github.com/llvm/llvm-project/commit/f03782dd67d3e1909a447aff25926d445cde5f3a
DIFF: https://github.com/llvm/llvm-project/commit/f03782dd67d3e1909a447aff25926d445cde5f3a.diff
LOG: [NVPTX] Fixup v2i8 parameter and return lowering (#145585)
This change fixes v2i8 lowering for parameters and returned values. As
part of this work, I move the lowering for return values to use generic
ISD::STORE nodes as these are more flexible and have existing
legalization handling.
Note that calling a function with v2i8 arguments or return values still
does not work; this is left for a subsequent change, as this MR is already
fairly large.
Partially addresses #128853
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
llvm/test/CodeGen/NVPTX/and-or-setcc.ll
llvm/test/CodeGen/NVPTX/atomics.ll
llvm/test/CodeGen/NVPTX/bf16-instructions.ll
llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll
llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
llvm/test/CodeGen/NVPTX/elect.ll
llvm/test/CodeGen/NVPTX/extractelement.ll
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/test/CodeGen/NVPTX/fexp2.ll
llvm/test/CodeGen/NVPTX/flog2.ll
llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll
llvm/test/CodeGen/NVPTX/fma.ll
llvm/test/CodeGen/NVPTX/i1-icmp.ll
llvm/test/CodeGen/NVPTX/i128-ld-st.ll
llvm/test/CodeGen/NVPTX/i128.ll
llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
llvm/test/CodeGen/NVPTX/idioms.ll
llvm/test/CodeGen/NVPTX/jump-table.ll
llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll
llvm/test/CodeGen/NVPTX/ldg-invariant.ll
llvm/test/CodeGen/NVPTX/ldu-i8.ll
llvm/test/CodeGen/NVPTX/ldu-ldg.ll
llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
llvm/test/CodeGen/NVPTX/param-add.ll
llvm/test/CodeGen/NVPTX/param-load-store.ll
llvm/test/CodeGen/NVPTX/param-overalign.ll
llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
llvm/test/CodeGen/NVPTX/shift-opt.ll
llvm/test/CodeGen/NVPTX/tid-range.ll
llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
llvm/test/CodeGen/NVPTX/variadics-backend.ll
llvm/test/CodeGen/NVPTX/vector-returns.ll
llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 7c1d13119fa18..5631342ecc13e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -151,12 +151,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryLoadParam(N))
return;
break;
- case NVPTXISD::StoreRetval:
- case NVPTXISD::StoreRetvalV2:
- case NVPTXISD::StoreRetvalV4:
- if (tryStoreRetval(N))
- return;
- break;
case NVPTXISD::StoreParam:
case NVPTXISD::StoreParamV2:
case NVPTXISD::StoreParamV4:
@@ -1504,84 +1498,6 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
return true;
}
-bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
- SDLoc DL(N);
- SDValue Chain = N->getOperand(0);
- SDValue Offset = N->getOperand(1);
- unsigned OffsetVal = Offset->getAsZExtVal();
- MemSDNode *Mem = cast<MemSDNode>(N);
-
- // How many elements do we have?
- unsigned NumElts = 1;
- switch (N->getOpcode()) {
- default:
- return false;
- case NVPTXISD::StoreRetval:
- NumElts = 1;
- break;
- case NVPTXISD::StoreRetvalV2:
- NumElts = 2;
- break;
- case NVPTXISD::StoreRetvalV4:
- NumElts = 4;
- break;
- }
-
- // Build vector of operands
- SmallVector<SDValue, 6> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(N->getOperand(i + 2));
- Ops.append({CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain});
-
- // Determine target opcode
- // If we have an i1, use an 8-bit store. The lowering code in
- // NVPTXISelLowering will have already emitted an upcast.
- std::optional<unsigned> Opcode = 0;
- switch (NumElts) {
- default:
- return false;
- case 1:
- Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
- NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
- NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64);
- if (Opcode == NVPTX::StoreRetvalI8) {
- // Fine tune the opcode depending on the size of the operand.
- // This helps to avoid creating redundant COPY instructions in
- // InstrEmitter::AddRegisterOperand().
- switch (Ops[0].getSimpleValueType().SimpleTy) {
- default:
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalI8TruncI32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreRetvalI8TruncI64;
- break;
- }
- }
- break;
- case 2:
- Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
- NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
- NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64);
- break;
- case 4:
- Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
- NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
- NVPTX::StoreRetvalV4I32, {/* no v4i64 */});
- break;
- }
- if (!Opcode)
- return false;
-
- SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
- MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
-
- ReplaceNode(N, Ret);
- return true;
-}
-
// Helpers for constructing opcode (ex: NVPTX::StoreParamV4F32_iiri)
#define getOpcV2H(ty, opKind0, opKind1) \
NVPTX::StoreParamV2##ty##_##opKind0##opKind1
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index a785e1513682e..0e4dec1adca67 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -79,7 +79,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool tryStore(SDNode *N);
bool tryStoreVector(SDNode *N);
bool tryLoadParam(SDNode *N);
- bool tryStoreRetval(SDNode *N);
bool tryStoreParam(SDNode *N);
bool tryFence(SDNode *N);
void SelectAddrSpaceCast(SDNode *N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b924a1f5ac93c..d9192fbfceff1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -370,7 +370,7 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
} else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
// v2i8 is promoted to v2i16
NumElts = 1;
- EltVT = MVT::v2i16;
+ EltVT = MVT::v2i8;
}
for (unsigned j = 0; j != NumElts; ++j) {
ValueVTs.push_back(EltVT);
@@ -1065,9 +1065,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::StoreParamV2)
MAKE_CASE(NVPTXISD::StoreParamV4)
MAKE_CASE(NVPTXISD::MoveParam)
- MAKE_CASE(NVPTXISD::StoreRetval)
- MAKE_CASE(NVPTXISD::StoreRetvalV2)
- MAKE_CASE(NVPTXISD::StoreRetvalV4)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
MAKE_CASE(NVPTXISD::CallPrototype)
@@ -1438,7 +1435,11 @@ static MachinePointerInfo refinePtrAS(SDValue &Ptr, SelectionDAG &DAG,
}
static ISD::NodeType getExtOpcode(const ISD::ArgFlagsTy &Flags) {
- return Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ if (Flags.isSExt())
+ return ISD::SIGN_EXTEND;
+ if (Flags.isZExt())
+ return ISD::ZERO_EXTEND;
+ return ISD::ANY_EXTEND;
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -3373,10 +3374,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
}
InVals.push_back(P);
} else {
- bool aggregateIsPacked = false;
- if (StructType *STy = dyn_cast<StructType>(Ty))
- aggregateIsPacked = STy->isPacked();
-
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
@@ -3389,9 +3386,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
unsigned I = 0;
for (const unsigned NumElts : VectorInfo) {
- const EVT EltVT = VTs[I];
// i1 is loaded/stored as i8
- const EVT LoadVT = EltVT == MVT::i1 ? MVT::i8 : EltVT;
+ const EVT LoadVT = VTs[I] == MVT::i1 ? MVT::i8 : VTs[I];
// If the element is a packed type (ex. v2f16, v4i8, etc) holding
// multiple elements.
const unsigned PackingAmt =
@@ -3403,14 +3399,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue VecAddr = DAG.getObjectPtrOffset(
dl, ArgSymbol, TypeSize::getFixed(Offsets[I]));
- const MaybeAlign PartAlign = [&]() -> MaybeAlign {
- if (aggregateIsPacked)
- return Align(1);
- if (NumElts != 1)
- return std::nullopt;
- Align PartAlign = DAG.getEVTAlign(EltVT);
- return commonAlignment(PartAlign, Offsets[I]);
- }();
+ const MaybeAlign PartAlign = commonAlignment(ArgAlign, Offsets[I]);
SDValue P =
DAG.getLoad(VecVT, dl, Root, VecAddr,
MachinePointerInfo(ADDRESS_SPACE_PARAM), PartAlign,
@@ -3419,23 +3408,22 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (P.getNode())
P.getNode()->setIROrder(Arg.getArgNo() + 1);
for (const unsigned J : llvm::seq(NumElts)) {
- SDValue Elt = DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT,
- dl, LoadVT, P,
- DAG.getIntPtrConstant(J * PackingAmt, dl));
+ SDValue Elt = DAG.getNode(
+ LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT,
+ dl, LoadVT, P, DAG.getVectorIdxConstant(J * PackingAmt, dl));
// Extend or truncate the element if necessary (e.g. an i8 is loaded
// into an i16 register)
- const EVT ExpactedVT = ArgIns[I + J].VT;
- assert((Elt.getValueType().bitsEq(ExpactedVT) ||
- (ExpactedVT.isScalarInteger() &&
- Elt.getValueType().isScalarInteger())) &&
+ const EVT ExpectedVT = ArgIns[I + J].VT;
+ assert((Elt.getValueType() == ExpectedVT ||
+ (ExpectedVT.isInteger() && Elt.getValueType().isInteger())) &&
"Non-integer argument type size mismatch");
- if (ExpactedVT.bitsGT(Elt.getValueType()))
- Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpactedVT,
+ if (ExpectedVT.bitsGT(Elt.getValueType()))
+ Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpectedVT,
Elt);
- else if (ExpactedVT.bitsLT(Elt.getValueType()))
- Elt = DAG.getNode(ISD::TRUNCATE, dl, ExpactedVT, Elt);
+ else if (ExpectedVT.bitsLT(Elt.getValueType()))
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, Elt);
InVals.push_back(Elt);
}
I += NumElts;
@@ -3449,33 +3437,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
return Chain;
}
-// Use byte-store when the param adress of the return value is unaligned.
-// This may happen when the return value is a field of a packed structure.
-static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain,
- uint64_t Offset, EVT ElementType,
- SDValue RetVal, const SDLoc &dl) {
- // Bit logic only works on integer types
- if (adjustElementType(ElementType))
- RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
-
- // Store each byte
- for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
- // Shift the byte to the last byte position
- SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, RetVal,
- DAG.getConstant(i * 8, dl, MVT::i32));
- SDValue StoreOperands[] = {Chain, DAG.getConstant(Offset + i, dl, MVT::i32),
- ShiftVal};
- // Trunc store only the last byte by using
- // st.param.b8
- // The register type can be larger than b8.
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), StoreOperands,
- MVT::i8, MachinePointerInfo(), std::nullopt,
- MachineMemOperand::MOStore);
- }
- return Chain;
-}
-
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -3497,10 +3458,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
- for (const unsigned I : llvm::seq(VTs.size()))
- if (const auto PromotedVT = PromoteScalarIntegerPTX(VTs[I]))
- VTs[I] = *PromotedVT;
-
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
// they are signed or unsigned types.
@@ -3512,12 +3469,20 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(!PromoteScalarIntegerPTX(RetVal.getValueType()) &&
"OutVal type should always be legal");
- if (ExtendIntegerRetVal) {
- RetVal = DAG.getNode(getExtOpcode(Outs[I].Flags), dl, MVT::i32, RetVal);
- } else if (RetVal.getValueSizeInBits() < 16) {
- // Use 16-bit registers for small load-stores as it's the
- // smallest general purpose register size supported by NVPTX.
- RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
+ EVT VTI = VTs[I];
+ if (const auto PromotedVT = PromoteScalarIntegerPTX(VTI))
+ VTI = *PromotedVT;
+
+ const EVT StoreVT =
+ ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
+
+ assert((RetVal.getValueType() == StoreVT ||
+ (StoreVT.isInteger() && RetVal.getValueType().isInteger())) &&
+ "Non-integer argument type size mismatch");
+ if (StoreVT.bitsGT(RetVal.getValueType())) {
+ RetVal = DAG.getNode(getExtOpcode(Outs[I].Flags), dl, StoreVT, RetVal);
+ } else if (StoreVT.bitsLT(RetVal.getValueType())) {
+ RetVal = DAG.getNode(ISD::TRUNCATE, dl, StoreVT, RetVal);
}
return RetVal;
};
@@ -3526,45 +3491,34 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
unsigned I = 0;
for (const unsigned NumElts : VectorInfo) {
- const Align CurrentAlign = commonAlignment(RetAlign, Offsets[I]);
- if (NumElts == 1 && RetTy->isAggregateType() &&
- CurrentAlign < DAG.getEVTAlign(VTs[I])) {
- Chain = LowerUnalignedStoreRet(DAG, Chain, Offsets[I], VTs[I],
- GetRetVal(I), dl);
-
- // The call to LowerUnalignedStoreRet inserted the necessary SDAG nodes
- // into the graph, so just move on to the next element.
- I++;
- continue;
- }
+ const MaybeAlign CurrentAlign = ExtendIntegerRetVal
+ ? MaybeAlign(std::nullopt)
+ : commonAlignment(RetAlign, Offsets[I]);
- SmallVector<SDValue, 6> StoreOperands{
- Chain, DAG.getConstant(Offsets[I], dl, MVT::i32)};
-
- for (const unsigned J : llvm::seq(NumElts))
- StoreOperands.push_back(GetRetVal(I + J));
+ SDValue Val;
+ if (NumElts == 1) {
+ Val = GetRetVal(I);
+ } else {
+ SmallVector<SDValue, 6> StoreVals;
+ for (const unsigned J : llvm::seq(NumElts)) {
+ SDValue ValJ = GetRetVal(I + J);
+ if (ValJ.getValueType().isVector())
+ DAG.ExtractVectorElements(ValJ, StoreVals);
+ else
+ StoreVals.push_back(ValJ);
+ }
- NVPTXISD::NodeType Op;
- switch (NumElts) {
- case 1:
- Op = NVPTXISD::StoreRetval;
- break;
- case 2:
- Op = NVPTXISD::StoreRetvalV2;
- break;
- case 4:
- Op = NVPTXISD::StoreRetvalV4;
- break;
- default:
- llvm_unreachable("Invalid vector info.");
+ EVT VT = EVT::getVectorVT(F.getContext(), StoreVals[0].getValueType(),
+ StoreVals.size());
+ Val = DAG.getBuildVector(VT, dl, StoreVals);
}
- // Adjust type of load/store op if we've extended the scalar
- // return value.
- EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[I];
- Chain = DAG.getMemIntrinsicNode(
- Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
- MachinePointerInfo(), CurrentAlign, MachineMemOperand::MOStore);
+ SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32);
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
+
+ Chain = DAG.getStore(Chain, dl, Val, Ptr,
+ MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
I += NumElts;
}
@@ -5120,19 +5074,12 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
case NVPTXISD::StoreParamV2:
Opcode = NVPTXISD::StoreParamV4;
break;
- case NVPTXISD::StoreRetval:
- Opcode = NVPTXISD::StoreRetvalV2;
- break;
- case NVPTXISD::StoreRetvalV2:
- Opcode = NVPTXISD::StoreRetvalV4;
- break;
case NVPTXISD::StoreV2:
MemVT = ST->getMemoryVT();
Opcode = NVPTXISD::StoreV4;
break;
case NVPTXISD::StoreV4:
case NVPTXISD::StoreParamV4:
- case NVPTXISD::StoreRetvalV4:
case NVPTXISD::StoreV8:
// PTX doesn't support the next doubling of operands
return SDValue();
@@ -5201,12 +5148,6 @@ static SDValue PerformStoreParamCombine(SDNode *N,
return PerformStoreCombineHelper(N, DCI, 3, 1);
}
-static SDValue PerformStoreRetvalCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // Operands from the 2nd to the last one are the values to be stored
- return PerformStoreCombineHelper(N, DCI, 2, 0);
-}
-
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
@@ -5840,10 +5781,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
return combineUnpackingMovIntoLoad(N, DCI);
- case NVPTXISD::StoreRetval:
- case NVPTXISD::StoreRetvalV2:
- case NVPTXISD::StoreRetvalV4:
- return PerformStoreRetvalCombine(N, DCI);
case NVPTXISD::StoreParam:
case NVPTXISD::StoreParamV2:
case NVPTXISD::StoreParamV4:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 5efdd1582214a..3a8091fecfde1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -86,10 +86,7 @@ enum NodeType : unsigned {
StoreParam,
StoreParamV2,
StoreParamV4,
- StoreRetval,
- StoreRetvalV2,
- StoreRetvalV4,
- LAST_MEMORY_OPCODE = StoreRetvalV4,
+ LAST_MEMORY_OPCODE = StoreParamV4,
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index cb0275706ae3e..1a2515b7f66f3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2001,9 +2001,6 @@ def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
-def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
-def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
-def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
@@ -2040,15 +2037,6 @@ def StoreParamV4 :
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
-def StoreRetval :
- SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
- [SDNPHasChain, SDNPSideEffect]>;
-def StoreRetvalV2 :
- SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
- [SDNPHasChain, SDNPSideEffect]>;
-def StoreRetvalV4 :
- SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
- [SDNPHasChain, SDNPSideEffect]>;
def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2123,25 +2111,6 @@ let mayStore = true in {
" \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
[]>;
}
-
- class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
- !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
- []>;
-
- class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
- !strconcat("st.param.v2", opstr,
- " \t[func_retval0$a], {{$val, $val2}};"),
- []>;
-
- class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs),
- (ins regclass:$val, regclass:$val2, regclass:$val3,
- regclass:$val4, Offseti32imm:$a),
- !strconcat("st.param.v4", opstr,
- " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
- []>;
}
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
@@ -2219,20 +2188,6 @@ defm StoreParamV2F64 : StoreParamV2Inst<B64, f64imm, ".b64">;
defm StoreParamV4F32 : StoreParamV4Inst<B32, f32imm, ".b32">;
-def StoreRetvalI64 : StoreRetvalInst<B64, ".b64">;
-def StoreRetvalI32 : StoreRetvalInst<B32, ".b32">;
-def StoreRetvalI16 : StoreRetvalInst<B16, ".b16">;
-def StoreRetvalI8 : StoreRetvalInst<B16, ".b8">;
-def StoreRetvalI8TruncI32 : StoreRetvalInst<B32, ".b8">;
-def StoreRetvalI8TruncI64 : StoreRetvalInst<B64, ".b8">;
-def StoreRetvalV2I64 : StoreRetvalV2Inst<B64, ".b64">;
-def StoreRetvalV2I32 : StoreRetvalV2Inst<B32, ".b32">;
-def StoreRetvalV2I16 : StoreRetvalV2Inst<B16, ".b16">;
-def StoreRetvalV2I8 : StoreRetvalV2Inst<B16, ".b8">;
-def StoreRetvalV4I32 : StoreRetvalV4Inst<B32, ".b32">;
-def StoreRetvalV4I16 : StoreRetvalV4Inst<B16, ".b16">;
-def StoreRetvalV4I8 : StoreRetvalV4Inst<B16, ".b8">;
-
def DeclareRetMemInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size),
".param .align $align .b8 retval0[$size];",
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index bb36b1df115d1..61c3ba2ee54ab 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -50,7 +50,7 @@ body: |
; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00
%6 = FADD_rnf32ri %5, float 6.250000e+00
%7 = FMUL_rnf32rr %6, %4
- StoreRetvalI32 %7, 0
+ ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
Return
...
---
@@ -76,6 +76,6 @@ body: |
; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000
%6 = FADD_rnf32ri %5, float 0x7FF8000000000000
%7 = FMUL_rnf32rr %6, %4
- StoreRetvalI32 %7, 0
+ ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
Return
...
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
index b7e6e8b85298a..e75dd20e46a60 100644
--- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -14,7 +14,7 @@ define i1 @and_ord(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r1, [and_ord_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [and_ord_param_1];
; CHECK-NEXT: setp.num.f32 %p1, %r1, %r2;
-; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%c = fcmp ord float %a, 0.0
@@ -33,7 +33,7 @@ define i1 @or_uno(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r1, [or_uno_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [or_uno_param_1];
; CHECK-NEXT: setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%c = fcmp uno float %a, 0.0
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index c8dc34e9de2ca..55ce3dfc46539 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -424,7 +424,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-LABEL: atomicrmw_add_f16_generic(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<20>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
@@ -458,8 +458,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: @%p1 bra $L__BB24_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: shr.u32 %r18, %r5, %r1;
-; CHECK-NEXT: cvt.u16.u32 %rs4, %r18;
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs4;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r18;
; CHECK-NEXT: ret;
%ret = atomicrmw fadd ptr %addr, half %val seq_cst
ret half %ret
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 0dc658757053c..f59f51c3c57d3 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -17,8 +17,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) {
; SM70-LABEL: test_fadd(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<11>;
+; SM70-NEXT: .reg .b32 %r<12>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %r1, [test_fadd_param_1];
@@ -32,8 +31,8 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) {
; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5;
; SM70-NEXT: or.b32 %r9, %r5, 4194304;
; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r10;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r11, %r10, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r11;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_fadd(
@@ -81,8 +80,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) {
; SM70-LABEL: test_fsub(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<11>;
+; SM70-NEXT: .reg .b32 %r<12>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %r1, [test_fsub_param_1];
@@ -96,8 +94,8 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) {
; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5;
; SM70-NEXT: or.b32 %r9, %r5, 4194304;
; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r10;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r11, %r10, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r11;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_fsub(
@@ -551,8 +549,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM70-LABEL: test_fptrunc_float(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<7>;
+; SM70-NEXT: .reg .b32 %r<8>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_fptrunc_float_param_0];
@@ -562,8 +559,8 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1;
; SM70-NEXT: or.b32 %r5, %r1, 4194304;
; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r6;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r7, %r6, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_fptrunc_float(
@@ -606,8 +603,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
; SM70-LABEL: test_fadd_imm_1(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<9>;
+; SM70-NEXT: .reg .b32 %r<10>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %r1, [test_fadd_imm_1_param_0];
@@ -619,8 +615,8 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
; SM70-NEXT: setp.nan.f32 %p1, %r3, %r3;
; SM70-NEXT: or.b32 %r7, %r3, 4194304;
; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r8;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r9, %r8, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r9;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_fadd_imm_1(
@@ -692,10 +688,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0];
; SM70-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r1;
-; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r2;
-; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r3;
-; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r4;
+; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r3;
+; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r4;
+; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r1;
+; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r2;
; SM70-NEXT: cvt.u32.u16 %r5, %rs8;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
; SM70-NEXT: cvt.u32.u16 %r7, %rs7;
@@ -712,8 +708,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM70-NEXT: shl.b32 %r18, %r17, 16;
; SM70-NEXT: cvt.u32.u16 %r19, %rs1;
; SM70-NEXT: shl.b32 %r20, %r19, 16;
-; SM70-NEXT: st.param.v4.b32 [func_retval0], {%r20, %r18, %r16, %r14};
-; SM70-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r10, %r8, %r6};
+; SM70-NEXT: st.param.v4.b32 [func_retval0+16], {%r20, %r18, %r16, %r14};
+; SM70-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r10, %r8, %r6};
; SM70-NEXT: ret;
;
; SM80-LABEL: test_extload_bf16x8(
@@ -725,10 +721,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0];
; SM80-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
-; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r2;
-; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r3;
-; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r4;
+; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r3;
+; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r4;
+; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r1;
+; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r2;
; SM80-NEXT: cvt.f32.bf16 %r5, %rs8;
; SM80-NEXT: cvt.f32.bf16 %r6, %rs7;
; SM80-NEXT: cvt.f32.bf16 %r7, %rs6;
@@ -737,8 +733,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80-NEXT: cvt.f32.bf16 %r10, %rs3;
; SM80-NEXT: cvt.f32.bf16 %r11, %rs2;
; SM80-NEXT: cvt.f32.bf16 %r12, %rs1;
-; SM80-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9};
-; SM80-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5};
+; SM80-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9};
+; SM80-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5};
; SM80-NEXT: ret;
;
; SM80-FTZ-LABEL: test_extload_bf16x8(
@@ -750,10 +746,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0];
; SM80-FTZ-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r1;
-; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r2;
-; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r3;
-; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r4;
+; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r3;
+; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r4;
+; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r1;
+; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r2;
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r5, %rs8;
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r6, %rs7;
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r7, %rs6;
@@ -762,8 +758,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r10, %rs3;
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r11, %rs2;
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r12, %rs1;
-; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9};
-; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5};
+; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9};
+; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5};
; SM80-FTZ-NEXT: ret;
;
; SM90-LABEL: test_extload_bf16x8(
@@ -775,10 +771,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0];
; SM90-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
-; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r1;
-; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r2;
-; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r3;
-; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r4;
+; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r3;
+; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r4;
+; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r1;
+; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r2;
; SM90-NEXT: cvt.f32.bf16 %r5, %rs8;
; SM90-NEXT: cvt.f32.bf16 %r6, %rs7;
; SM90-NEXT: cvt.f32.bf16 %r7, %rs6;
@@ -787,8 +783,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM90-NEXT: cvt.f32.bf16 %r10, %rs3;
; SM90-NEXT: cvt.f32.bf16 %r11, %rs2;
; SM90-NEXT: cvt.f32.bf16 %r12, %rs1;
-; SM90-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9};
-; SM90-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5};
+; SM90-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9};
+; SM90-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5};
; SM90-NEXT: ret;
%load = load <8 x bfloat>, ptr addrspace(3) %arg, align 16
%res = fpext <8 x bfloat> %load to <8 x float>
@@ -909,8 +905,8 @@ define bfloat @test_sitofp_i16(i16 %a) {
; SM70-LABEL: test_sitofp_i16(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<3>;
-; SM70-NEXT: .reg .b32 %r<7>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<8>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %rs1, [test_sitofp_i16_param_0];
@@ -921,8 +917,8 @@ define bfloat @test_sitofp_i16(i16 %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1;
; SM70-NEXT: or.b32 %r5, %r1, 4194304;
; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1;
-; SM70-NEXT: mov.b32 {_, %rs2}, %r6;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs2;
+; SM70-NEXT: shr.u32 %r7, %r6, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_sitofp_i16(
@@ -966,8 +962,8 @@ define bfloat @test_uitofp_i8(i8 %a) {
; SM70-LABEL: test_uitofp_i8(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<3>;
-; SM70-NEXT: .reg .b32 %r<7>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<8>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b8 %rs1, [test_uitofp_i8_param_0];
@@ -978,8 +974,8 @@ define bfloat @test_uitofp_i8(i8 %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1;
; SM70-NEXT: or.b32 %r5, %r1, 4194304;
; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1;
-; SM70-NEXT: mov.b32 {_, %rs2}, %r6;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs2;
+; SM70-NEXT: shr.u32 %r7, %r6, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_uitofp_i8(
@@ -1023,8 +1019,8 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM70-LABEL: test_uitofp_i1(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b16 %rs<4>;
-; SM70-NEXT: .reg .b32 %r<8>;
+; SM70-NEXT: .reg .b16 %rs<3>;
+; SM70-NEXT: .reg .b32 %r<9>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b8 %rs1, [test_uitofp_i1_param_0];
@@ -1038,8 +1034,8 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM70-NEXT: setp.nan.f32 %p2, %r2, %r2;
; SM70-NEXT: or.b32 %r6, %r2, 4194304;
; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p2;
-; SM70-NEXT: mov.b32 {_, %rs3}, %r7;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs3;
+; SM70-NEXT: shr.u32 %r8, %r7, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r8;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_uitofp_i1(
@@ -1096,8 +1092,8 @@ define bfloat @test_uitofp_i16(i16 %a) {
; SM70-LABEL: test_uitofp_i16(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<3>;
-; SM70-NEXT: .reg .b32 %r<7>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<8>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %rs1, [test_uitofp_i16_param_0];
@@ -1108,8 +1104,8 @@ define bfloat @test_uitofp_i16(i16 %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1;
; SM70-NEXT: or.b32 %r5, %r1, 4194304;
; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1;
-; SM70-NEXT: mov.b32 {_, %rs2}, %r6;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs2;
+; SM70-NEXT: shr.u32 %r7, %r6, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_uitofp_i16(
@@ -1153,8 +1149,7 @@ define bfloat @test_uitofp_i32(i32 %a) {
; SM70-LABEL: test_uitofp_i32(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<8>;
+; SM70-NEXT: .reg .b32 %r<9>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_uitofp_i32_param_0];
@@ -1165,8 +1160,8 @@ define bfloat @test_uitofp_i32(i32 %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r2, %r2;
; SM70-NEXT: or.b32 %r6, %r2, 4194304;
; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r7;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r8, %r7, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r8;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_uitofp_i32(
@@ -1211,8 +1206,7 @@ define bfloat @test_uitofp_i64(i64 %a) {
; SM70-LABEL: test_uitofp_i64(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<7>;
+; SM70-NEXT: .reg .b32 %r<8>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -1224,8 +1218,8 @@ define bfloat @test_uitofp_i64(i64 %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1;
; SM70-NEXT: or.b32 %r5, %r1, 4194304;
; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r6;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r7, %r6, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r7;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_uitofp_i64(
@@ -1272,8 +1266,7 @@ define bfloat @test_roundeven(bfloat %a) {
; SM70-LABEL: test_roundeven(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<9>;
+; SM70-NEXT: .reg .b32 %r<10>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %r1, [test_roundeven_param_0];
@@ -1285,8 +1278,8 @@ define bfloat @test_roundeven(bfloat %a) {
; SM70-NEXT: setp.nan.f32 %p1, %r3, %r3;
; SM70-NEXT: or.b32 %r7, %r3, 4194304;
; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r8;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r9, %r8, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r9;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_roundeven(
@@ -1397,8 +1390,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
; SM70-LABEL: test_maxnum(
; SM70: {
; SM70-NEXT: .reg .pred %p<2>;
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<11>;
+; SM70-NEXT: .reg .b32 %r<12>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %r1, [test_maxnum_param_1];
@@ -1412,8 +1404,8 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5;
; SM70-NEXT: or.b32 %r9, %r5, 4194304;
; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1;
-; SM70-NEXT: mov.b32 {_, %rs1}, %r10;
-; SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; SM70-NEXT: shr.u32 %r11, %r10, 16;
+; SM70-NEXT: st.param.b16 [func_retval0], %r11;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_maxnum(
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 15de26ff4df30..ba5813c869236 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -9,11 +9,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define <2 x bfloat> @test_ret_const() #0 {
; CHECK-LABEL: test_ret_const(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 1073758080;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1073758080;
; CHECK-NEXT: ret;
ret <2 x bfloat> <bfloat 1.0, bfloat 2.0>
}
diff --git a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll
index aa08b9605790c..c5a995ae47bc1 100644
--- a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll
+++ b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll
@@ -6,12 +6,11 @@ target triple = "nvptx-nvidia-cuda"
define <6 x half> @half6() {
; CHECK-LABEL: half6(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0x0000;
-; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1};
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <6 x half> zeroinitializer
}
@@ -19,13 +18,12 @@ define <6 x half> @half6() {
define <10 x half> @half10() {
; CHECK-LABEL: half10(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0x0000;
-; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b16 [func_retval0+16], {%rs1, %rs1};
+; CHECK-NEXT: st.param.b32 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0+8], {0, 0};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <10 x half> zeroinitializer
}
@@ -33,13 +31,11 @@ define <10 x half> @half10() {
define <12 x i8> @byte12() {
; CHECK-LABEL: byte12(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <12 x i8> zeroinitializer
}
@@ -47,15 +43,12 @@ define <12 x i8> @byte12() {
define <20 x i8> @byte20() {
; CHECK-LABEL: byte20(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+16], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT: st.param.b32 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <20 x i8> zeroinitializer
}
diff --git a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
index 5e856112c0142..f8bfcd5531502 100644
--- a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
@@ -100,14 +100,13 @@ define i8 @cvt_s8_f32(float %x) {
; CHECK-LABEL: cvt_s8_f32(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [cvt_s8_f32_param_0];
; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
-; CHECK-NEXT: and.b32 %r3, %r2, 255;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%a = fptosi float %x to i8
ret i8 %a
@@ -117,15 +116,14 @@ define i8 @cvt_s8_f64(double %x) {
; CHECK-LABEL: cvt_s8_f64(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [cvt_s8_f64_param_0];
; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %rd1;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
-; CHECK-NEXT: and.b32 %r2, %r1, 255;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a = fptosi double %x to i8
ret i8 %a
diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
index ce6a16d9c0400..8a0c0f8c3b452 100644
--- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -1,66 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
-
;; Integer conversions happen inplicitly by loading/storing the proper types
-
; i16
define i16 @cvt_i16_i32(i32 %x) {
-; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
-; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i16_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_i16_i32_param_0];
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%a = trunc i32 %x to i16
ret i16 %a
}
define i16 @cvt_i16_i64(i64 %x) {
-; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
-; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i16_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [cvt_i16_i64_param_0];
+; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
+; CHECK-NEXT: ret;
%a = trunc i64 %x to i16
ret i16 %a
}
-
-
; i32
define i32 @cvt_i32_i16(i16 %x) {
-; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
-; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i32_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %r1, [cvt_i32_i16_param_0];
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%a = zext i16 %x to i32
ret i32 %a
}
define i32 @cvt_i32_i64(i64 %x) {
-; CHECK: ld.param.b32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
-; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i32_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [cvt_i32_i64_param_0];
+; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
+; CHECK-NEXT: ret;
%a = trunc i64 %x to i32
ret i32 %a
}
-
-
; i64
define i64 @cvt_i64_i16(i16 %x) {
-; CHECK: ld.param.b16 %rd[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
-; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i64_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rd1, [cvt_i64_i16_param_0];
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
+; CHECK-NEXT: ret;
%a = zext i16 %x to i64
ret i64 %a
}
define i64 @cvt_i64_i32(i32 %x) {
-; CHECK: ld.param.b32 %rd[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
-; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]]
-; CHECK: ret
+; CHECK-LABEL: cvt_i64_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %rd1, [cvt_i64_i32_param_0];
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
+; CHECK-NEXT: ret;
%a = zext i32 %x to i64
ret i64 %a
}
diff --git a/llvm/test/CodeGen/NVPTX/elect.ll b/llvm/test/CodeGen/NVPTX/elect.ll
index 93c30a9b00068..b65fa5a6376ef 100644
--- a/llvm/test/CodeGen/NVPTX/elect.ll
+++ b/llvm/test/CodeGen/NVPTX/elect.ll
@@ -54,9 +54,9 @@ define {i32, i1} @elect_sync_twice(i32 %mask) {
; CHECK-NEXT: ld.param.b32 %r1, [elect_sync_twice_param_0];
; CHECK-NEXT: elect.sync %r2|%p1, %r1;
; CHECK-NEXT: elect.sync %r3|%p2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1;
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%val = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
%val2 = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll
index 79d80e6f8fa84..b1eadf381d3b4 100644
--- a/llvm/test/CodeGen/NVPTX/extractelement.ll
+++ b/llvm/test/CodeGen/NVPTX/extractelement.ll
@@ -41,7 +41,7 @@ define i1 @test_v2i8_load(ptr %a) {
; CHECK-NEXT: or.b16 %rs5, %rs1, %rs2;
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
; CHECK-NEXT: setp.eq.s16 %p1, %rs6, 0;
-; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%v = load <2 x i8>, ptr %a, align 4
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 252edf4b02c76..13f1c2f30b830 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -43,8 +43,7 @@
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: test_ret_const(
-; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00;
-; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
+; CHECK: st.param.b16 [func_retval0], 0x3C00;
; CHECK-NEXT: ret;
define half @test_ret_const() #0 {
ret half 1.0
@@ -384,7 +383,7 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_une(half %a, half %b) #0 {
@@ -400,7 +399,7 @@ define i1 @test_fcmp_une(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ueq(half %a, half %b) #0 {
@@ -416,7 +415,7 @@ define i1 @test_fcmp_ueq(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ugt(half %a, half %b) #0 {
@@ -432,7 +431,7 @@ define i1 @test_fcmp_ugt(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uge(half %a, half %b) #0 {
@@ -448,7 +447,7 @@ define i1 @test_fcmp_uge(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ult(half %a, half %b) #0 {
@@ -464,7 +463,7 @@ define i1 @test_fcmp_ult(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ule(half %a, half %b) #0 {
@@ -481,7 +480,7 @@ define i1 @test_fcmp_ule(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uno(half %a, half %b) #0 {
@@ -497,7 +496,7 @@ define i1 @test_fcmp_uno(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_one(half %a, half %b) #0 {
@@ -513,7 +512,7 @@ define i1 @test_fcmp_one(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oeq(half %a, half %b) #0 {
@@ -529,7 +528,7 @@ define i1 @test_fcmp_oeq(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ogt(half %a, half %b) #0 {
@@ -545,7 +544,7 @@ define i1 @test_fcmp_ogt(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oge(half %a, half %b) #0 {
@@ -561,7 +560,7 @@ define i1 @test_fcmp_oge(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_olt(half %a, half %b) #0 {
@@ -577,7 +576,7 @@ define i1 @test_fcmp_olt(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ole(half %a, half %b) #0 {
@@ -593,7 +592,7 @@ define i1 @test_fcmp_ole(half %a, half %b) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]];
; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ord(half %a, half %b) #0 {
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 2b2fed153d823..43a605f2b34d7 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -34,11 +34,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define <2 x half> @test_ret_const() #0 {
; CHECK-LABEL: test_ret_const(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 1073757184;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1073757184;
; CHECK-NEXT: ret;
ret <2 x half> <half 1.0, half 2.0>
}
diff --git a/llvm/test/CodeGen/NVPTX/fexp2.ll b/llvm/test/CodeGen/NVPTX/fexp2.ll
index 7c5e2f83d62c8..391aa453f0757 100644
--- a/llvm/test/CodeGen/NVPTX/fexp2.ll
+++ b/llvm/test/CodeGen/NVPTX/fexp2.ll
@@ -252,8 +252,7 @@ define bfloat @exp2_bf16_test(bfloat %in) {
; CHECK-LABEL: exp2_bf16_test(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<9>;
+; CHECK-NEXT: .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b16 %r1, [exp2_bf16_test_param_0];
@@ -265,15 +264,14 @@ define bfloat @exp2_bf16_test(bfloat %in) {
; CHECK-NEXT: setp.nan.f32 %p1, %r3, %r3;
; CHECK-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: shr.u32 %r9, %r8, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r9;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_bf16_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .pred %p<2>;
-; CHECK-FP16-NEXT: .reg .b16 %rs<2>;
-; CHECK-FP16-NEXT: .reg .b32 %r<9>;
+; CHECK-FP16-NEXT: .reg .b32 %r<10>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.b16 %r1, [exp2_bf16_test_param_0];
@@ -285,8 +283,8 @@ define bfloat @exp2_bf16_test(bfloat %in) {
; CHECK-FP16-NEXT: setp.nan.f32 %p1, %r3, %r3;
; CHECK-FP16-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-FP16-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; CHECK-FP16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
-; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-FP16-NEXT: shr.u32 %r9, %r8, 16;
+; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %r9;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_bf16_test(
diff --git a/llvm/test/CodeGen/NVPTX/flog2.ll b/llvm/test/CodeGen/NVPTX/flog2.ll
index 890c2f8a2678d..acac5a8da4e14 100644
--- a/llvm/test/CodeGen/NVPTX/flog2.ll
+++ b/llvm/test/CodeGen/NVPTX/flog2.ll
@@ -122,8 +122,7 @@ define bfloat @log2_bf16_test(bfloat %in) {
; CHECK-LABEL: log2_bf16_test(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<9>;
+; CHECK-NEXT: .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b16 %r1, [log2_bf16_test_param_0];
@@ -135,8 +134,8 @@ define bfloat @log2_bf16_test(bfloat %in) {
; CHECK-NEXT: setp.nan.f32 %p1, %r3, %r3;
; CHECK-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: shr.u32 %r9, %r8, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r9;
; CHECK-NEXT: ret;
entry:
%log2 = call bfloat @llvm.log2.bf16(bfloat %in)
@@ -148,8 +147,7 @@ define bfloat @log2_bf16_ftz_test(bfloat %in) #0 {
; CHECK-LABEL: log2_bf16_ftz_test(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<9>;
+; CHECK-NEXT: .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b16 %r1, [log2_bf16_ftz_test_param_0];
@@ -161,8 +159,8 @@ define bfloat @log2_bf16_ftz_test(bfloat %in) #0 {
; CHECK-NEXT: setp.nan.ftz.f32 %p1, %r3, %r3;
; CHECK-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1;
-; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: shr.u32 %r9, %r8, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r9;
; CHECK-NEXT: ret;
entry:
%log2 = call bfloat @llvm.log2.bf16(bfloat %in)
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
index 42160b50348c4..92293ab171a12 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -384,8 +384,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<5>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<4>;
-; CHECK-SM70-NEXT: .reg .b32 %r<29>;
+; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM70-NEXT: .reg .b32 %r<30>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
@@ -422,8 +422,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
; CHECK-SM70-NEXT: setp.nan.f32 %p4, %r23, %r23;
; CHECK-SM70-NEXT: or.b32 %r27, %r23, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r28, %r27, %r26, %p4;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-SM70-NEXT: shr.u32 %r29, %r28, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r29;
; CHECK-SM70-NEXT: ret;
%1 = fmul bfloat %a, %b
%2 = fadd bfloat %1, %c
@@ -462,8 +462,7 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)
; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<3>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<20>;
+; CHECK-SM70-NEXT: .reg .b32 %r<21>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2];
@@ -487,8 +486,8 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14;
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20;
; CHECK-SM70-NEXT: ret;
%1 = fmul bfloat %a, %b
%2 = fadd bfloat %1, %c
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
index 6f6c5d7340789..2f1d7d6321438 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
@@ -253,8 +253,7 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
; CHECK-SM70-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<4>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<27>;
+; CHECK-SM70-NEXT: .reg .b32 %r<28>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
@@ -286,8 +285,8 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
; CHECK-SM70-NEXT: setp.nan.f32 %p3, %r21, %r21;
; CHECK-SM70-NEXT: or.b32 %r25, %r21, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r26, %r25, %r24, %p3;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r26; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r27, %r26, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r27;
; CHECK-SM70-NEXT: ret;
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = fcmp ogt bfloat %1, 0.0
@@ -325,8 +324,7 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-SM70-LABEL: fma_bf16_maxnum_no_nans(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<3>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<20>;
+; CHECK-SM70-NEXT: .reg .b32 %r<21>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_maxnum_no_nans_param_2];
@@ -350,8 +348,8 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14;
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20;
; CHECK-SM70-NEXT: ret;
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = call bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0)
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll b/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll
index 60bfe3fa2cbf6..090dd00f1b933 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll
@@ -271,8 +271,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<5>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<4>;
-; CHECK-SM70-NEXT: .reg .b32 %r<29>;
+; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
+; CHECK-SM70-NEXT: .reg .b32 %r<30>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
@@ -309,8 +309,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
; CHECK-SM70-NEXT: setp.nan.f32 %p4, %r23, %r23;
; CHECK-SM70-NEXT: or.b32 %r27, %r23, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r28, %r27, %r26, %p4;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-SM70-NEXT: shr.u32 %r29, %r28, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r29;
; CHECK-SM70-NEXT: ret;
%1 = fmul fast bfloat %a, %b
%2 = fadd fast bfloat %1, %c
@@ -351,8 +351,7 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)
; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<3>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<20>;
+; CHECK-SM70-NEXT: .reg .b32 %r<21>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2];
@@ -376,8 +375,8 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c)
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14;
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20;
; CHECK-SM70-NEXT: ret;
%1 = fmul fast bfloat %a, %b
%2 = fadd fast bfloat %1, %c
@@ -1111,8 +1110,7 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
; CHECK-SM70-LABEL: fma_bf16_no_nans_multiple_uses_of_fma(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<4>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<27>;
+; CHECK-SM70-NEXT: .reg .b32 %r<28>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
@@ -1144,8 +1142,8 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
; CHECK-SM70-NEXT: setp.nan.f32 %p3, %r21, %r21;
; CHECK-SM70-NEXT: or.b32 %r25, %r21, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r26, %r25, %r24, %p3;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r26; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r27, %r26, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r27;
; CHECK-SM70-NEXT: ret;
%1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = fcmp nsz ogt bfloat %1, 0.0
@@ -1183,8 +1181,7 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-SM70-LABEL: fma_bf16_maxnum_no_nans(
; CHECK-SM70: {
; CHECK-SM70-NEXT: .reg .pred %p<3>;
-; CHECK-SM70-NEXT: .reg .b16 %rs<2>;
-; CHECK-SM70-NEXT: .reg .b32 %r<20>;
+; CHECK-SM70-NEXT: .reg .b32 %r<21>;
; CHECK-SM70-EMPTY:
; CHECK-SM70-NEXT: // %bb.0:
; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_maxnum_no_nans_param_2];
@@ -1208,8 +1205,8 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14;
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
-; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
-; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16;
+; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20;
; CHECK-SM70-NEXT: ret;
%1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%2 = call nsz bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0)
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index b74e531adba3f..5aa12b08a3812 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -116,11 +116,10 @@ define ptx_device float @f32_iir(float %x) {
define ptx_device float @f32_iii(float %x) {
; CHECK-LABEL: f32_iii(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0f41200000;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1092616192;
; CHECK-NEXT: ret;
%r = call float @llvm.fma.f32(float 2.0, float 3.0, float 4.0)
ret float %r
diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
index e43a9da88a50e..5d5e861432b25 100644
--- a/llvm/test/CodeGen/NVPTX/i1-icmp.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -8,7 +8,7 @@ define i32 @icmp_i1_eq(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_eq(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_eq_param_0];
@@ -18,12 +18,10 @@ define i32 @icmp_i1_eq(i32 %a, i32 %b) {
; CHECK-NEXT: xor.pred %p3, %p1, %p2;
; CHECK-NEXT: @%p3 bra $L__BB0_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB0_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -39,7 +37,7 @@ define i32 @icmp_i1_ne(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_ne(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<5>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ne_param_0];
@@ -50,12 +48,10 @@ define i32 @icmp_i1_ne(i32 %a, i32 %b) {
; CHECK-NEXT: not.pred %p4, %p3;
; CHECK-NEXT: @%p4 bra $L__BB1_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -71,7 +67,7 @@ define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_sgt(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sgt_param_0];
@@ -81,12 +77,10 @@ define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
; CHECK-NEXT: or.pred %p3, %p1, %p2;
; CHECK-NEXT: @%p3 bra $L__BB2_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB2_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -102,7 +96,7 @@ define i32 @icmp_i1_slt(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_slt(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_slt_param_0];
@@ -112,12 +106,10 @@ define i32 @icmp_i1_slt(i32 %a, i32 %b) {
; CHECK-NEXT: or.pred %p3, %p2, %p1;
; CHECK-NEXT: @%p3 bra $L__BB3_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB3_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -133,7 +125,7 @@ define i32 @icmp_i1_sge(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_sge(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sge_param_0];
@@ -143,12 +135,10 @@ define i32 @icmp_i1_sge(i32 %a, i32 %b) {
; CHECK-NEXT: and.pred %p3, %p1, %p2;
; CHECK-NEXT: @%p3 bra $L__BB4_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB4_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -164,7 +154,7 @@ define i32 @icmp_i1_sle(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_sle(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sle_param_0];
@@ -174,12 +164,10 @@ define i32 @icmp_i1_sle(i32 %a, i32 %b) {
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: @%p3 bra $L__BB5_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB5_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -195,7 +183,7 @@ define i32 @icmp_i1_uge(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_uge(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_uge_param_0];
@@ -205,12 +193,10 @@ define i32 @icmp_i1_uge(i32 %a, i32 %b) {
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: @%p3 bra $L__BB6_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB6_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -226,7 +212,7 @@ define i32 @icmp_i1_ugt(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_ugt(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ugt_param_0];
@@ -236,12 +222,10 @@ define i32 @icmp_i1_ugt(i32 %a, i32 %b) {
; CHECK-NEXT: or.pred %p3, %p2, %p1;
; CHECK-NEXT: @%p3 bra $L__BB7_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB7_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -257,7 +241,7 @@ define i32 @icmp_i1_ule(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_ule(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ule_param_0];
@@ -267,12 +251,10 @@ define i32 @icmp_i1_ule(i32 %a, i32 %b) {
; CHECK-NEXT: and.pred %p3, %p1, %p2;
; CHECK-NEXT: @%p3 bra $L__BB8_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB8_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
@@ -288,7 +270,7 @@ define i32 @icmp_i1_ult(i32 %a, i32 %b) {
; CHECK-LABEL: icmp_i1_ult(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ult_param_0];
@@ -298,12 +280,10 @@ define i32 @icmp_i1_ult(i32 %a, i32 %b) {
; CHECK-NEXT: or.pred %p3, %p1, %p2;
; CHECK-NEXT: @%p3 bra $L__BB9_2;
; CHECK-NEXT: // %bb.1: // %bb1
-; CHECK-NEXT: mov.b32 %r4, 1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 1;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB9_2: // %bb2
-; CHECK-NEXT: mov.b32 %r3, 127;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 127;
; CHECK-NEXT: ret;
%p1 = icmp sgt i32 %a, 1
%p2 = icmp sgt i32 %b, 1
diff --git a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
index abe92a5bf79b9..83f1ed43960fe 100644
--- a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
@@ -7,15 +7,14 @@ target triple = "nvptx64-nvidia-cuda"
define i128 @foo(ptr %p, ptr %o) {
; CHECK-LABEL: foo(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd2, [foo_param_1];
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
; CHECK-NEXT: ld.b8 %rd3, [%rd1];
; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, 0};
-; CHECK-NEXT: mov.b64 %rd4, 0;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0};
; CHECK-NEXT: ret;
%c = load i8, ptr %p, align 1
%i = zext i8 %c to i128
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index ecd42fd6ceb3c..29408a24213cc 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -292,13 +292,12 @@ define i128 @srem_i128_pow2k(i128 %lhs) {
define i128 @urem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: urem_i128_pow2k(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591;
-; CHECK-NEXT: mov.b64 %rd4, 0;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0};
; CHECK-NEXT: ret;
%div = urem i128 %lhs, 8589934592
ret i128 %div
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index d5ddadf2b21c5..e89ab7a5605c3 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -23,11 +23,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define <2 x i16> @test_ret_const() #0 {
; COMMON-LABEL: test_ret_const(
; COMMON: {
-; COMMON-NEXT: .reg .b32 %r<2>;
+; COMMON-EMPTY:
; COMMON-EMPTY:
; COMMON-NEXT: // %bb.0:
-; COMMON-NEXT: mov.b32 %r1, 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0], %r1;
+; COMMON-NEXT: st.param.b32 [func_retval0], 131073;
; COMMON-NEXT: ret;
ret <2 x i16> <i16 1, i16 2>
}
@@ -905,9 +904,9 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
; COMMON-NEXT: // %bb.0:
; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0];
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
-; COMMON-NEXT: cvt.u32.u16 %r2, %rs1;
-; COMMON-NEXT: cvt.u32.u16 %r3, %rs2;
-; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r3};
+; COMMON-NEXT: cvt.u32.u16 %r2, %rs2;
+; COMMON-NEXT: cvt.u32.u16 %r3, %rs1;
+; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2};
; COMMON-NEXT: ret;
%r = zext <2 x i16> %a to <2 x i32>
ret <2 x i32> %r
@@ -960,14 +959,11 @@ define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
; COMMON-LABEL: test_bitcast_2xi16_to_2xhalf(
; COMMON: {
-; COMMON-NEXT: .reg .b16 %rs<3>;
-; COMMON-NEXT: .reg .b32 %r<2>;
+; COMMON-NEXT: .reg .b16 %rs<2>;
; COMMON-EMPTY:
; COMMON-NEXT: // %bb.0:
; COMMON-NEXT: ld.param.b16 %rs1, [test_bitcast_2xi16_to_2xhalf_param_0];
-; COMMON-NEXT: mov.b16 %rs2, 5;
-; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
-; COMMON-NEXT: st.param.b32 [func_retval0], %r1;
+; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs1, 5};
; COMMON-NEXT: ret;
%ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
index 718840897c696..3edd4e4da60e0 100644
--- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -13,16 +13,15 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
; CHECK-LABEL: test_bitcast_2xi8_i16(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<6>;
+; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_bitcast_2xi8_i16_param_0];
-; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
+; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
-; CHECK-NEXT: and.b16 %rs4, %rs1, 255;
-; CHECK-NEXT: or.b16 %rs5, %rs4, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r2, %rs5;
+; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%res = bitcast <2 x i8> %a to i16
@@ -32,12 +31,11 @@ define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
; CHECK-LABEL: test_bitcast_i16_2xi8(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
-; CHECK-NEXT: shr.u16 %rs2, %rs1, 8;
-; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs1, %rs2};
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT: ret;
%res = bitcast i16 %a to <2 x i8>
ret <2 x i8> %res
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index 72c279bee4268..fd2e56bb126bb 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -14,11 +14,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define <4 x i8> @test_ret_const() #0 {
; CHECK-LABEL: test_ret_const(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, -66911489;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], -66911489;
; CHECK-NEXT: ret;
ret <4 x i8> <i8 -1, i8 2, i8 3, i8 -4>
}
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index d41b9b9973351..be84f9bfb1aeb 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Check that various LLVM idioms get lowered to NVPTX as expected.
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
@@ -8,114 +9,178 @@
%struct.S16 = type { i16, i16 }
%struct.S32 = type { i32, i32 }
-; CHECK-LABEL: abs_i16(
define i16 @abs_i16(i16 %a) {
-; CHECK: abs.s16
+; CHECK-LABEL: abs_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [abs_i16_param_0];
+; CHECK-NEXT: abs.s16 %rs2, %rs1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%neg = sub i16 0, %a
%abs.cond = icmp sge i16 %a, 0
%abs = select i1 %abs.cond, i16 %a, i16 %neg
ret i16 %abs
}
-; CHECK-LABEL: abs_i32(
define i32 @abs_i32(i32 %a) {
-; CHECK: abs.s32
+; CHECK-LABEL: abs_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [abs_i32_param_0];
+; CHECK-NEXT: abs.s32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%neg = sub i32 0, %a
%abs.cond = icmp sge i32 %a, 0
%abs = select i1 %abs.cond, i32 %a, i32 %neg
ret i32 %abs
}
-; CHECK-LABEL: abs_i64(
define i64 @abs_i64(i64 %a) {
-; CHECK: abs.s64
+; CHECK-LABEL: abs_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [abs_i64_param_0];
+; CHECK-NEXT: abs.s64 %rd2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%neg = sub i64 0, %a
%abs.cond = icmp sge i64 %a, 0
%abs = select i1 %abs.cond, i64 %a, i64 %neg
ret i64 %abs
}
-; CHECK-LABEL: i32_to_2xi16(
define %struct.S16 @i32_to_2xi16(i32 noundef %in) {
+; CHECK-LABEL: i32_to_2xi16(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_param_0];
+; CHECK-NEXT: shr.u32 %r2, %r1, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
+; CHECK-NEXT: ret;
%low = trunc i32 %in to i16
%high32 = lshr i32 %in, 16
%high = trunc i32 %high32 to i16
-; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_param_0];
-; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]];
-; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]];
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
-; CHECK-LABEL: i32_to_2xi16_lh(
; Same as above, but with rearranged order of low/high parts.
define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) {
+; CHECK-LABEL: i32_to_2xi16_lh(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_lh_param_0];
+; CHECK-NEXT: shr.u32 %r2, %r1, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
+; CHECK-NEXT: ret;
%high32 = lshr i32 %in, 16
%high = trunc i32 %high32 to i16
%low = trunc i32 %in to i16
-; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_lh_param_0];
-; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]];
-; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]];
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
-; CHECK-LABEL: i32_to_2xi16_not(
define %struct.S16 @i32_to_2xi16_not(i32 noundef %in) {
+; CHECK-LABEL: i32_to_2xi16_not(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_not_param_0];
+; CHECK-NEXT: shr.u32 %r2, %r1, 15;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
+; CHECK-NEXT: ret;
%low = trunc i32 %in to i16
; Shift by any value other than 16 blocks the conversion to mov.
%high32 = lshr i32 %in, 15
%high = trunc i32 %high32 to i16
-; CHECK: cvt.u16.u32
-; CHECK: shr.u32
-; CHECK: cvt.u16.u32
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
-; CHECK-LABEL: i64_to_2xi32(
define %struct.S32 @i64_to_2xi32(i64 noundef %in) {
+; CHECK-LABEL: i64_to_2xi32(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_param_0];
+; CHECK-NEXT: shr.u64 %rd2, %rd1, 32;
+; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
+; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2;
+; CHECK-NEXT: ret;
%low = trunc i64 %in to i32
%high64 = lshr i64 %in, 32
%high = trunc i64 %high64 to i32
-; CHECK: ld.param.b64 %[[R64:rd[0-9]+]], [i64_to_2xi32_param_0];
-; CHECK-DAG: cvt.u32.u64 %r{{[0-9+]}}, %[[R64]];
-; CHECK-DAG mov.b64 {tmp, %r{{[0-9+]}}}, %[[R64]];
%s1 = insertvalue %struct.S32 poison, i32 %low, 0
%s = insertvalue %struct.S32 %s1, i32 %high, 1
ret %struct.S32 %s
}
-; CHECK-LABEL: i64_to_2xi32_not(
define %struct.S32 @i64_to_2xi32_not(i64 noundef %in) {
+; CHECK-LABEL: i64_to_2xi32_not(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_not_param_0];
+; CHECK-NEXT: shr.u64 %rd2, %rd1, 31;
+; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
+; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2;
+; CHECK-NEXT: ret;
%low = trunc i64 %in to i32
; Shift by any value other than 32 blocks the conversion to mov.
%high64 = lshr i64 %in, 31
%high = trunc i64 %high64 to i32
-; CHECK: cvt.u32.u64
-; CHECK: shr.u64
-; CHECK: cvt.u32.u64
%s1 = insertvalue %struct.S32 poison, i32 %low, 0
%s = insertvalue %struct.S32 %s1, i32 %high, 1
ret %struct.S32 %s
}
-; CHECK-LABEL: i32_to_2xi16_shr(
; Make sure we do not get confused when our input itself is [al]shr.
define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
+; CHECK-LABEL: i32_to_2xi16_shr(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_shr_param_0];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .b32 param0;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
+; CHECK-NEXT: call.uni escape_int, (param0);
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: shr.s32 %r2, %r1, 16;
+; CHECK-NEXT: shr.u32 %r3, %r2, 16;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
+; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
+; CHECK-NEXT: ret;
call void @escape_int(i32 %i); // Force %i to be loaded completely.
%i1 = ashr i32 %i, 16
%l = trunc i32 %i1 to i16
%h32 = ashr i32 %i1, 16
%h = trunc i32 %h32 to i16
-; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0];
-; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16;
-; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]];
-; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32H]];
%s0 = insertvalue %struct.S16 poison, i16 %l, 0
%s1 = insertvalue %struct.S16 %s0, i16 %h, 1
ret %struct.S16 %s1
diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index e1eeb66b5afc0..955befc624c71 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -69,7 +69,7 @@ define i32 @test2(i32 %tmp158) {
; CHECK-LABEL: test2(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<6>;
-; CHECK-NEXT: .reg .b32 %r<10>;
+; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r1, [test2_param_0];
@@ -96,36 +96,29 @@ define i32 @test2(i32 %tmp158) {
; CHECK-NEXT: $L__BB1_11;
; CHECK-NEXT: brx.idx %r2, $L_brx_0;
; CHECK-NEXT: $L__BB1_7: // %bb339
-; CHECK-NEXT: mov.b32 %r7, 12;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT: st.param.b32 [func_retval0], 12;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_5: // %entry
; CHECK-NEXT: setp.eq.s32 %p3, %r1, 1024;
; CHECK-NEXT: @%p3 bra $L__BB1_3;
; CHECK-NEXT: bra.uni $L__BB1_6;
; CHECK-NEXT: $L__BB1_3: // %bb338
-; CHECK-NEXT: mov.b32 %r8, 11;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], 11;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_10: // %bb342
-; CHECK-NEXT: mov.b32 %r4, 15;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], 15;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_6: // %bb336
-; CHECK-NEXT: mov.b32 %r9, 10;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
+; CHECK-NEXT: st.param.b32 [func_retval0], 10;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_8: // %bb340
-; CHECK-NEXT: mov.b32 %r6, 13;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: st.param.b32 [func_retval0], 13;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_9: // %bb341
-; CHECK-NEXT: mov.b32 %r5, 14;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: st.param.b32 [func_retval0], 14;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_11: // %bb343
-; CHECK-NEXT: mov.b32 %r3, 18;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], 18;
; CHECK-NEXT: ret;
entry:
switch i32 %tmp158, label %bb336 [
diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll
index b4a74c762f523..d5078f5f19af6 100644
--- a/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll
+++ b/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll
@@ -11,7 +11,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
; SM90-LABEL: ld_global_v32i8(
; SM90: {
; SM90-NEXT: .reg .b16 %rs<16>;
-; SM90-NEXT: .reg .b32 %r<19>;
+; SM90-NEXT: .reg .b32 %r<18>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
@@ -42,14 +42,13 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
; SM90-NEXT: add.s16 %rs14, %rs11, %rs12;
; SM90-NEXT: add.s16 %rs15, %rs13, %rs14;
; SM90-NEXT: cvt.u32.u16 %r17, %rs15;
-; SM90-NEXT: and.b32 %r18, %r17, 255;
-; SM90-NEXT: st.param.b32 [func_retval0], %r18;
+; SM90-NEXT: st.param.b32 [func_retval0], %r17;
; SM90-NEXT: ret;
;
; SM100-LABEL: ld_global_v32i8(
; SM100: {
; SM100-NEXT: .reg .b16 %rs<16>;
-; SM100-NEXT: .reg .b32 %r<19>;
+; SM100-NEXT: .reg .b32 %r<18>;
; SM100-NEXT: .reg .b64 %rd<2>;
; SM100-EMPTY:
; SM100-NEXT: // %bb.0:
@@ -79,8 +78,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) {
; SM100-NEXT: add.s16 %rs14, %rs11, %rs12;
; SM100-NEXT: add.s16 %rs15, %rs13, %rs14;
; SM100-NEXT: cvt.u32.u16 %r17, %rs15;
-; SM100-NEXT: and.b32 %r18, %r17, 255;
-; SM100-NEXT: st.param.b32 [func_retval0], %r18;
+; SM100-NEXT: st.param.b32 [func_retval0], %r17;
; SM100-NEXT: ret;
%a = load <32 x i8>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <32 x i8> %a, i32 0
diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
index 06143debb6838..3bd46000661ce 100644
--- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
+++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -128,7 +128,7 @@ define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v8i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<8>;
-; CHECK-NEXT: .reg .b32 %r<9>;
+; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -146,8 +146,7 @@ define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) {
; CHECK-NEXT: add.s16 %rs6, %rs2, %rs1;
; CHECK-NEXT: add.s16 %rs7, %rs5, %rs6;
; CHECK-NEXT: cvt.u32.u16 %r7, %rs7;
-; CHECK-NEXT: and.b32 %r8, %r7, 255;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
%a = load <8 x i8>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <8 x i8> %a, i32 0
@@ -164,7 +163,7 @@ define i8 @ld_global_v16i8(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v16i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<16>;
-; CHECK-NEXT: .reg .b32 %r<15>;
+; CHECK-NEXT: .reg .b32 %r<14>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -194,8 +193,7 @@ define i8 @ld_global_v16i8(ptr addrspace(1) %ptr) {
; CHECK-NEXT: add.s16 %rs14, %rs11, %rs12;
; CHECK-NEXT: add.s16 %rs15, %rs13, %rs14;
; CHECK-NEXT: cvt.u32.u16 %r13, %rs15;
-; CHECK-NEXT: and.b32 %r14, %r13, 255;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r13;
; CHECK-NEXT: ret;
%a = load <16 x i8>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <16 x i8> %a, i32 0
diff --git a/llvm/test/CodeGen/NVPTX/ldu-i8.ll b/llvm/test/CodeGen/NVPTX/ldu-i8.ll
index 89f23f30f34e8..5a0a66b8b93e2 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-i8.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-i8.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
@@ -6,10 +7,18 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
declare i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr, i32)
define i8 @foo(ptr %a) {
-; Ensure we properly truncate off the high-order 24 bits
-; CHECK: ldu.global.b8
-; CHECK: cvt.u32.u16
-; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
+; CHECK-NEXT: ldu.global.b8 %rs1, [%rd1];
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr %a, i32 4)
ret i8 %val
}
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index 7f4b049af84fb..977e7353f58e8 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -27,15 +27,14 @@ define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
; CHECK-LABEL: test_ldu_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_ldu_i8_param_0];
; CHECK-NEXT: ldu.global.b8 %rs1, [%rd1];
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
-; CHECK-NEXT: and.b32 %r2, %r1, 255;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
index ff04e18701a84..a7f3103e5fcbb 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
@@ -1,26 +1,18 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_52
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_70
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx72 -O0 | FileCheck %s --check-prefix=SM_90
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 -O0 | FileCheck %s --check-prefixes=SM_52,COMMON
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx64 -O0 | FileCheck %s --check-prefixes=SM_70,COMMON
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx72 -O0 | FileCheck %s --check-prefixes=SM_90,COMMON
@.str = private unnamed_addr constant [12 x i8] c"__CUDA_ARCH\00"
@.str1 = constant [11 x i8] c"__CUDA_FTZ\00"
declare i32 @__nvvm_reflect(ptr)
-; SM_52: .visible .func (.param .b32 func_retval0) foo()
-; SM_52: mov.b32 %[[REG:.+]], 3;
-; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-; SM_70: .visible .func (.param .b32 func_retval0) foo()
-; SM_70: mov.b32 %[[REG:.+]], 2;
-; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-; SM_90: .visible .func (.param .b32 func_retval0) foo()
-; SM_90: mov.b32 %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_90-NEXT: ret;
+; COMMON-LABEL: .visible .func (.param .b32 func_retval0) foo()
+; SM_52: st.param.b32 [func_retval0], 3;
+; SM_70: st.param.b32 [func_retval0], 2;
+; SM_90: st.param.b32 [func_retval0], 1;
+; COMMON-NEXT: ret;
+
define i32 @foo() {
entry:
%call = call i32 @__nvvm_reflect(ptr @.str)
@@ -54,20 +46,11 @@ return:
ret i32 %retval.0
}
-; SM_52: .visible .func (.param .b32 func_retval0) bar()
-; SM_52: mov.b32 %[[REG:.+]], 2;
-; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-; SM_70: .visible .func (.param .b32 func_retval0) bar()
-; SM_70: mov.b32 %[[REG:.+]], 1;
-; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-; SM_90: .visible .func (.param .b32 func_retval0) bar()
-; SM_90: mov.b32 %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_90-NEXT: ret;
+; COMMON-LABEL: .visible .func (.param .b32 func_retval0) bar()
+; SM_52: st.param.b32 [func_retval0], 2;
+; SM_70: st.param.b32 [func_retval0], 1;
+; SM_90: st.param.b32 [func_retval0], 1;
+; COMMON-NEXT: ret;
define i32 @bar() {
entry:
%call = call i32 @__nvvm_reflect(ptr @.str)
@@ -102,20 +85,11 @@ if.end:
ret void
}
-; SM_52: .visible .func (.param .b32 func_retval0) qux()
-; SM_52: mov.b32 %[[REG:.+]], 3;
-; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-; SM_70: .visible .func (.param .b32 func_retval0) qux()
-; SM_70: mov.b32 %[[REG:.+]], 2;
-; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-; SM_90: .visible .func (.param .b32 func_retval0) qux()
-; SM_90: mov.b32 %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_90-NEXT: ret;
+; SM_52: .visible .func (.param .b32 func_retval0) qux()
+; SM_52: st.param.b32 [func_retval0], 3;
+; SM_70: st.param.b32 [func_retval0], 2;
+; SM_90: st.param.b32 [func_retval0], 1;
+; COMMON-NEXT: ret;
define i32 @qux() {
entry:
%call = call i32 @__nvvm_reflect(ptr noundef @.str)
@@ -142,18 +116,9 @@ return:
ret i32 %retval
}
-; SM_52: .visible .func (.param .b32 func_retval0) phi()
-; SM_52: mov.b32 %[[REG:.+]], 0f00000000;
-; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG]];
-; SM_52-NEXT: ret;
-; SM_70: .visible .func (.param .b32 func_retval0) phi()
-; SM_70: mov.b32 %[[REG:.+]], 0f00000000;
-; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG]];
-; SM_70-NEXT: ret;
-; SM_90: .visible .func (.param .b32 func_retval0) phi()
-; SM_90: mov.b32 %[[REG:.+]], 0f00000000;
-; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG]];
-; SM_90-NEXT: ret;
+; COMMON-LABEL: .visible .func (.param .b32 func_retval0) phi()
+; COMMON: st.param.b32 [func_retval0], 0;
+; COMMON-NEXT: ret;
define float @phi() {
entry:
%0 = call i32 @__nvvm_reflect(ptr @.str)
@@ -175,20 +140,11 @@ exit:
ret float 0.000000e+00
}
-; SM_52: .visible .func (.param .b32 func_retval0) prop()
-; SM_52: mov.b32 %[[REG:.+]], 3;
-; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_52-NEXT: ret;
-;
-; SM_70: .visible .func (.param .b32 func_retval0) prop()
-; SM_70: mov.b32 %[[REG:.+]], 2;
-; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_70-NEXT: ret;
-;
-; SM_90: .visible .func (.param .b32 func_retval0) prop()
-; SM_90: mov.b32 %[[REG:.+]], 1;
-; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]];
-; SM_90-NEXT: ret;
+; COMMON-LABEL: .visible .func (.param .b32 func_retval0) prop()
+; SM_52: st.param.b32 [func_retval0], 3;
+; SM_70: st.param.b32 [func_retval0], 2;
+; SM_90: st.param.b32 [func_retval0], 1;
+; COMMON-NEXT: ret;
define i32 @prop() {
entry:
%call = call i32 @__nvvm_reflect(ptr @.str)
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index cd2664e913824..4fa1235633cf6 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -14,33 +14,24 @@ declare i32 @callee(%struct.1float %a)
define i32 @test(%struct.1float alignstack(32) %data) {
; CHECK-LABEL: test(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<16>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r1, [test_param_0+1];
-; CHECK-NEXT: shl.b32 %r2, %r1, 8;
-; CHECK-NEXT: ld.param.b8 %r3, [test_param_0];
-; CHECK-NEXT: or.b32 %r4, %r2, %r3;
-; CHECK-NEXT: ld.param.b8 %r5, [test_param_0+3];
-; CHECK-NEXT: shl.b32 %r6, %r5, 8;
-; CHECK-NEXT: ld.param.b8 %r7, [test_param_0+2];
-; CHECK-NEXT: or.b32 %r8, %r6, %r7;
-; CHECK-NEXT: shl.b32 %r9, %r8, 16;
-; CHECK-NEXT: or.b32 %r10, %r9, %r4;
-; CHECK-NEXT: shr.u32 %r11, %r10, 8;
-; CHECK-NEXT: shr.u32 %r12, %r10, 16;
-; CHECK-NEXT: shr.u32 %r13, %r10, 24;
+; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
+; CHECK-NEXT: shr.u32 %r2, %r1, 8;
+; CHECK-NEXT: shr.u32 %r3, %r1, 16;
+; CHECK-NEXT: shr.u32 %r4, %r1, 24;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[4];
-; CHECK-NEXT: st.param.b8 [param0], %r10;
-; CHECK-NEXT: st.param.b8 [param0+1], %r11;
-; CHECK-NEXT: st.param.b8 [param0+2], %r12;
-; CHECK-NEXT: st.param.b8 [param0+3], %r13;
+; CHECK-NEXT: st.param.b8 [param0], %r1;
+; CHECK-NEXT: st.param.b8 [param0+1], %r2;
+; CHECK-NEXT: st.param.b8 [param0+2], %r3;
+; CHECK-NEXT: st.param.b8 [param0+3], %r4;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0), callee, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: } // callseq 0
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%1 = call i32 @callee(%struct.1float %data)
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 263477df1dbfe..6c52bfd6cbfd8 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -28,14 +28,12 @@
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.ne.b16 %p1, [[A]], 0
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
-; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[C]]
+; CHECK: st.param.b32 [param0], [[B]]
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0), test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
-; CHECK: st.param.b32 [func_retval0], [[R]];
+; CHECK: st.param.b32 [func_retval0], [[R8]];
; CHECK: ret;
define i1 @test_i1(i1 %a) {
%r = tail call i1 @test_i1(i1 %a);
@@ -166,14 +164,12 @@ define i3 @test_i3(i3 %a) {
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
-; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A]];
+; CHECK: st.param.b32 [param0], [[A32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0), test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
-; CHECK: st.param.b32 [func_retval0], [[R]];
+; CHECK: st.param.b32 [func_retval0], [[R32]];
; CHECK-NEXT: ret;
define i8 @test_i8(i8 %a) {
%r = tail call i8 @test_i8(i8 %a);
@@ -247,7 +243,14 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
; CHECK: call.uni (retval0), test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]];
+; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]];
+; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U;
+; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]];
+; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]];
+; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U;
+; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U;
+; CHECK-DAG: st.param.b32 [func_retval0], [[P2]];
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
@@ -280,8 +283,7 @@ define i11 @test_i11(i11 %a) {
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0), test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
-; CHECK: st.param.b32 [func_retval0], [[R]];
+; CHECK: st.param.b32 [func_retval0], [[RE32]];
; CHECK-NEXT: ret;
define i16 @test_i16(i16 %a) {
%r = tail call i16 @test_i16(i16 %a);
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index f490c5f73d425..88ad0b0a9f9d1 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -106,10 +106,10 @@ define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0+4];
-; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0];
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: st.param.b32 [func_retval0+4], %r1;
+; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0+4];
+; CHECK-NEXT: st.param.b32 [func_retval0+4], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
ret %struct.float2 %a
}
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index 892e49a5fe82a..a480984a538b3 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -173,8 +173,8 @@ define internal fastcc [3 x i32] @callee_St4x3(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St4x3_param_0[12]
; CHECK: ld.param.v2.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x3_param_0];
; CHECK: ld.param.b32 [[R3:%r[0-9]+]], [callee_St4x3_param_0+8];
- ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]};
- ; CHECK: st.param.b32 [func_retval0+8], [[R3]];
+ ; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]};
+ ; CHECK-DAG: st.param.b32 [func_retval0+8], [[R3]];
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
@@ -269,8 +269,8 @@ define internal fastcc [5 x i32] @callee_St4x5(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St4x5_param_0[20]
; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x5_param_0];
; CHECK: ld.param.b32 [[R5:%r[0-9]+]], [callee_St4x5_param_0+16];
- ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
- ; CHECK: st.param.b32 [func_retval0+16], [[R5]];
+ ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
+ ; CHECK-DAG: st.param.b32 [func_retval0+16], [[R5]];
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
@@ -328,8 +328,8 @@ define internal fastcc [6 x i32] @callee_St4x6(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St4x6_param_0[24]
; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x6_param_0];
; CHECK: ld.param.v2.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x6_param_0+16];
- ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
- ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
+ ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
+ ; CHECK-DAG: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
@@ -396,9 +396,9 @@ define internal fastcc [7 x i32] @callee_St4x7(ptr nocapture noundef readonly by
; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x7_param_0];
; CHECK: ld.param.v2.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x7_param_0+16];
; CHECK: ld.param.b32 [[R7:%r[0-9]+]], [callee_St4x7_param_0+24];
- ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
- ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
- ; CHECK: st.param.b32 [func_retval0+24], [[R7]];
+ ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
+ ; CHECK-DAG: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
+ ; CHECK-DAG: st.param.b32 [func_retval0+24], [[R7]];
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
@@ -468,8 +468,8 @@ define internal fastcc [8 x i32] @callee_St4x8(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St4x8_param_0[32]
; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x8_param_0];
; CHECK: ld.param.v4.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]], [[R7:%r[0-9]+]], [[R8:%r[0-9]+]]}, [callee_St4x8_param_0+16];
- ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
- ; CHECK: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]};
+ ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]};
+ ; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]};
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
@@ -589,8 +589,8 @@ define internal fastcc [3 x i64] @callee_St8x3(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St8x3_param_0[24]
; CHECK: ld.param.v2.b64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x3_param_0];
; CHECK: ld.param.b64 [[RD3:%rd[0-9]+]], [callee_St8x3_param_0+16];
- ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]};
- ; CHECK: st.param.b64 [func_retval0+16], [[RD3]];
+ ; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]};
+ ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RD3]];
; CHECK-NEXT: ret;
%1 = load i64, ptr %in, align 8
%arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
@@ -636,8 +636,8 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by
; CHECK-NEXT: .param .align 16 .b8 callee_St8x4_param_0[32]
; CHECK: ld.param.v2.b64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x4_param_0];
; CHECK: ld.param.v2.b64 {[[RD3:%rd[0-9]+]], [[RD4:%rd[0-9]+]]}, [callee_St8x4_param_0+16];
- ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]};
- ; CHECK: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]};
+ ; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]};
+ ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]};
; CHECK-NEXT: ret;
%1 = load i64, ptr %in, align 8
%arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
@@ -687,10 +687,10 @@ define external fastcc [4 x i32] @callee_St4x4_external(ptr nocapture noundef re
; CHECK: ld.param.b32 [[R2:%r[0-9]+]], [callee_St4x4_external_param_0+4];
; CHECK: ld.param.b32 [[R3:%r[0-9]+]], [callee_St4x4_external_param_0+8];
; CHECK: ld.param.b32 [[R4:%r[0-9]+]], [callee_St4x4_external_param_0+12];
- ; CHECK: st.param.b32 [func_retval0], [[R1]];
- ; CHECK: st.param.b32 [func_retval0+4], [[R2]];
- ; CHECK: st.param.b32 [func_retval0+8], [[R3]];
- ; CHECK: st.param.b32 [func_retval0+12], [[R4]];
+ ; CHECK-DAG: st.param.b32 [func_retval0], [[R1]];
+ ; CHECK-DAG: st.param.b32 [func_retval0+4], [[R2]];
+ ; CHECK-DAG: st.param.b32 [func_retval0+8], [[R3]];
+ ; CHECK-DAG: st.param.b32 [func_retval0+12], [[R4]];
; CHECK-NEXT: ret;
%1 = load i32, ptr %in, align 4
%arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
index 3649ef53b0881..74023c4f5631a 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
@@ -15,11 +15,9 @@ define i1 @check_i1() {
; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
- ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1;
- ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1;
-
- ; PTX-DAG: st.param.b32 [func_retval0], [[RES]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[RES:%r[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[RES]];
+ ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]];
%ret = call i1 @callee_i1()
ret i1 %ret
@@ -32,11 +30,9 @@ define i16 @check_i16() {
; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
- ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535;
- ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535;
-
- ; PTX-DAG: st.param.b32 [func_retval0], [[RES]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[RES:%r[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[RES]];
+ ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]];
%ret = call i16 @callee_i16()
ret i16 %ret
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
index a1d8d0590f160..5d0d6f6ecd5ff 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
@@ -83,16 +83,16 @@ body: |
%5:b32 = ProxyRegB32 killed %1
%6:b32 = ProxyRegB32 killed %2
%7:b32 = ProxyRegB32 killed %3
- ; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3
- StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
+ ; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3
+ STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101)
%8:b32 = LoadParamMemI32 0
; CHECK-NOT: ProxyReg
%9:b32 = ProxyRegB32 killed %8
%10:b32 = ProxyRegB32 killed %9
%11:b32 = ProxyRegB32 killed %10
- ; CHECK: StoreRetvalI32 killed %8
- StoreRetvalI32 killed %11, 0
+ ; CHECK: ST_i32 killed %8
+ ST_i32 killed %11, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
Return
...
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index f0813609268e9..e7866b01064c7 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -70,16 +70,17 @@ define i64 @test_and(i64 %x, i32 %y) {
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
; CHECK-LABEL: test_vec(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b16 %rs<7>;
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_vec_param_0];
-; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_1];
+; CHECK-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [test_vec_param_1];
+; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4};
; CHECK-NEXT: and.b32 %r2, %r1, 16711935;
-; CHECK-NEXT: shr.u16 %rs3, %rs2, 5;
-; CHECK-NEXT: shr.u16 %rs4, %rs1, 5;
-; CHECK-NEXT: mov.b32 %r3, {%rs4, %rs3};
+; CHECK-NEXT: shr.u16 %rs5, %rs2, 5;
+; CHECK-NEXT: shr.u16 %rs6, %rs1, 5;
+; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5};
; CHECK-NEXT: or.b32 %r4, %r3, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll
index 27bc19bfd6c4a..019814e47c2b1 100644
--- a/llvm/test/CodeGen/NVPTX/tid-range.ll
+++ b/llvm/test/CodeGen/NVPTX/tid-range.ll
@@ -1,20 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
declare i32 @get_register()
define i1 @test1() {
+; CHECK-LABEL: test1(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.u32 %r1, %tid.x;
+; CHECK-NEXT: setp.eq.s32 %p1, %r1, 1;
+; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
entry:
%call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !0
%cmp = icmp eq i32 %call, 1
ret i1 %cmp
}
-; CHECK-LABEL: test1(
-; CHECK: setp.eq.s32 %p1, %r1, 1;
-; CHECK: selp.b32 %[[R:.+]], 1, 0, %p1;
-; CHECK: st.param.b32 [func_retval0], %[[R]];
-
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
!0 = !{ i32 0, i32 3 }
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index 178ee7ff6db18..e4e668018d872 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Verifies correctness of load/store of parameters and return values.
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
@@ -19,339 +20,489 @@
; -- Only loading and storing the said fields are checked in the following
; series of tests so that they are more concise.
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16])
-; CHECK-LABEL: test_s_i8i16p(
-; CHECK: .param .align 8 .b8 test_s_i8i16p_param_0[16]
-; CHECK-DAG: ld.param.b16 [[P0:%rs[0-9]+]], [test_s_i8i16p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%rs[0-9]+]], [test_s_i8i16p_param_0+3];
-; CHECK-DAG: ld.param.b8 [[P2_1:%rs[0-9]+]], [test_s_i8i16p_param_0+4];
-; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK: { // callseq
-; CHECK: .param .align 8 .b8 param0[16];
-; CHECK-DAG: st.param.b16 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
-; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
-; CHECK: .param .align 8 .b8 retval0[16];
-; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
-; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
-; CHECK-DAG: shl.b16 [[R2_1_shl:%rs[0-9]+]], [[R2_1]], 8;
-; CHECK-DAG: and.b16 [[R2_0_and:%rs[0-9]+]], [[R2_0]], 255;
-; CHECK-DAG: or.b16 [[R2:%rs[0-9]+]], [[R2_0_and]], [[R2_1_shl]];
-; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2]];
-; CHECK-DAG: and.b16 [[R2_1_and:%rs[0-9]+]], [[R2_1]], 255;
-; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2_1_and]];
-; CHECK: ret;
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
- %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
- ret %s_i8i16p %r
+; CHECK-LABEL: test_s_i8i16p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<15>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4];
+; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
+; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3];
+; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
+; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2];
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[16];
+; CHECK-NEXT: st.param.b16 [param0], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
+; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
+; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
+; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
+ ret %s_i8i16p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
-; CHECK-LABEL: test_s_i8i32p(
-; CHECK: .param .align 8 .b8 test_s_i8i32p_param_0[24]
-; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8i32p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8i32p_param_0+5];
-; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8i32p_param_0+6];
-; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8i32p_param_0+7];
-; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8i32p_param_0+8];
-; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
-; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
-; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
-; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
-; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
-; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
-; CHECK: { // callseq
-; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
-; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
-; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
-; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
-; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
-; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
-; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
-; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
-; CHECK-DAG: st.param.b8 [func_retval0+5],
-; CHECK-DAG: st.param.b8 [func_retval0+6],
-; CHECK-DAG: st.param.b8 [func_retval0+7],
-; CHECK-DAG: st.param.b8 [func_retval0+8],
-; CHECK: ret;
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
- %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
- ret %s_i8i32p %r
+; CHECK-LABEL: test_s_i8i32p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<12>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6];
+; CHECK-NEXT: shl.b32 %r4, %r3, 8;
+; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5];
+; CHECK-NEXT: or.b32 %r6, %r4, %r5;
+; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7];
+; CHECK-NEXT: shl.b32 %r8, %r7, 16;
+; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8];
+; CHECK-NEXT: shl.b32 %r10, %r9, 24;
+; CHECK-NEXT: or.b32 %r11, %r10, %r8;
+; CHECK-NEXT: or.b32 %r2, %r11, %r6;
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0];
+; CHECK-NEXT: shr.u32 %r12, %r2, 8;
+; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: { // callseq 1, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[24];
+; CHECK-NEXT: st.param.b32 [param0], %r1;
+; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+5], %r2;
+; CHECK-NEXT: st.param.b8 [param0+6], %r12;
+; CHECK-NEXT: st.param.b8 [param0+7], %r13;
+; CHECK-NEXT: st.param.b8 [param0+8], %r9;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
+; CHECK-NEXT: ld.param.b32 %r14, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: } // callseq 1
+; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
+; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
+; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
+; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
+ ret %s_i8i32p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32])
-; CHECK-LABEL: test_s_i8i64p(
-; CHECK: .param .align 8 .b8 test_s_i8i64p_param_0[32]
-; CHECK-DAG: ld.param.b64 [[P0:%rd[0-9]+]], [test_s_i8i64p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%rd[0-9]+]], [test_s_i8i64p_param_0+9];
-; CHECK-DAG: ld.param.b8 [[P2_1:%rd[0-9]+]], [test_s_i8i64p_param_0+10];
-; CHECK-DAG: ld.param.b8 [[P2_2:%rd[0-9]+]], [test_s_i8i64p_param_0+11];
-; CHECK-DAG: ld.param.b8 [[P2_3:%rd[0-9]+]], [test_s_i8i64p_param_0+12];
-; CHECK-DAG: ld.param.b8 [[P2_4:%rd[0-9]+]], [test_s_i8i64p_param_0+13];
-; CHECK-DAG: ld.param.b8 [[P2_5:%rd[0-9]+]], [test_s_i8i64p_param_0+14];
-; CHECK-DAG: ld.param.b8 [[P2_6:%rd[0-9]+]], [test_s_i8i64p_param_0+15];
-; CHECK-DAG: ld.param.b8 [[P2_7:%rd[0-9]+]], [test_s_i8i64p_param_0+16];
-; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
-; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
-; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
-; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
-; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
-; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
-; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
-; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
-; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
-; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
-; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
-; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
-; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
-; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
-; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
-; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
-; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
-; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
-; CHECK: { // callseq
-; CHECK: .param .align 8 .b8 param0[32];
-; CHECK-DAG: st.param.b64 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
-; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
-; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
-; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]];
-; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]];
-; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]];
-; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
-; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
-; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
-; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
-; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
-; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12];
-; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13];
-; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14];
-; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
-; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b64 [func_retval0], [[R0]];
-; CHECK-DAG: st.param.b8 [func_retval0+9],
-; CHECK-DAG: st.param.b8 [func_retval0+10],
-; CHECK-DAG: st.param.b8 [func_retval0+11],
-; CHECK-DAG: st.param.b8 [func_retval0+12],
-; CHECK-DAG: st.param.b8 [func_retval0+13],
-; CHECK-DAG: st.param.b8 [func_retval0+14],
-; CHECK-DAG: st.param.b8 [func_retval0+15],
-; CHECK-DAG: st.param.b8 [func_retval0+16],
-; CHECK: ret;
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
- %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
- ret %s_i8i64p %r
+; CHECK-LABEL: test_s_i8i64p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<20>;
+; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10];
+; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
+; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9];
+; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
+; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11];
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
+; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12];
+; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
+; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
+; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
+; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14];
+; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
+; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13];
+; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
+; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15];
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
+; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16];
+; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
+; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
+; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
+; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0];
+; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
+; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
+; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
+; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
+; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
+; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: { // callseq 2, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[32];
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
+; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
+; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
+; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
+; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
+; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
+; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
+; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
+; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
+; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: } // callseq 2
+; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3;
+; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
+; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4;
+; CHECK-NEXT: and.b64 %rd38, %rd37, 255;
+; CHECK-NEXT: shl.b64 %rd39, %rd38, 8;
+; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39;
+; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5;
+; CHECK-NEXT: and.b64 %rd42, %rd41, 255;
+; CHECK-NEXT: shl.b64 %rd43, %rd42, 16;
+; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43;
+; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6;
+; CHECK-NEXT: and.b64 %rd46, %rd45, 255;
+; CHECK-NEXT: shl.b64 %rd47, %rd46, 24;
+; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47;
+; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7;
+; CHECK-NEXT: and.b64 %rd50, %rd49, 255;
+; CHECK-NEXT: shl.b64 %rd51, %rd50, 32;
+; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51;
+; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8;
+; CHECK-NEXT: and.b64 %rd54, %rd53, 255;
+; CHECK-NEXT: shl.b64 %rd55, %rd54, 40;
+; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55;
+; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9;
+; CHECK-NEXT: and.b64 %rd58, %rd57, 255;
+; CHECK-NEXT: shl.b64 %rd59, %rd58, 48;
+; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59;
+; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10;
+; CHECK-NEXT: shl.b64 %rd62, %rd61, 56;
+; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35;
+; CHECK-NEXT: shr.u64 %rd64, %rd52, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
+; CHECK-NEXT: shr.u64 %rd65, %rd56, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
+; CHECK-NEXT: shr.u64 %rd66, %rd60, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
+; CHECK-NEXT: shr.u64 %rd67, %rd63, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
+ ret %s_i8i64p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16])
-; CHECK-LABEL: test_s_i8f16p(
-; CHECK: .param .align 8 .b8 test_s_i8f16p_param_0[16]
-; CHECK-DAG: ld.param.b16 [[P0:%rs[0-9]+]], [test_s_i8f16p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%rs[0-9]+]], [test_s_i8f16p_param_0+3];
-; CHECK-DAG: ld.param.b8 [[P2_1:%rs[0-9]+]], [test_s_i8f16p_param_0+4];
-; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK: { // callseq
-; CHECK: .param .align 8 .b8 param0[16];
-; CHECK-DAG: st.param.b16 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
-; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
-; CHECK: .param .align 8 .b8 retval0[16];
-; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
-; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3];
-; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
-; CHECK-DAG: shl.b16 [[R2I_1_shl:%rs[0-9]+]], [[R2I_1]], 8;
-; CHECK-DAG: and.b16 [[R2I_0_and:%rs[0-9]+]], [[R2I_0]], 255;
-; CHECK-DAG: or.b16 [[R2I:%rs[0-9]+]], [[R2I_0_and]], [[R2I_1_shl]];
-; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2I]];
-; CHECK-DAG: and.b16 [[R2I_1_and:%rs[0-9]+]], [[R2I_1]], 255;
-; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2I_1_and]];
-; CHECK: ret;
define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
- %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
- ret %s_i8f16p %r
+; CHECK-LABEL: test_s_i8f16p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<15>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4];
+; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
+; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3];
+; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
+; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2];
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0];
+; CHECK-NEXT: { // callseq 3, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[16];
+; CHECK-NEXT: st.param.b16 [param0], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
+; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
+; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
+; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: } // callseq 3
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
+ ret %s_i8f16p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
-; CHECK-LABEL: test_s_i8f16x2p(
-; CHECK: .param .align 8 .b8 test_s_i8f16x2p_param_0[24]
-; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8f16x2p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8f16x2p_param_0+5];
-; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8f16x2p_param_0+6];
-; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8f16x2p_param_0+7];
-; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8f16x2p_param_0+8];
-; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
-; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
-; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
-; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
-; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
-; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
-; CHECK: { // callseq
-; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
-; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
-; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
-; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
-; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
-; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
-; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
-; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
-; CHECK-DAG: st.param.b8 [func_retval0+5],
-; CHECK-DAG: st.param.b8 [func_retval0+6],
-; CHECK-DAG: st.param.b8 [func_retval0+7],
-; CHECK-DAG: st.param.b8 [func_retval0+8],
-; CHECK: ret;
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
- %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
- ret %s_i8f16x2p %r
+; CHECK-LABEL: test_s_i8f16x2p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<12>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6];
+; CHECK-NEXT: shl.b32 %r4, %r3, 8;
+; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5];
+; CHECK-NEXT: or.b32 %r6, %r4, %r5;
+; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7];
+; CHECK-NEXT: shl.b32 %r8, %r7, 16;
+; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8];
+; CHECK-NEXT: shl.b32 %r10, %r9, 24;
+; CHECK-NEXT: or.b32 %r11, %r10, %r8;
+; CHECK-NEXT: or.b32 %r2, %r11, %r6;
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
+; CHECK-NEXT: shr.u32 %r12, %r2, 8;
+; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: { // callseq 4, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[24];
+; CHECK-NEXT: st.param.b32 [param0], %r1;
+; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+5], %r2;
+; CHECK-NEXT: st.param.b8 [param0+6], %r12;
+; CHECK-NEXT: st.param.b8 [param0+7], %r13;
+; CHECK-NEXT: st.param.b8 [param0+8], %r9;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
+; CHECK-NEXT: ld.param.b32 %r14, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: } // callseq 4
+; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
+; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
+; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
+; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
+ ret %s_i8f16x2p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
-; CHECK-LABEL: test_s_i8f32p(
-; CHECK: .param .align 8 .b8 test_s_i8f32p_param_0[24]
-; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8f32p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8f32p_param_0+5];
-; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8f32p_param_0+6];
-; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8f32p_param_0+7];
-; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8f32p_param_0+8];
-; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
-; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
-; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
-; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
-; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
-; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
-; CHECK: { // callseq
-; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
-; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
-; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
-; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
-; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
-; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
-; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
-; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
-; CHECK-DAG: st.param.b8 [func_retval0+5],
-; CHECK-DAG: st.param.b8 [func_retval0+6],
-; CHECK-DAG: st.param.b8 [func_retval0+7],
-; CHECK-DAG: st.param.b8 [func_retval0+8],
-; CHECK: ret;
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
- %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
- ret %s_i8f32p %r
+; CHECK-LABEL: test_s_i8f32p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<12>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
+; CHECK-NEXT: shl.b32 %r4, %r3, 8;
+; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
+; CHECK-NEXT: or.b32 %r6, %r4, %r5;
+; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
+; CHECK-NEXT: shl.b32 %r8, %r7, 16;
+; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
+; CHECK-NEXT: shl.b32 %r10, %r9, 24;
+; CHECK-NEXT: or.b32 %r11, %r10, %r8;
+; CHECK-NEXT: or.b32 %r2, %r11, %r6;
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
+; CHECK-NEXT: shr.u32 %r12, %r2, 8;
+; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: { // callseq 5, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[24];
+; CHECK-NEXT: st.param.b32 [param0], %r1;
+; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+5], %r2;
+; CHECK-NEXT: st.param.b8 [param0+6], %r12;
+; CHECK-NEXT: st.param.b8 [param0+7], %r13;
+; CHECK-NEXT: st.param.b8 [param0+8], %r9;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
+; CHECK-NEXT: ld.param.b32 %r14, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: } // callseq 5
+; CHECK-NEXT: cvt.u32.u16 %r16, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r17, %rs4;
+; CHECK-NEXT: cvt.u32.u16 %r18, %rs5;
+; CHECK-NEXT: cvt.u32.u16 %r19, %rs6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16;
+; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
+ ret %s_i8f32p %r
}
-; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32])
-; CHECK-LABEL: test_s_i8f64p(
-; CHECK: .param .align 8 .b8 test_s_i8f64p_param_0[32]
-; CHECK-DAG: ld.param.b64 [[P0:%rd[0-9]+]], [test_s_i8f64p_param_0];
-; CHECK-DAG: ld.param.b8 [[P2_0:%rd[0-9]+]], [test_s_i8f64p_param_0+9];
-; CHECK-DAG: ld.param.b8 [[P2_1:%rd[0-9]+]], [test_s_i8f64p_param_0+10];
-; CHECK-DAG: ld.param.b8 [[P2_2:%rd[0-9]+]], [test_s_i8f64p_param_0+11];
-; CHECK-DAG: ld.param.b8 [[P2_3:%rd[0-9]+]], [test_s_i8f64p_param_0+12];
-; CHECK-DAG: ld.param.b8 [[P2_4:%rd[0-9]+]], [test_s_i8f64p_param_0+13];
-; CHECK-DAG: ld.param.b8 [[P2_5:%rd[0-9]+]], [test_s_i8f64p_param_0+14];
-; CHECK-DAG: ld.param.b8 [[P2_6:%rd[0-9]+]], [test_s_i8f64p_param_0+15];
-; CHECK-DAG: ld.param.b8 [[P2_7:%rd[0-9]+]], [test_s_i8f64p_param_0+16];
-; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
-; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
-; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
-; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
-; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
-; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
-; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
-; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
-; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
-; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
-; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
-; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
-; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
-; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
-; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
-; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
-; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
-; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
-; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
-; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
-; CHECK: { // callseq
-; CHECK: .param .align 8 .b8 param0[32];
-; CHECK-DAG: st.param.b64 [param0], [[P0]];
-; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
-; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
-; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
-; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]];
-; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]];
-; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]];
-; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
-; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
-; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
-; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
-; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
-; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
-; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
-; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12];
-; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13];
-; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14];
-; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
-; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
-; CHECK: } // callseq
-; CHECK-DAG: st.param.b64 [func_retval0], [[R0]];
-; CHECK-DAG: st.param.b8 [func_retval0+9],
-; CHECK-DAG: st.param.b8 [func_retval0+10],
-; CHECK-DAG: st.param.b8 [func_retval0+11],
-; CHECK-DAG: st.param.b8 [func_retval0+12],
-; CHECK-DAG: st.param.b8 [func_retval0+13],
-; CHECK-DAG: st.param.b8 [func_retval0+14],
-; CHECK-DAG: st.param.b8 [func_retval0+15],
-; CHECK-DAG: st.param.b8 [func_retval0+16],
-; CHECK: ret;
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
- %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
- ret %s_i8f64p %r
+; CHECK-LABEL: test_s_i8f64p(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<20>;
+; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
+; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
+; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
+; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
+; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
+; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
+; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
+; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
+; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
+; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
+; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
+; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
+; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
+; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
+; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
+; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
+; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
+; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
+; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
+; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
+; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
+; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
+; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
+; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
+; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: { // callseq 6, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[32];
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
+; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
+; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
+; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
+; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
+; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
+; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
+; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
+; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
+; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
+; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: } // callseq 6
+; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3;
+; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
+; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4;
+; CHECK-NEXT: and.b64 %rd38, %rd37, 255;
+; CHECK-NEXT: shl.b64 %rd39, %rd38, 8;
+; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39;
+; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5;
+; CHECK-NEXT: and.b64 %rd42, %rd41, 255;
+; CHECK-NEXT: shl.b64 %rd43, %rd42, 16;
+; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43;
+; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6;
+; CHECK-NEXT: and.b64 %rd46, %rd45, 255;
+; CHECK-NEXT: shl.b64 %rd47, %rd46, 24;
+; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47;
+; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7;
+; CHECK-NEXT: and.b64 %rd50, %rd49, 255;
+; CHECK-NEXT: shl.b64 %rd51, %rd50, 32;
+; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51;
+; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8;
+; CHECK-NEXT: and.b64 %rd54, %rd53, 255;
+; CHECK-NEXT: shl.b64 %rd55, %rd54, 40;
+; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55;
+; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9;
+; CHECK-NEXT: and.b64 %rd58, %rd57, 255;
+; CHECK-NEXT: shl.b64 %rd59, %rd58, 48;
+; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59;
+; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10;
+; CHECK-NEXT: shl.b64 %rd62, %rd61, 56;
+; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35;
+; CHECK-NEXT: shr.u64 %rd64, %rd52, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
+; CHECK-NEXT: shr.u64 %rd65, %rd56, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
+; CHECK-NEXT: shr.u64 %rd66, %rd60, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
+; CHECK-NEXT: shr.u64 %rd67, %rd63, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: ret;
+ %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
+ ret %s_i8f64p %r
}
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ab9202650577a..167d7faafe5b3 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -142,7 +142,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
-; CHECK-PTX-NEXT: .reg .b32 %r<7>;
+; CHECK-PTX-NEXT: .reg .b32 %r<6>;
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
@@ -165,8 +165,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd7, %r5;
; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd6;
-; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8;
-; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd8;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
@@ -310,7 +309,6 @@ entry:
define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, ...) {
; CHECK-PTX-LABEL: variadics4(
; CHECK-PTX: {
-; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<10>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
@@ -322,8 +320,7 @@ define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, .
; CHECK-PTX-NEXT: ld.param.b64 %rd7, [variadics4_param_0+8];
; CHECK-PTX-NEXT: add.s64 %rd8, %rd6, %rd7;
; CHECK-PTX-NEXT: add.s64 %rd9, %rd8, %rd5;
-; CHECK-PTX-NEXT: cvt.u32.u64 %r1, %rd9;
-; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd9;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
diff --git a/llvm/test/CodeGen/NVPTX/vector-returns.ll b/llvm/test/CodeGen/NVPTX/vector-returns.ll
index 2001d199ce0a7..472cdee11a4bd 100644
--- a/llvm/test/CodeGen/NVPTX/vector-returns.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-returns.ll
@@ -6,12 +6,11 @@ target triple = "nvptx-nvidia-cuda"
define <3 x i64> @long3() {
; CHECK-LABEL: long3(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b64 %rd1, 0;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1};
-; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <3 x i64> zeroinitializer
}
@@ -19,11 +18,10 @@ define <3 x i64> @long3() {
define <2 x i64> @long2() {
; CHECK-LABEL: long2(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b64 %rd1, 0;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <2 x i64> zeroinitializer
}
@@ -31,11 +29,10 @@ define <2 x i64> @long2() {
define <1 x i64> @long1() {
; CHECK-LABEL: long1(
; CHECK: {
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b64 %rd1, 0;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <1 x i64> zeroinitializer
}
@@ -43,12 +40,11 @@ define <1 x i64> @long1() {
define <5 x i32> @int5() {
; CHECK-LABEL: int5(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1};
-; CHECK-NEXT: st.param.b32 [func_retval0+16], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0};
; CHECK-NEXT: ret;
ret <5 x i32> zeroinitializer
}
@@ -56,11 +52,10 @@ define <5 x i32> @int5() {
define <4 x i32> @int4() {
; CHECK-LABEL: int4(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0};
; CHECK-NEXT: ret;
ret <4 x i32> zeroinitializer
}
@@ -68,12 +63,11 @@ define <4 x i32> @int4() {
define <3 x i32> @int3() {
; CHECK-LABEL: int3(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1};
-; CHECK-NEXT: st.param.b32 [func_retval0+8], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <3 x i32> zeroinitializer
}
@@ -81,11 +75,10 @@ define <3 x i32> @int3() {
define <2 x i32> @int2() {
; CHECK-LABEL: int2(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <2 x i32> zeroinitializer
}
@@ -93,11 +86,10 @@ define <2 x i32> @int2() {
define <1 x i32> @int1() {
; CHECK-LABEL: int1(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <1 x i32> zeroinitializer
}
@@ -105,13 +97,12 @@ define <1 x i32> @int1() {
define <9 x i16> @short9() {
; CHECK-LABEL: short9(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b16 [func_retval0+16], %rs1;
+; CHECK-NEXT: st.param.b16 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0+8], {0, 0};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <9 x i16> zeroinitializer
}
@@ -119,11 +110,10 @@ define <9 x i16> @short9() {
define <8 x i16> @short8() {
; CHECK-LABEL: short8(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0};
; CHECK-NEXT: ret;
ret <8 x i16> zeroinitializer
}
@@ -131,13 +121,12 @@ define <8 x i16> @short8() {
define <7 x i16> @short7() {
; CHECK-LABEL: short7(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b16 [func_retval0+12], %rs1;
+; CHECK-NEXT: st.param.b16 [func_retval0+12], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <7 x i16> zeroinitializer
}
@@ -145,12 +134,11 @@ define <7 x i16> @short7() {
define <5 x i16> @short5() {
; CHECK-LABEL: short5(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b16 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b16 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <5 x i16> zeroinitializer
}
@@ -158,11 +146,10 @@ define <5 x i16> @short5() {
define <4 x i16> @short4() {
; CHECK-LABEL: short4(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <4 x i16> zeroinitializer
}
@@ -170,12 +157,11 @@ define <4 x i16> @short4() {
define <3 x i16> @short3() {
; CHECK-LABEL: short3(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b16 [func_retval0+4], %rs1;
+; CHECK-NEXT: st.param.b16 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <3 x i16> zeroinitializer
}
@@ -183,11 +169,10 @@ define <3 x i16> @short3() {
define <2 x i16> @short2() {
; CHECK-LABEL: short2(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <2 x i16> zeroinitializer
}
@@ -195,11 +180,10 @@ define <2 x i16> @short2() {
define <1 x i16> @short1() {
; CHECK-LABEL: short1(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: st.param.b16 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <1 x i16> zeroinitializer
}
@@ -207,15 +191,12 @@ define <1 x i16> @short1() {
define <17 x i8> @byte17() {
; CHECK-LABEL: byte17(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <17 x i8> zeroinitializer
}
@@ -223,11 +204,10 @@ define <17 x i8> @byte17() {
define <16 x i8> @byte16() {
; CHECK-LABEL: byte16(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0};
; CHECK-NEXT: ret;
ret <16 x i8> zeroinitializer
}
@@ -235,15 +215,13 @@ define <16 x i8> @byte16() {
define <15 x i8> @byte15() {
; CHECK-LABEL: byte15(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], 0;
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0};
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <15 x i8> zeroinitializer
}
@@ -251,13 +229,11 @@ define <15 x i8> @byte15() {
define <9 x i8> @byte9() {
; CHECK-LABEL: byte9(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b64 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <9 x i8> zeroinitializer
}
@@ -265,11 +241,10 @@ define <9 x i8> @byte9() {
define <8 x i8> @byte8() {
; CHECK-LABEL: byte8(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <8 x i8> zeroinitializer
}
@@ -277,13 +252,12 @@ define <8 x i8> @byte8() {
define <7 x i8> @byte7() {
; CHECK-LABEL: byte7(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], 0;
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0};
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <7 x i8> zeroinitializer
}
@@ -291,12 +265,11 @@ define <7 x i8> @byte7() {
define <5 x i8> @byte5() {
; CHECK-LABEL: byte5(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <5 x i8> zeroinitializer
}
@@ -304,11 +277,10 @@ define <5 x i8> @byte5() {
define <4 x i8> @byte4() {
; CHECK-LABEL: byte4(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <4 x i8> zeroinitializer
}
@@ -316,11 +288,10 @@ define <4 x i8> @byte4() {
define <3 x i8> @byte3() {
; CHECK-LABEL: byte3(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <3 x i8> zeroinitializer
}
@@ -328,11 +299,10 @@ define <3 x i8> @byte3() {
define <2 x i8> @byte2() {
; CHECK-LABEL: byte2(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b32 %r1, 0;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <2 x i8> zeroinitializer
}
@@ -340,11 +310,10 @@ define <2 x i8> @byte2() {
define <1 x i8> @byte1() {
; CHECK-LABEL: byte1(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <1 x i8> zeroinitializer
}
@@ -352,15 +321,14 @@ define <1 x i8> @byte1() {
define <17 x i1> @bit17() {
; CHECK-LABEL: bit17(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0+12], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <17 x i1> zeroinitializer
}
@@ -368,18 +336,17 @@ define <17 x i1> @bit17() {
define <16 x i1> @bit16() {
; CHECK-LABEL: bit16(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+14], {%rs1, %rs1};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+14], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <16 x i1> zeroinitializer
}
@@ -387,18 +354,17 @@ define <16 x i1> @bit16() {
define <15 x i1> @bit15() {
; CHECK-LABEL: bit15(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], 0;
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <15 x i1> zeroinitializer
}
@@ -406,15 +372,14 @@ define <15 x i1> @bit15() {
define <9 x i1> @bit9() {
; CHECK-LABEL: bit9(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1};
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1};
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], 0;
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0};
+; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0};
; CHECK-NEXT: ret;
ret <9 x i1> zeroinitializer
}
@@ -422,18 +387,17 @@ define <9 x i1> @bit9() {
define <8 x i1> @bit8() {
; CHECK-LABEL: bit8(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <8 x i1> zeroinitializer
}
@@ -441,17 +405,16 @@ define <8 x i1> @bit8() {
define <7 x i1> @bit7() {
; CHECK-LABEL: bit7(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <7 x i1> zeroinitializer
}
@@ -459,15 +422,14 @@ define <7 x i1> @bit7() {
define <5 x i1> @bit5() {
; CHECK-LABEL: bit5(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <5 x i1> zeroinitializer
}
@@ -475,14 +437,13 @@ define <5 x i1> @bit5() {
define <4 x i1> @bit4() {
; CHECK-LABEL: bit4(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <4 x i1> zeroinitializer
}
@@ -490,13 +451,12 @@ define <4 x i1> @bit4() {
define <3 x i1> @bit3() {
; CHECK-LABEL: bit3(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <3 x i1> zeroinitializer
}
@@ -504,12 +464,11 @@ define <3 x i1> @bit3() {
define <2 x i1> @bit2() {
; CHECK-LABEL: bit2(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
-; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], 0;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <2 x i1> zeroinitializer
}
@@ -517,11 +476,10 @@ define <2 x i1> @bit2() {
define <1 x i1> @bit1() {
; CHECK-LABEL: bit1(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0;
-; CHECK-NEXT: st.param.b8 [func_retval0], %rs1;
+; CHECK-NEXT: st.param.b8 [func_retval0], 0;
; CHECK-NEXT: ret;
ret <1 x i1> zeroinitializer
}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
index f466b1de9fb5a..51cafacd57ad5 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
@@ -54,8 +54,8 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [callee_St8x4_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [callee_St8x4_param_0+16];
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
%1 = load i64, ptr %in, align 8
%arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
More information about the llvm-commits
mailing list