[llvm] dc90f42 - Coalesce 16-bit FP types to use integer register classes.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 5 12:22:12 PDT 2023
Author: Artem Belevich
Date: 2023-06-05T12:21:52-07:00
New Revision: dc90f42ea7b4f6d9e643f5ad2ba663eba2f9e421
URL: https://github.com/llvm/llvm-project/commit/dc90f42ea7b4f6d9e643f5ad2ba663eba2f9e421
DIFF: https://github.com/llvm/llvm-project/commit/dc90f42ea7b4f6d9e643f5ad2ba663eba2f9e421.diff
LOG: Coalesce 16-bit FP types to use integer register classes.
i16/f16/bf16 will share the same .b16 registers, and
i32/v2f16/v2bf16 will share the same .b32 registers.
The changes are mostly mechanical, intended to remove unnecessary register
classes which tend to produce redundant register moves.
Differential Revision: https://reviews.llvm.org/D151601
Also includes the v2f16 register type conversion to i32.
Added:
Modified:
clang/test/CodeGenCUDA/bf16.cu
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
llvm/test/CodeGen/NVPTX/bf16.ll
llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/test/CodeGen/NVPTX/half.ll
llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
llvm/test/CodeGen/NVPTX/ldg-invariant.ll
llvm/test/CodeGen/NVPTX/ldu-ldg.ll
llvm/test/CodeGen/NVPTX/param-load-store.ll
llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
llvm/test/CodeGen/NVPTX/wmma.py
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index 0b4f375b95681..32082904c4d81 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -2,7 +2,7 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "nvptx64-nvidia-cuda" \
-// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -S -o - %s | FileCheck %s
+// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -O1 -S -o - %s | FileCheck %s
#include "Inputs/cuda.h"
@@ -11,10 +11,11 @@
// CHECK: .param .b16 _Z8test_argPDF16bDF16b_param_1
//
__device__ void test_arg(__bf16 *out, __bf16 in) {
-// CHECK: ld.param.b16 %{{h.*}}, [_Z8test_argPDF16bDF16b_param_1];
+// CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [_Z8test_argPDF16bDF16b_param_0];
+// CHECK-DAG: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_argPDF16bDF16b_param_1];
__bf16 bf16 = in;
*out = bf16;
-// CHECK: st.b16
+// CHECK: st.b16 [%[[A]]], %[[R]]
// CHECK: ret;
}
@@ -22,25 +23,27 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z8test_retDF16b(
// CHECK: .param .b16 _Z8test_retDF16b_param_0
__device__ __bf16 test_ret( __bf16 in) {
-// CHECK: ld.param.b16 %h{{.*}}, [_Z8test_retDF16b_param_0];
+// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
return in;
-// CHECK: st.param.b16 [func_retval0+0], %h
+// CHECK: st.param.b16 [func_retval0+0], %[[R]]
// CHECK: ret;
}
+__device__ __bf16 external_func( __bf16 in);
+
// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z9test_callDF16b(
// CHECK: .param .b16 _Z9test_callDF16b_param_0
__device__ __bf16 test_call( __bf16 in) {
-// CHECK: ld.param.b16 %h{{.*}}, [_Z9test_callDF16b_param_0];
-// CHECK: st.param.b16 [param0+0], %h2;
+// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
+// CHECK: st.param.b16 [param0+0], %[[R]];
// CHECK: .param .b32 retval0;
// CHECK: call.uni (retval0),
-// CHECK-NEXT: _Z8test_retDF16b,
+// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
-// CHECK: ld.param.b16 %h{{.*}}, [retval0+0];
- return test_ret(in);
-// CHECK: st.param.b16 [func_retval0+0], %h
+// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
+ return external_func(in);
+// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
// CHECK: ret;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 5fce6d67f3067..179306b59b0ff 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -309,10 +309,6 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
Ret = (5 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (6 << 28);
- } else if (RC == &NVPTX::Float16RegsRegClass) {
- Ret = (7 << 28);
- } else if (RC == &NVPTX::Float16x2RegsRegClass) {
- Ret = (8 << 28);
} else {
report_fatal_error("Bad register class");
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 10b633d1b9422..686a8d9f5448f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -816,8 +816,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
unsigned Opcode_i16, unsigned Opcode_i32,
- std::optional<unsigned> Opcode_i64, unsigned Opcode_f16,
- unsigned Opcode_f16x2, unsigned Opcode_f32,
+ std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
std::optional<unsigned> Opcode_f64) {
switch (VT) {
case MVT::i1:
@@ -831,10 +830,10 @@ pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
return Opcode_i64;
case MVT::f16:
case MVT::bf16:
- return Opcode_f16;
+ return Opcode_i16;
case MVT::v2f16:
case MVT::v2bf16:
- return Opcode_f16x2;
+ return Opcode_i32;
case MVT::f32:
return Opcode_f32;
case MVT::f64:
@@ -935,10 +934,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
- NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
- NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
+ NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
+ NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -948,9 +946,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
- NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
- NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
- NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
+ NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
+ NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -960,15 +957,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
- NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
- NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
+ Opcode =
+ pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
+ NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
+ NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
else
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
- NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
- NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
+ NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
+ NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -977,16 +973,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
- NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
- NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
- NVPTX::LD_f64_areg_64);
+ Opcode =
+ pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
+ NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
+ NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
else
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
- NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
- NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
+ NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
+ NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -1090,15 +1084,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
- NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar,
- NVPTX::LDV_i32_v4_avar, std::nullopt,
- NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
- NVPTX::LDV_f32_v4_avar, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
+ NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
+ std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
break;
}
if (!Opcode)
@@ -1117,15 +1109,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
- NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi,
- NVPTX::LDV_i32_v4_asi, std::nullopt,
- NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
- NVPTX::LDV_f32_v4_asi, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
+ NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
+ std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
break;
}
if (!Opcode)
@@ -1142,18 +1132,16 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
- NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
- NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
- NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
- NVPTX::LDV_f64_v2_ari_64);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
+ NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
+ NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
break;
case NVPTXISD::LoadV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
- NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
NVPTX::LDV_f32_v4_ari_64, std::nullopt);
break;
}
@@ -1165,15 +1153,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
- NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari,
- NVPTX::LDV_i32_v4_ari, std::nullopt,
- NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
- NVPTX::LDV_f32_v4_ari, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
+ NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
+ std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
break;
}
}
@@ -1193,15 +1179,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
- NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
- NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
+ NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
NVPTX::LDV_f64_v2_areg_64);
break;
case NVPTXISD::LoadV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
- NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
NVPTX::LDV_f32_v4_areg_64, std::nullopt);
break;
}
@@ -1213,16 +1197,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
- NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
- NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
+ NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
NVPTX::LDV_f64_v2_areg);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
- NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt,
- NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
- NVPTX::LDV_f32_v4_areg, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
+ NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
+ std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
break;
}
}
@@ -1310,47 +1292,39 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG)
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
+ NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
else
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
+ NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDUV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
@@ -1358,8 +1332,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1367,8 +1339,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
break;
}
@@ -1390,8 +1360,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
else
@@ -1400,8 +1368,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
break;
@@ -1412,8 +1378,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
break;
@@ -1423,8 +1387,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
break;
@@ -1434,8 +1396,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1443,8 +1403,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
break;
}
@@ -1456,47 +1414,39 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG)
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
else
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LDUV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
@@ -1504,8 +1454,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1513,8 +1461,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
break;
}
@@ -1536,8 +1482,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
else
@@ -1546,8 +1490,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
break;
@@ -1558,8 +1500,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
break;
@@ -1569,8 +1509,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
break;
@@ -1580,8 +1518,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1589,8 +1525,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
break;
}
@@ -1606,8 +1540,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
else
@@ -1616,8 +1548,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
break;
@@ -1628,8 +1558,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
break;
@@ -1639,8 +1567,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
break;
@@ -1650,8 +1576,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1659,8 +1583,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
break;
}
@@ -1781,7 +1703,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
- NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
if (!Opcode)
return false;
@@ -1799,7 +1720,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
: SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
- NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
if (!Opcode)
return false;
@@ -1817,14 +1737,13 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
- NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
- NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
+ Opcode =
+ pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
+ NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
+ NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
else
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
- NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
if (!Opcode)
return false;
@@ -1844,12 +1763,10 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
- NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
else
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
- NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
if (!Opcode)
return false;
@@ -1955,14 +1872,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
- NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
NVPTX::STV_i32_v4_avar, std::nullopt,
- NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
NVPTX::STV_f32_v4_avar, std::nullopt);
break;
}
@@ -1976,15 +1891,13 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
- NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
break;
case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi,
- NVPTX::STV_i32_v4_asi, std::nullopt,
- NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
- NVPTX::STV_f32_v4_asi, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
+ NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
+ std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
break;
}
StOps.push_back(Base);
@@ -1996,18 +1909,16 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
- NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
- NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
- NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
- NVPTX::STV_f64_v2_ari_64);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
+ NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
+ NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
- NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
NVPTX::STV_f32_v4_ari_64, std::nullopt);
break;
}
@@ -2019,14 +1930,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
- NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
NVPTX::STV_i32_v4_ari, std::nullopt,
- NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
NVPTX::STV_f32_v4_ari, std::nullopt);
break;
}
@@ -2042,15 +1951,13 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
- NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
- NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
+ NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
NVPTX::STV_f64_v2_areg_64);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
- NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
NVPTX::STV_f32_v4_areg_64, std::nullopt);
break;
}
@@ -2062,16 +1969,14 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
- NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
- NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
+ NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
NVPTX::STV_f64_v2_areg);
break;
case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
- NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt,
- NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
- NVPTX::STV_f32_v4_areg, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
+ NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
+ std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
break;
}
}
@@ -2126,23 +2031,20 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
- NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
break;
case 2:
Opcode =
pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
- NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
- NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
+ NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
NVPTX::LoadParamMemV2F64);
break;
case 4:
- Opcode = pickOpcodeForVT(
- MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
- NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt,
- NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
- NVPTX::LoadParamMemV4F32, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
+ NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
+ std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
break;
}
if (!Opcode)
@@ -2210,21 +2112,18 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
- NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
break;
case 2:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
- NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
break;
case 4:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
NVPTX::StoreRetvalV4I32, std::nullopt,
- NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
NVPTX::StoreRetvalV4F32, std::nullopt);
break;
}
@@ -2289,21 +2188,18 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamI8, NVPTX::StoreParamI16,
NVPTX::StoreParamI32, NVPTX::StoreParamI64,
- NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
NVPTX::StoreParamF32, NVPTX::StoreParamF64);
break;
case 2:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
- NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
break;
case 4:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
NVPTX::StoreParamV4I32, std::nullopt,
- NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
NVPTX::StoreParamV4F32, std::nullopt);
break;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 692ec58bdedff..c46ed2111258c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -410,10 +410,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
- addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
- addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass);
- addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass);
+ addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
// Conversion to/from FP16/FP16x2 is always legal.
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 8df6f13aa68e1..b0d792b5ee3fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -51,11 +51,6 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
: NVPTX::BITCONVERT_64_F2I);
- } else if (DestRC == &NVPTX::Float16RegsRegClass) {
- Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr
- : NVPTX::BITCONVERT_16_I2F);
- } else if (DestRC == &NVPTX::Float16x2RegsRegClass) {
- Op = NVPTX::IMOV32rr;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
: NVPTX::BITCONVERT_32_I2F);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index a540b3d8364f1..43fd6da00828e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -164,10 +164,10 @@ class ValueToRegClass<ValueType T> {
!eq(name, "i16"): Int16Regs,
!eq(name, "i32"): Int32Regs,
!eq(name, "i64"): Int64Regs,
- !eq(name, "f16"): Float16Regs,
- !eq(name, "v2f16"): Float16x2Regs,
- !eq(name, "bf16"): Float16Regs,
- !eq(name, "v2bf16"): Float16x2Regs,
+ !eq(name, "f16"): Int16Regs,
+ !eq(name, "v2f16"): Int32Regs,
+ !eq(name, "bf16"): Int16Regs,
+ !eq(name, "v2bf16"): Int32Regs,
!eq(name, "f32"): Float32Regs,
!eq(name, "f64"): Float64Regs,
!eq(name, "ai32"): Int32ArgRegs,
@@ -280,29 +280,29 @@ multiclass F3<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
def f16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math]>;
def f16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math]>;
}
@@ -354,29 +354,29 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
Requires<[allowFMA]>;
def f16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, allowFMA, doF32FTZ]>;
def f16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, allowFMA]>;
def f16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
- [(set (v2f16 Float16x2Regs:$dst), (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, allowFMA, doF32FTZ]>;
def f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, allowFMA]>;
// These have strange names so we don't perturb existing mir tests.
@@ -417,28 +417,28 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
def _rnf16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, noFMA, doF32FTZ]>;
def _rnf16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, noFMA]>;
def _rnf16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, noFMA, doF32FTZ]>;
def _rnf16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, noFMA]>;
}
@@ -513,7 +513,7 @@ let hasSideEffects = false in {
FromName, ".u64 \t$dst, $src;"), []>;
def _f16 :
NVPTXInst<(outs RC:$dst),
- (ins Float16Regs:$src, CvtMode:$mode),
+ (ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".f16 \t$dst, $src;"), []>;
def _f32 :
@@ -537,7 +537,7 @@ let hasSideEffects = false in {
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
- defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
@@ -576,7 +576,7 @@ multiclass CVT_FROM_FLOAT_SM80<string FromName, RegisterClass RC> {
Requires<[hasPTX<70>, hasSM<80>]>;
}
- defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Float16x2Regs>;
+ defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>;
defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
}
@@ -640,21 +640,21 @@ defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
-defm SELP_f16 : SELP_PATTERN<"b16", f16, Float16Regs, f16imm, fpimm>;
+defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
// This does not work as tablegen fails to infer the type of 'imm'.
// def v2f16imm : Operand<v2f16>;
-// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Float16x2Regs, v2f16imm, imm>;
+// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;
def SELP_f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
"selp.b32 \t$dst, $a, $b, $p;",
- [(set Float16x2Regs:$dst,
- (select Int1Regs:$p, (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>;
+ [(set Int32Regs:$dst,
+ (select Int1Regs:$p, (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>;
//-----------------------------------
// Test Instructions
@@ -783,26 +783,26 @@ def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
// Matchers for signed, unsigned mul.wide ISD nodes.
-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
+ (MULWIDES32 i16:$a, i16:$b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
(MULWIDES32Imm Int16Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
+def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
(MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
(MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
+def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
(MULWIDES64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
+def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
@@ -1003,7 +1003,7 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
// fp16 immediate values in .f16 instructions. Instead we have to load
// the constant into a register using mov.b16.
def LOAD_CONST_F16 :
- NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a),
+ NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
"mov.b16 \t$dst, $a;", []>;
defm FADD : F3_fma_component<"add", fadd>;
@@ -1028,10 +1028,10 @@ class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pre
!strconcat(OpcStr, " \t$dst, $src;"),
[(set RC:$dst, (fneg (T RC:$src)))]>,
Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
-def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Float16Regs, doF32FTZ>;
-def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Float16Regs, True>;
-def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>;
-def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Float16x2Regs, True>;
+def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>;
+def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>;
+def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
+def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>;
//
// F64 division
@@ -1211,10 +1211,10 @@ multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred>
Requires<[useFP16Math, Pred]>;
}
-defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Float16Regs, doF32FTZ>;
-defm FMA16 : FMA_F16<"fma.rn.f16", f16, Float16Regs, True>;
-defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>;
-defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Float16x2Regs, True>;
+defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>;
+defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>;
+defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>;
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
@@ -1651,13 +1651,13 @@ defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
def SETP_f16rr :
NVPTXInst<(outs Int1Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp),
+ (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
[]>, Requires<[useFP16Math]>;
def SETP_f16x2rr :
NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
- (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp),
+ (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
[]>,
Requires<[useFP16Math]>;
@@ -1690,7 +1690,7 @@ defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
-defm SET_f16 : SET<"f16", Float16Regs, f16imm>;
+defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
@@ -1760,7 +1760,14 @@ let IsSimpleMove=1, hasSideEffects=0 in {
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
- def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src),
+ def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.b16 \t$dst, $sss;", []>;
+ def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.b32 \t$dst, $sss;", []>;
+ def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.b64 \t$dst, $sss;", []>;
+
+ def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
// We have to use .b16 here as there's no mov.f16.
"mov.b16 \t$dst, $src;", []>;
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
@@ -1782,6 +1789,13 @@ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.u64 \t$dst, $src;",
[(set Int64Regs:$dst, imm:$src)]>;
+def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.b16 \t$dst, $src;", []>;
+def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.b32 \t$dst, $src;", []>;
+def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.b64 \t$dst, $src;", []>;
+
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
"mov.f32 \t$dst, $src;",
[(set Float32Regs:$dst, fpimm:$src)]>;
@@ -1824,14 +1838,14 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
Instruction set_64ri,
Instruction set_64ir> {
// i16 -> pred
- def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
+ def : Pat<(i1 (OpNode i16:$a, i16:$b)),
(setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
(setp_16ri Int16Regs:$a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
(setp_16ir imm:$a, Int16Regs:$b, Mode)>;
// i32 -> pred
- def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
+ def : Pat<(i1 (OpNode i32:$a, i32:$b)),
(setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
(setp_32ri Int32Regs:$a, imm:$b, Mode)>;
@@ -1846,14 +1860,14 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
(setp_64ir imm:$a, Int64Regs:$b, Mode)>;
// i16 -> i32
- def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
+ def : Pat<(i32 (OpNode i16:$a, i16:$b)),
(set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
(set_16ri Int16Regs:$a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
(set_16ir imm:$a, Int16Regs:$b, Mode)>;
// i32 -> i32
- def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
+ def : Pat<(i32 (OpNode i32:$a, i32:$b)),
(set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
(set_32ri Int32Regs:$a, imm:$b, Mode)>;
@@ -1926,23 +1940,23 @@ def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
// f16 -> pred
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
// f32 -> pred
@@ -1971,23 +1985,23 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
// f16 -> i32
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
// f32 -> i32
@@ -2276,16 +2290,10 @@ def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
-def LoadParamMemF16 : LoadParamMemInst<Float16Regs, ".b16">;
-def LoadParamMemF16x2 : LoadParamMemInst<Float16x2Regs, ".b32">;
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
-def LoadParamMemV2F16 : LoadParamV2MemInst<Float16Regs, ".b16">;
-def LoadParamMemV2F16x2: LoadParamV2MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
-def LoadParamMemV4F16 : LoadParamV4MemInst<Float16Regs, ".b16">;
-def LoadParamMemV4F16x2: LoadParamV4MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
@@ -2302,16 +2310,10 @@ def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
def StoreParamV4I16 : StoreParamV4Inst<Int16Regs, ".b16">;
def StoreParamV4I8 : StoreParamV4Inst<Int16Regs, ".b8">;
-def StoreParamF16 : StoreParamInst<Float16Regs, ".b16">;
-def StoreParamF16x2 : StoreParamInst<Float16x2Regs, ".b32">;
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
-def StoreParamV2F16 : StoreParamV2Inst<Float16Regs, ".b16">;
-def StoreParamV2F16x2 : StoreParamV2Inst<Float16x2Regs, ".b32">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
-def StoreParamV4F16 : StoreParamV4Inst<Float16Regs, ".b16">;
-def StoreParamV4F16x2 : StoreParamV4Inst<Float16x2Regs, ".b32">;
def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
@@ -2328,15 +2330,9 @@ def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
-def StoreRetvalF16 : StoreRetvalInst<Float16Regs, ".b16">;
-def StoreRetvalF16x2 : StoreRetvalInst<Float16x2Regs, ".b32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
-def StoreRetvalV2F16 : StoreRetvalV2Inst<Float16Regs, ".b16">;
-def StoreRetvalV2F16x2: StoreRetvalV2Inst<Float16x2Regs, ".b32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
-def StoreRetvalV4F16 : StoreRetvalV4Inst<Float16Regs, ".b16">;
-def StoreRetvalV4F16x2: StoreRetvalV4Inst<Float16x2Regs, ".b32">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2347,19 +2343,26 @@ class CallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a, ",
[(CallArg (i32 0), regclass:$a)]>;
+class CallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a, ",
+ [(CallArg (i32 0), vt:$a)]>;
+
class LastCallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a",
[(LastCallArg (i32 0), regclass:$a)]>;
+class LastCallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a",
+ [(LastCallArg (i32 0), vt:$a)]>;
def CallArgI64 : CallArgInst<Int64Regs>;
-def CallArgI32 : CallArgInst<Int32Regs>;
-def CallArgI16 : CallArgInst<Int16Regs>;
+def CallArgI32 : CallArgInstVT<Int32Regs, i32>;
+def CallArgI16 : CallArgInstVT<Int16Regs, i16>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
-def LastCallArgI32 : LastCallArgInst<Int32Regs>;
-def LastCallArgI16 : LastCallArgInst<Int16Regs>;
+def LastCallArgI32 : LastCallArgInstVT<Int32Regs, i32>;
+def LastCallArgI16 : LastCallArgInstVT<Int16Regs, i16>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
@@ -2376,7 +2379,7 @@ def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
- [(CallVoid Int32Regs:$addr)]>;
+ [(CallVoid i32:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
[(CallVoid Int64Regs:$addr)]>;
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
@@ -2413,53 +2416,54 @@ class MoveParamInst<ValueType T, NVPTXRegClass regclass, string asmstr> :
!strconcat("mov", asmstr, " \t$dst, $src;"),
[(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>;
-class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty,
+class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty, ValueType vt,
string asmstr> :
NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
!strconcat("mov", asmstr, " \t$dst, $src;"),
- [(set regclass:$dst, (MoveParam texternalsym:$src))]>;
+ [(set vt:$dst, (MoveParam texternalsym:$src))]>;
def MoveParamI64 : MoveParamInst<i64, Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<i32, Int32Regs, ".b32">;
-def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, ".b64">;
-def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, ".b32">;
+def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, i64, ".b64">;
+def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, i32, ".b32">;
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.u16.u32 \t$dst, $src;",
- [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
+ "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ?
+ [(set i16:$dst, (MoveParam i16:$src))]>;
def MoveParamF64 : MoveParamInst<f64, Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<f32, Float32Regs, ".f32">;
-def MoveParamF16 : MoveParamInst<f16, Float16Regs, ".f16">;
-class PseudoUseParamInst<NVPTXRegClass regclass> :
+class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
NVPTXInst<(outs), (ins regclass:$src),
"// Pseudo use of $src",
- [(PseudoUseParam regclass:$src)]>;
+ [(PseudoUseParam vt:$src)]>;
-def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
-def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
-def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
-def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
-def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs, i64>;
+def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs, i32>;
+def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs, i16>;
+def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs, f64>;
+def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs, f32>;
class ProxyRegInst<string SzStr, ValueType T, NVPTXRegClass regclass> :
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
!strconcat("mov.", SzStr, " \t$dst, $src;"),
[(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>;
-let isCodeGenOnly=1, isPseudo=1 in {
- def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
- def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
- def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
- def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
- def ProxyRegF16 : ProxyRegInst<"b16", f16, Float16Regs>;
- def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Float16Regs>;
- def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
- def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
- def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Float16x2Regs>;
- def ProxyRegBF16x2 : ProxyRegInst<"b32", v2bf16, Float16x2Regs>;
+def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
+def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
+def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
+def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
+def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
+def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
+
+foreach vt = [f16, bf16] in {
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>;
+}
+
+foreach vt = [v2f16, v2bf16] in {
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>;
}
//
@@ -2509,8 +2513,6 @@ let mayLoad=1, hasSideEffects=0 in {
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
- defm LD_f16 : LD<Float16Regs>;
- defm LD_f16x2 : LD<Float16x2Regs>;
defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
@@ -2559,8 +2561,6 @@ let mayStore=1, hasSideEffects=0 in {
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
- defm ST_f16 : ST<Float16Regs>;
- defm ST_f16x2 : ST<Float16x2Regs>;
defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
@@ -2647,8 +2647,8 @@ let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
- defm LDV_f16 : LD_VEC<Float16Regs>;
- defm LDV_f16x2 : LD_VEC<Float16x2Regs>;
+ defm LDV_f16 : LD_VEC<Int16Regs>;
+ defm LDV_f16x2 : LD_VEC<Int32Regs>;
defm LDV_f32 : LD_VEC<Float32Regs>;
defm LDV_f64 : LD_VEC<Float64Regs>;
}
@@ -2742,8 +2742,8 @@ let mayStore=1, hasSideEffects=0 in {
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
- defm STV_f16 : ST_VEC<Float16Regs>;
- defm STV_f16x2 : ST_VEC<Float16x2Regs>;
+ defm STV_f16 : ST_VEC<Int16Regs>;
+ defm STV_f16x2 : ST_VEC<Int32Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}
@@ -2757,23 +2757,30 @@ class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
!strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set (TOut regclassOut:$d), (bitconvert (TIn regclassIn:$a)))]>;
-def BITCONVERT_16_I2F : F_BITCONVERT<"16", i16, f16>;
-def BITCONVERT_16_F2I : F_BITCONVERT<"16", f16, i16>;
-def BITCONVERT_16_I2BF : F_BITCONVERT<"16", i16, bf16>;
-def BITCONVERT_16_BF2I : F_BITCONVERT<"16", bf16, i16>;
def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
-def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", i32, v2f16>;
-def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", v2f16, i32>;
-def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", f32, v2f16>;
-def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", v2f16, f32>;
-def BITCONVERT_32_I2BF16x2 : F_BITCONVERT<"32", i32, v2bf16>;
-def BITCONVERT_32_BF16x22I : F_BITCONVERT<"32", v2bf16, i32>;
-def BITCONVERT_32_F2BF16x2 : F_BITCONVERT<"32", f32, v2bf16>;
-def BITCONVERT_32_BF16x22F : F_BITCONVERT<"32", v2bf16, f32>;
+foreach vt = [v2f16, v2bf16] in {
+def: Pat<(vt (bitconvert (i32 UInt32Const:$a))),
+ (IMOVB32ri UInt32Const:$a)>;
+def: Pat<(vt (bitconvert (i32 Int32Regs:$a))),
+ (ProxyRegI32 Int32Regs:$a)>;
+def: Pat<(i32 (bitconvert (vt Int32Regs:$a))),
+ (ProxyRegI32 Int32Regs:$a)>;
+def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
+ (BITCONVERT_32_F2I Float32Regs:$a)>;
+}
+foreach vt = [f16, bf16] in {
+def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
+ (IMOVB16ri UInt16Const:$a)>;
+def: Pat<(vt (bitconvert (i16 Int16Regs:$a))),
+ (ProxyRegI16 Int16Regs:$a)>;
+def: Pat<(i16 (bitconvert (vt Int16Regs:$a))),
+ (ProxyRegI16 Int16Regs:$a)>;
+}
+
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
// use an integer selp to select either 1 or 0 and then cvt to floating-point.
@@ -2840,24 +2847,24 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
// f16 -> sint
-def : Pat<(i1 (fp_to_sint (f16 Float16Regs:$a))),
- (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s16_f16 (f16 Float16Regs:$a), CvtRZI)>;
-def : Pat<(i32 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s32_f16 (f16 Float16Regs:$a), CvtRZI)>;
-def : Pat<(i64 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s64_f16 Int16Regs:$a, CvtRZI)>;
// f16 -> uint
-def : Pat<(i1 (fp_to_uint (f16 Float16Regs:$a))),
- (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
-def : Pat<(i32 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
-def : Pat<(i64 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u16_f16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u32_f16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u64_f16 Int16Regs:$a, CvtRZI)>;
// f32 -> sint
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
@@ -2994,17 +3001,17 @@ def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
// Select instructions with 32-bit predicates
-def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
+def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b),
(SELP_b16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
+def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b),
(SELP_b32rr Int32Regs:$a, Int32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (f16 Float16Regs:$a), (f16 Float16Regs:$b)),
- (SELP_f16rr Float16Regs:$a, Float16Regs:$b,
+def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
+ (SELP_f16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
@@ -3070,39 +3077,39 @@ let hasSideEffects = false in {
// Extract element of f16x2 register. PTX does not provide any way
// to access elements of f16x2 vector directly, so we need to
// extract it using a temporary register.
- def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16x2Regs:$src),
+ def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int32Regs:$src),
"{{ .reg .b16 \t%tmp_hi;\n\t"
" mov.b32 \t{$dst, %tmp_hi}, $src; }}",
- [(set Float16Regs:$dst,
- (extractelt (v2f16 Float16x2Regs:$src), 0))]>;
- def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16x2Regs:$src),
+ [(set Int16Regs:$dst,
+ (extractelt (v2f16 Int32Regs:$src), 0))]>;
+ def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int32Regs:$src),
"{{ .reg .b16 \t%tmp_lo;\n\t"
" mov.b32 \t{%tmp_lo, $dst}, $src; }}",
- [(set Float16Regs:$dst,
- (extractelt (v2f16 Float16x2Regs:$src), 1))]>;
+ [(set Int16Regs:$dst,
+ (extractelt (v2f16 Int32Regs:$src), 1))]>;
// Coalesce two f16 registers into f16x2
- def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ def BuildF16x2 : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
"mov.b32 \t$dst, {{$a, $b}};",
- [(set (v2f16 Float16x2Regs:$dst),
- (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>;
+ [(set (v2f16 Int32Regs:$dst),
+ (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>;
// Directly initializing the underlying b32 register is one less SASS
// instruction than a vector-packing move.
- def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src),
+ def BuildF16x2i : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.b32 \t$dst, $src;",
[]>;
// Split f16x2 into two f16 registers.
- def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
- (ins Float16x2Regs:$src),
+ def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
+ (ins Int32Regs:$src),
"mov.b32 \t{{$lo, $hi}}, $src;",
[]>;
// Split an i32 into two f16
- def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
+ def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
(ins Int32Regs:$src),
"mov.b32 \t{{$lo, $hi}}, $src;",
[]>;
@@ -3186,14 +3193,14 @@ def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
// fpextend f16 -> f32
-def : Pat<(f32 (fpextend (f16 Float16Regs:$a))),
- (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f32 (fpextend (f16 Float16Regs:$a))),
- (CVT_f32_f16 Float16Regs:$a, CvtNONE)>;
+def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
// fpextend f16 -> f64
-def : Pat<(f64 (fpextend (f16 Float16Regs:$a))),
- (CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
+def : Pat<(f64 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
// fpextend f32 -> f64
def : Pat<(f64 (fpextend Float32Regs:$a)),
@@ -3207,8 +3214,8 @@ def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
// fceil, ffloor, froundeven, ftrunc.
multiclass CVT_ROUND<SDNode OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
- def : Pat<(OpNode (f16 Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, Mode)>;
+ def : Pat<(OpNode (f16 Int16Regs:$a)),
+ (CVT_f16_f16 Int16Regs:$a, Mode)>;
def : Pat<(OpNode Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>;
def : Pat<(OpNode Float32Regs:$a),
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 16ae89cebfc8b..bfc79d383191b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -686,46 +686,46 @@ class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
multiclass MIN_MAX<string IntName> {
foreach P = [
MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
- int_nvvm_fmax_f16), Float16Regs>,
+ int_nvvm_fmax_f16), Int16Regs>,
MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
- int_nvvm_fmax_ftz_f16), Float16Regs>,
+ int_nvvm_fmax_ftz_f16), Int16Regs>,
MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
- int_nvvm_fmax_nan_f16), Float16Regs>,
+ int_nvvm_fmax_nan_f16), Int16Regs>,
MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>,
+ int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
- Float16Regs, [hasPTX<72>, hasSM<86>]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
- Float16Regs, [hasPTX<72>, hasSM<86>]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
- Float16Regs, [hasPTX<72>, hasSM<86>]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
- int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX<72>, hasSM<86>]>,
+ int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
- int_nvvm_fmax_f16x2), Float16x2Regs>,
+ int_nvvm_fmax_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX<72>, hasSM<86>]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX<72>, hasSM<86>]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX<72>, hasSM<86>]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX<72>, hasSM<86>]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
@@ -933,9 +933,9 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
- Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
+ Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
- Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
+ Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
@@ -986,30 +986,30 @@ multiclass FMA_INST {
FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
- FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs,
+ FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs,
+ FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs,
+ FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs,
+ FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
+ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
+ FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs,
+ FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs,
+ FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
[hasPTX<42>, hasSM<53>]>,
FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
- Float16x2Regs, [hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs,
+ Int32Regs, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
[hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
- Float16x2Regs, [hasPTX<70>, hasSM<80>]>,
+ Int32Regs, [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
@@ -1397,9 +1397,9 @@ def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
- (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
+ (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
- (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
+ (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
// Bitcast
@@ -2159,12 +2159,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
-defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
-defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
-defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
-defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -2212,9 +2208,9 @@ defm INT_PTX_LDU_G_v2i16_ELE
defm INT_PTX_LDU_G_v2i32_ELE
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
- : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
+ : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
- : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
+ : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
@@ -2231,10 +2227,10 @@ defm INT_PTX_LDU_G_v4i32_ELE
Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
: VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
- Float16Regs>;
+ Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
: VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
- Float16x2Regs>;
+ Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
@@ -2274,18 +2270,10 @@ defm INT_PTX_LDG_GLOBAL_i32
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
-defm INT_PTX_LDG_GLOBAL_f16
- : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
-defm INT_PTX_LDG_GLOBAL_f16x2
- : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
: LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
: LDG_G<"f64 \t$result, [$src];", Float64Regs>;
-defm INT_PTX_LDG_GLOBAL_p32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
-defm INT_PTX_LDG_GLOBAL_p64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -2333,10 +2321,6 @@ defm INT_PTX_LDG_G_v2i16_ELE
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
-defm INT_PTX_LDG_G_v2f16_ELE
- : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
-defm INT_PTX_LDG_G_v2f16x2_ELE
- : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
@@ -2349,10 +2333,6 @@ defm INT_PTX_LDG_G_v4i16_ELE
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
-defm INT_PTX_LDG_G_v4f16_ELE
- : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
-defm INT_PTX_LDG_G_v4f16x2_ELE
- : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
@@ -6305,7 +6285,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
: WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
// NVPTX register types used to carry fragment data.
NVPTXRegClass regclass = !cond(
- !eq(ptx_elt_type, "f16") : Float16x2Regs,
+ !eq(ptx_elt_type, "f16") : Int32Regs,
!eq(ptx_elt_type, "f32") : Float32Regs,
!eq(ptx_elt_type, "f64") : Float64Regs,
!eq(ptx_elt_type, "bf16") : Int32Regs,
diff --git a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index 869231ff4ffe9..258ae97a20d58 100644
--- a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -73,10 +73,6 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
case NVPTX::ProxyRegI16:
case NVPTX::ProxyRegI32:
case NVPTX::ProxyRegI64:
- case NVPTX::ProxyRegF16:
- case NVPTX::ProxyRegF16x2:
- case NVPTX::ProxyRegBF16:
- case NVPTX::ProxyRegBF16x2:
case NVPTX::ProxyRegF32:
case NVPTX::ProxyRegF64:
replaceMachineInstructionUsage(MF, MI);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6e4208d272412..f1213f030bba7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -29,14 +29,6 @@ namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass)
return ".f32";
- if (RC == &NVPTX::Float16RegsRegClass)
- // Ideally fp16 registers should be .f16, but this syntax is only
- // supported on sm_53+. On the other hand, .b16 registers are
- // accepted for all supported fp16 instructions on all GPU
- // variants, so we can use them instead.
- return ".b16";
- if (RC == &NVPTX::Float16x2RegsRegClass)
- return ".b32";
if (RC == &NVPTX::Float64RegsRegClass)
return ".f64";
if (RC == &NVPTX::Int64RegsRegClass)
@@ -73,10 +65,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass)
return "%f";
- if (RC == &NVPTX::Float16RegsRegClass)
- return "%h";
- if (RC == &NVPTX::Float16x2RegsRegClass)
- return "%hh";
if (RC == &NVPTX::Float64RegsRegClass)
return "%fd";
if (RC == &NVPTX::Int64RegsRegClass)
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 31d5441e58b38..b62460e8cd31f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -57,11 +57,11 @@ foreach i = 0...31 in {
// Register classes
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
-def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>;
+def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
+def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16], 32,
+ (add (sequence "R%u", 0, 4),
+ VRFrame32, VRFrameLocal32)>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
-def Float16Regs : NVPTXRegClass<[f16,bf16], 16, (add (sequence "H%u", 0, 4))>;
-def Float16x2Regs : NVPTXRegClass<[v2f16,v2bf16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll
index 80113f510a05a..c2cf804c5013a 100644
--- a/llvm/test/CodeGen/NVPTX/bf16.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16.ll
@@ -7,7 +7,7 @@
define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_load_store
-; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
%val = load bfloat, ptr addrspace(1) %in
store bfloat %val, ptr addrspace(1) %out
@@ -16,7 +16,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_bitcast_from_bfloat
-; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
%val = load bfloat, ptr addrspace(1) %in
%val_int = bitcast bfloat %val to i16
diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
index 0a411f4ef4d5a..56f9c905caa91 100644
--- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -14,8 +14,8 @@ target triple = "nvptx64-nvidia-cuda"
%complex_half = type { half, half }
; CHECK: .param .align 2 .b8 param2[4];
-; CHECK: st.param.b16 [param2+0], %h1;
-; CHECK: st.param.b16 [param2+2], %h2;
+; CHECK: st.param.b16 [param2+0], %rs1;
+; CHECK: st.param.b16 [param2+2], %rs2;
; CHECK: .param .align 2 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE,
@@ -37,8 +37,8 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
define void @boom() {
%fp = call ptr @usefp(ptr @callee)
; CHECK: .param .align 2 .b8 param0[4];
- ; CHECK: st.param.b16 [param0+0], %h1;
- ; CHECK: st.param.b16 [param0+2], %h2;
+ ; CHECK: st.param.b16 [param0+0], %rs1;
+ ; CHECK: st.param.b16 [param0+2], %rs2;
; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
call void %fp(ptr byval(%"class.complex") null)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 0b994a7406773..55fde7837487b 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -43,7 +43,7 @@
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: test_ret_const(
-; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
+; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00;
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_ret_const() #0 {
@@ -51,14 +51,14 @@ define half @test_ret_const() #0 {
}
; CHECK-LABEL: test_fadd(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
-; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_param_1];
+; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
+; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd(half %a, half %b) #0 {
@@ -67,14 +67,14 @@ define half @test_fadd(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fadd_v1f16(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
-; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1];
+; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
+; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
@@ -84,14 +84,14 @@ define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
; Check that we can lower fadd with immediate arguments.
; CHECK-LABEL: test_fadd_imm_0(
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
-; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
-; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0];
+; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
+; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
+; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
+; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_0(half %b) #0 {
@@ -100,14 +100,14 @@ define half @test_fadd_imm_0(half %b) #0 {
}
; CHECK-LABEL: test_fadd_imm_1(
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
-; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
-; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0];
+; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
+; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
+; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00;
+; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_1(half %a) #0 {
@@ -116,14 +116,14 @@ define half @test_fadd_imm_1(half %a) #0 {
}
; CHECK-LABEL: test_fsub(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
-; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fsub_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fsub_param_1];
+; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
+; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fsub(half %a, half %b) #0 {
@@ -132,15 +132,15 @@ define half @test_fsub(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fneg(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
-; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
-; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
-; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
-; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fneg_param_0];
+; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
+; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
+; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
+; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fneg(half %a) #0 {
@@ -149,14 +149,14 @@ define half @test_fneg(half %a) #0 {
}
; CHECK-LABEL: test_fmul(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
-; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmul_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmul_param_1];
+; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
+; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fmul(half %a, half %b) #0 {
@@ -165,15 +165,15 @@ define half @test_fmul(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fdiv(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fdiv_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fdiv_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
-; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
+; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fdiv(half %a, half %b) #0 {
@@ -182,8 +182,8 @@ define half @test_fdiv(half %a, half %b) #0 {
}
; CHECK-LABEL: test_frem(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_frem_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_frem_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
@@ -198,7 +198,7 @@ define half @test_fdiv(half %a, half %b) #0 {
; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]];
; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]];
-; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RESULT]];
+; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_frem(half %a, half %b) #0 {
@@ -207,7 +207,7 @@ define half @test_frem(half %a, half %b) #0 {
}
; CHECK-LABEL: test_store(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_store_param_0];
; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
; CHECK-NEXT: ret;
@@ -218,7 +218,7 @@ define void @test_store(half %a, ptr %b) #0 {
; CHECK-LABEL: test_load(
; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
-; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
+; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_load(ptr %a) #0 {
@@ -243,8 +243,8 @@ define void @test_halfp0a1(ptr noalias readonly %from, ptr %to) {
declare half @test_callee(half %a, half %b) #0
; CHECK-LABEL: test_call(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
@@ -254,7 +254,7 @@ declare half @test_callee(half %a, half %b) #0
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -264,8 +264,8 @@ define half @test_call(half %a, half %b) #0 {
}
; CHECK-LABEL: test_call_flipped(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_flipped_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
@@ -275,7 +275,7 @@ define half @test_call(half %a, half %b) #0 {
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -285,8 +285,8 @@ define half @test_call_flipped(half %a, half %b) #0 {
}
; CHECK-LABEL: test_tailcall_flipped(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
@@ -296,7 +296,7 @@ define half @test_call_flipped(half %a, half %b) #0 {
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -306,10 +306,10 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
}
; CHECK-LABEL: test_select(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1];
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
-; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
@@ -318,15 +318,15 @@ define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
}
; CHECK-LABEL: test_select_cc(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
-; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_param_2];
+; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_param_3];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
-; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
@@ -338,8 +338,8 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
; CHECK-LABEL: test_select_cc_f32_f16(
; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
-; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
+; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2];
+; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
@@ -355,13 +355,13 @@ define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
}
; CHECK-LABEL: test_select_cc_f16_f32(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0];
; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
; CHECK-NOFTZ-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
-; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1];
+; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
@@ -371,8 +371,8 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
}
; CHECK-LABEL: test_fcmp_une(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_une_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_une_param_1];
; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -387,8 +387,8 @@ define i1 @test_fcmp_une(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ueq(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1];
; CHECK-F16-NOFTZ: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.equ.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -403,8 +403,8 @@ define i1 @test_fcmp_ueq(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ugt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1];
; CHECK-F16-NOFTZ: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.gtu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -419,8 +419,8 @@ define i1 @test_fcmp_ugt(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_uge(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uge_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uge_param_1];
; CHECK-F16-NOFTZ: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.geu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -435,8 +435,8 @@ define i1 @test_fcmp_uge(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ult(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ult_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ult_param_1];
; CHECK-F16-NOFTZ: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ltu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -451,8 +451,8 @@ define i1 @test_fcmp_ult(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ule(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ule_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ule_param_1];
; CHECK-F16-NOFTZ: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.leu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -468,8 +468,8 @@ define i1 @test_fcmp_ule(half %a, half %b) #0 {
; CHECK-LABEL: test_fcmp_uno(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uno_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uno_param_1];
; CHECK-F16-NOFTZ: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.nan.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -484,8 +484,8 @@ define i1 @test_fcmp_uno(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_one(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_one_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_one_param_1];
; CHECK-F16-NOFTZ: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ne.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -500,8 +500,8 @@ define i1 @test_fcmp_one(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_oeq(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1];
; CHECK-F16-NOFTZ: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.eq.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -516,8 +516,8 @@ define i1 @test_fcmp_oeq(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ogt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1];
; CHECK-F16-NOFTZ: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.gt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -532,8 +532,8 @@ define i1 @test_fcmp_ogt(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_oge(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oge_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oge_param_1];
; CHECK-F16-NOFTZ: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.ge.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -548,8 +548,8 @@ define i1 @test_fcmp_oge(half %a, half %b) #0 {
}
; XCHECK-LABEL: test_fcmp_olt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_olt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -564,8 +564,8 @@ define i1 @test_fcmp_olt(half %a, half %b) #0 {
}
; XCHECK-LABEL: test_fcmp_ole(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ole_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16-NOFTZ: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.le.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -580,8 +580,8 @@ define i1 @test_fcmp_ole(half %a, half %b) #0 {
}
; CHECK-LABEL: test_fcmp_ord(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ord_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16-NOFTZ: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ: setp.num.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
@@ -596,8 +596,8 @@ define i1 @test_fcmp_ord(half %a, half %b) #0 {
}
; CHECK-LABEL: test_br_cc(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_br_cc_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_br_cc_param_1];
; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
@@ -623,10 +623,10 @@ else:
; CHECK-LABEL: test_phi(
; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
-; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
+; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]];
; CHECK: [[LOOP:\$L__BB[0-9_]+]]:
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
-; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
+; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
+; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0+0], %[[P1]];
; CHECK: call.uni (retval0),
@@ -651,7 +651,7 @@ return:
declare i1 @test_dummy(ptr %p1) #0
; CHECK-LABEL: test_fptosi_i32(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0];
; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -661,7 +661,7 @@ define i32 @test_fptosi_i32(half %a) #0 {
}
; CHECK-LABEL: test_fptosi_i64(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0];
; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -671,7 +671,7 @@ define i64 @test_fptosi_i64(half %a) #0 {
}
; CHECK-LABEL: test_fptoui_i32(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0];
; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -681,7 +681,7 @@ define i32 @test_fptoui_i32(half %a) #0 {
}
; CHECK-LABEL: test_fptoui_i64(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0];
; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -692,7 +692,7 @@ define i64 @test_fptoui_i64(half %a) #0 {
; CHECK-LABEL: test_uitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
-; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32(i32 %a) #0 {
@@ -702,7 +702,7 @@ define half @test_uitofp_i32(i32 %a) #0 {
; CHECK-LABEL: test_uitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
-; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i64(i64 %a) #0 {
@@ -712,7 +712,7 @@ define half @test_uitofp_i64(i64 %a) #0 {
; CHECK-LABEL: test_sitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
-; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32(i32 %a) #0 {
@@ -722,7 +722,7 @@ define half @test_sitofp_i32(i32 %a) #0 {
; CHECK-LABEL: test_sitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
-; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i64(i64 %a) #0 {
@@ -732,14 +732,14 @@ define half @test_sitofp_i64(i64 %a) #0 {
; CHECK-LABEL: test_uitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
-; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
-; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
-; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]];
+; CHECK-DAG: cvt.rn.f16.u32 [[C:%rs[0-9]+]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1];
+; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
+; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
@@ -750,14 +750,14 @@ define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
; CHECK-LABEL: test_sitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
-; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
-; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
-; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]];
+; CHECK-DAG: cvt.rn.f16.s32 [[C:%rs[0-9]+]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1];
+; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
+; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]];
; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
-; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
@@ -768,7 +768,7 @@ define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
; CHECK-LABEL: test_fptrunc_float(
; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fptrunc_float(float %a) #0 {
@@ -778,7 +778,7 @@ define half @test_fptrunc_float(float %a) #0 {
; CHECK-LABEL: test_fptrunc_double(
; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
-; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
+; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fptrunc_double(double %a) #0 {
@@ -787,7 +787,7 @@ define half @test_fptrunc_double(double %a) #0 {
}
; CHECK-LABEL: test_fpext_float(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]];
; CHECK: st.param.f32 [func_retval0+0], [[R]];
@@ -798,7 +798,7 @@ define float @test_fpext_float(half %a) #0 {
}
; CHECK-LABEL: test_fpext_double(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0];
; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
; CHECK: st.param.f64 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -809,9 +809,8 @@ define double @test_fpext_double(half %a) #0 {
; CHECK-LABEL: test_bitcast_halftoi16(
-; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
-; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
-; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
+; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0];
+; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AH]]
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i16 @test_bitcast_halftoi16(half %a) #0 {
@@ -821,8 +820,7 @@ define i16 @test_bitcast_halftoi16(half %a) #0 {
; CHECK-LABEL: test_bitcast_i16tohalf(
; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
-; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
-; CHECK: st.param.b16 [func_retval0+0], [[AH]];
+; CHECK: st.param.b16 [func_retval0+0], [[AS]];
; CHECK: ret;
define half @test_bitcast_i16tohalf(i16 %a) #0 {
%r = bitcast i16 %a to half
@@ -855,12 +853,12 @@ declare half @llvm.roundeven.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
; CHECK-LABEL: test_sqrt(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sqrt_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sqrt(half %a) #0 {
@@ -876,11 +874,11 @@ define half @test_sqrt(half %a) #0 {
;}
; CHECK-LABEL: test_sin(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sin(half %a) #0 #1 {
@@ -889,11 +887,11 @@ define half @test_sin(half %a) #0 #1 {
}
; CHECK-LABEL: test_cos(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_cos(half %a) #0 #1 {
@@ -944,16 +942,16 @@ define half @test_cos(half %a) #0 #1 {
;}
; CHECK-LABEL: test_fma(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
-; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
-; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fma_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fma_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2];
+; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret
define half @test_fma(half %a, half %b, half %c) #0 {
@@ -962,12 +960,12 @@ define half @test_fma(half %a, half %b, half %c) #0 {
}
; CHECK-LABEL: test_fabs(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fabs_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ: abs.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fabs(half %a) #0 {
@@ -976,15 +974,15 @@ define half @test_fabs(half %a) #0 {
}
; CHECK-LABEL: test_minnum(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_minnum_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_minnum_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOFTZ: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_minnum(half %a, half %b) #0 {
@@ -993,15 +991,15 @@ define half @test_minnum(half %a, half %b) #0 {
}
; CHECK-LABEL: test_maxnum(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_maxnum_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_maxnum_param_1];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOFTZ: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_maxnum(half %a, half %b) #0 {
@@ -1010,15 +1008,12 @@ define half @test_maxnum(half %a, half %b) #0 {
}
; CHECK-LABEL: test_copysign(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
-; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
-; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
+; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_param_0];
+; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_param_1];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
+; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: st.param.b16 [func_retval0+0], [[RX]];
; CHECK: ret;
define half @test_copysign(half %a, half %b) #0 {
%r = call half @llvm.copysign.f16(half %a, half %b)
@@ -1026,16 +1021,14 @@ define half @test_copysign(half %a, half %b) #0 {
}
; CHECK-LABEL: test_copysign_f32(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
+; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
-; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: st.param.b16 [func_retval0+0], [[RX]];
; CHECK: ret;
define half @test_copysign_f32(half %a, float %b) #0 {
%tb = fptrunc float %b to half
@@ -1044,17 +1037,15 @@ define half @test_copysign_f32(half %a, float %b) #0 {
}
; CHECK-LABEL: test_copysign_f64(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
+; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
-; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: st.param.b16 [func_retval0+0], [[RX]];
; CHECK: ret;
define half @test_copysign_f64(half %a, double %b) #0 {
%tb = fptrunc double %b to half
@@ -1063,16 +1054,13 @@ define half @test_copysign_f64(half %a, double %b) #0 {
}
; CHECK-LABEL: test_copysign_extended(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
-; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
-; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
-; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
+; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_extended_param_0];
+; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_extended_param_1];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767;
+; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
-; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[R]];
+; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[RX]];
+; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[RX]];
; CHECK: st.param.f32 [func_retval0+0], [[XR]];
; CHECK: ret;
define float @test_copysign_extended(half %a, half %b) #0 {
@@ -1082,8 +1070,8 @@ define float @test_copysign_extended(half %a, half %b) #0 {
}
; CHECK-LABEL: test_floor(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
-; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0];
+; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_floor(half %a) #0 {
@@ -1092,8 +1080,8 @@ define half @test_floor(half %a) #0 {
}
; CHECK-LABEL: test_ceil(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
-; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0];
+; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_ceil(half %a) #0 {
@@ -1102,8 +1090,8 @@ define half @test_ceil(half %a) #0 {
}
; CHECK-LABEL: test_trunc(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
-; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0];
+; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_trunc(half %a) #0 {
@@ -1112,8 +1100,8 @@ define half @test_trunc(half %a) #0 {
}
; CHECK-LABEL: test_rint(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_rint(half %a) #0 {
@@ -1122,8 +1110,8 @@ define half @test_rint(half %a) #0 {
}
; CHECK-LABEL: test_nearbyint(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_nearbyint(half %a) #0 {
@@ -1132,8 +1120,8 @@ define half @test_nearbyint(half %a) #0 {
}
; CHECK-LABEL: test_roundeven(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_roundeven_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_roundeven(half %a) #0 {
@@ -1154,16 +1142,16 @@ define half @test_round(half %a) #0 {
}
; CHECK-LABEL: test_fmuladd(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
-; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
-; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmuladd_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmuladd_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2];
+; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fmuladd(half %a, half %b, half %c) #0 {
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 6fd7261f20a1f..d0e2ef4f6a540 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -31,8 +31,7 @@
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: test_ret_const(
-; CHECK: mov.u32 [[T:%r[0-9+]]], 1073757184;
-; CHECK: mov.b32 [[R:%hh[0-9+]]], [[T]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], 1073757184;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_ret_const() #0 {
@@ -40,8 +39,8 @@ define <2 x half> @test_ret_const() #0 {
}
; CHECK-LABEL: test_extract_0(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
-; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_0_param_0];
+; CHECK: mov.b32 {[[R:%rs[0-9]+]], %tmp_hi}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_0(<2 x half> %a) #0 {
@@ -50,8 +49,8 @@ define half @test_extract_0(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_extract_1(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
-; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_1_param_0];
+; CHECK: mov.b32 {%tmp_lo, [[R:%rs[0-9]+]]}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_1(<2 x half> %a) #0 {
@@ -60,11 +59,11 @@ define half @test_extract_1(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_extract_i(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_extract_i_param_0];
; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
-; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
+; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
+; CHECK: selp.b16 [[R:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
@@ -73,22 +72,22 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
}
; CHECK-LABEL: test_fadd(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_param_1];
;
-; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+; CHECK-F16-NEXT: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -99,20 +98,19 @@ define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
; Check that we can lower fadd with immediate arguments.
; CHECK-LABEL: test_fadd_imm_0(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_imm_0_param_0];
;
-; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184;
-; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]];
-; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
+; CHECK-F16: mov.b32 [[I:%r[0-9]+]], 1073757184;
+; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[I]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -122,20 +120,19 @@ define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fadd_imm_1(
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_imm_1_param_0];
;
-; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184;
-; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]];
-; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
+; CHECK-F16: mov.b32 [[I:%r[0-9]+]], 1073757184;
+; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[I]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -145,22 +142,22 @@ define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fsub(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fsub_param_0];
;
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
-; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fsub_param_1];
+; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -170,21 +167,20 @@ define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fneg(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fneg_param_0];
;
-; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0;
-; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]];
-; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
+; CHECK-F16: mov.b32 [[I:%r[0-9]+]], 0;
+; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[I]], [[A]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -194,21 +190,21 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fmul(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
-; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmul_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmul_param_1];
+; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -218,19 +214,19 @@ define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fdiv(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fdiv_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fdiv_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
-; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]];
+; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
@@ -240,11 +236,11 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_frem(
; -- Load two 16x2 inputs and split them into f16 elements
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_frem_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_frem_param_1];
; -- Split into elements
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; -- promote to f32.
; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
@@ -265,10 +261,10 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-DAG: testp.infinite.f32 [[ISB1INF:%p[0-9]+]], [[FB1]];
; CHECK-DAG: selp.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RFNINF1]], [[ISB1INF]];
; -- convert back to f16.
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
; -- merge into f16x2 and return it.
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
@@ -279,8 +275,8 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-LABEL: .func test_ldst_v2f16(
; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
-; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]]
-; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
+; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]]
+; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]];
; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
; CHECK: ret;
define void @test_ldst_v2f16(ptr %a, ptr %b) {
@@ -309,7 +305,7 @@ define void @test_ldst_v3f16(ptr %a, ptr %b) {
; CHECK-LABEL: .func test_ldst_v4f16(
; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
-; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
+; CHECK-DAG: ld.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]];
; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: ret;
define void @test_ldst_v4f16(ptr %a, ptr %b) {
@@ -333,8 +329,8 @@ define void @test_ldst_v8f16(ptr %a, ptr %b) {
declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
; CHECK-LABEL: test_call(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_param_1];
; CHECK: {
; CHECK-DAG: .param .align 4 .b8 param0[4];
; CHECK-DAG: .param .align 4 .b8 param1[4];
@@ -344,7 +340,7 @@ declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -354,8 +350,8 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_call_flipped(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_flipped_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .align 4 .b8 param0[4];
; CHECK-DAG: .param .align 4 .b8 param1[4];
@@ -365,7 +361,7 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -375,8 +371,8 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_tailcall_flipped(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_tailcall_flipped_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_tailcall_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .align 4 .b8 param0[4];
; CHECK-DAG: .param .align 4 .b8 param1[4];
@@ -386,7 +382,7 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
@@ -396,11 +392,11 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_select(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_param_1];
; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
-; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
@@ -409,15 +405,15 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
}
; CHECK-LABEL: test_select_cc(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2];
-; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_param_2];
+; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_param_3];
;
; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
;
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
@@ -425,11 +421,11 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
;
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
-; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
+; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
@@ -441,12 +437,12 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; CHECK-LABEL: test_select_cc_f32_f16(
; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
-; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
+; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_f32_f16_param_2];
+; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_f32_f16_param_3];
;
; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
@@ -466,17 +462,17 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
}
; CHECK-LABEL: test_select_cc_f16_f32(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_f16_f32_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_f16_f32_param_1];
; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
-; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
+; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
@@ -487,11 +483,11 @@ define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
}
; CHECK-LABEL: test_fcmp_une(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_une_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_une_param_1];
; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -509,11 +505,11 @@ define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ueq(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ueq_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ueq_param_1];
; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -531,11 +527,11 @@ define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ugt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ugt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ugt_param_1];
; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -553,11 +549,11 @@ define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_uge(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uge_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uge_param_1];
; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -575,11 +571,11 @@ define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ult(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ult_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ult_param_1];
; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -597,11 +593,11 @@ define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ule(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ule_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ule_param_1];
; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -620,11 +616,11 @@ define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_fcmp_uno(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uno_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uno_param_1];
; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -642,11 +638,11 @@ define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_one(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_one_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_one_param_1];
; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -664,11 +660,11 @@ define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_oeq(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oeq_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oeq_param_1];
; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -686,11 +682,11 @@ define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ogt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ogt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ogt_param_1];
; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -708,11 +704,11 @@ define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_oge(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oge_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oge_param_1];
; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -730,11 +726,11 @@ define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_olt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_olt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -752,11 +748,11 @@ define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
}
; XCHECK-LABEL: test_fcmp_ole(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ole_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -774,11 +770,11 @@ define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fcmp_ord(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ord_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
@@ -796,8 +792,8 @@ define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_fptosi_i32(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i32_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
@@ -808,8 +804,8 @@ define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fptosi_i64(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i64_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
@@ -820,8 +816,8 @@ define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fptoui_2xi32(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi32_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
@@ -832,8 +828,8 @@ define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fptoui_2xi64(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi64_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
@@ -845,9 +841,9 @@ define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
; CHECK-LABEL: test_uitofp_2xi32(
; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
-; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.u32 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.u32 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
@@ -857,9 +853,9 @@ define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
; CHECK-LABEL: test_uitofp_2xi64(
; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
-; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.u64 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.u64 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
@@ -869,9 +865,9 @@ define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
; CHECK-LABEL: test_sitofp_2xi32(
; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
-; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.s32 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.s32 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
@@ -881,9 +877,9 @@ define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
; CHECK-LABEL: test_sitofp_2xi64(
; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
-; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.s64 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.s64 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
@@ -893,23 +889,23 @@ define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
; CHECK-LABEL: test_uitofp_2xi32_fadd(
; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
-; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
+; CHECK-DAG: cvt.rn.f16.u32 [[C0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.u32 [[C1:%rs[0-9]+]], [[A1]];
-; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
-; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
+; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]}
+; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -921,23 +917,23 @@ define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_sitofp_2xi32_fadd(
; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
-; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
+; CHECK-DAG: cvt.rn.f16.s32 [[C0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.s32 [[C1:%rs[0-9]+]], [[A1]];
;
-; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
-; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
+; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]}
+; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -949,9 +945,9 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
; CHECK-LABEL: test_fptrunc_2xfloat(
; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
@@ -961,9 +957,9 @@ define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
; CHECK-LABEL: test_fptrunc_2xdouble(
; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
-; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
@@ -972,8 +968,8 @@ define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
}
; CHECK-LABEL: test_fpext_2xfloat(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xfloat_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
@@ -984,8 +980,8 @@ define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fpext_2xdouble(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xdouble_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
@@ -1012,8 +1008,7 @@ define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
-; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
+; CHECK-DAG: or.b32 [[R:%r[0-9]+]], [[R0]], [[R1H]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
@@ -1023,7 +1018,7 @@ define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
; CHECK-LABEL: test_bitcast_float_to_2xhalf(
; CHECK: ld.param.f32 [[AF1:%f[0-9]+]], [test_bitcast_float_to_2xhalf_param_0];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], [[AF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], [[AF1]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 {
@@ -1066,15 +1061,15 @@ declare <2 x half> @llvm.roundeven.f16(<2 x half> %a) #0
declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
; CHECK-LABEL: test_sqrt(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sqrt_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sqrt(<2 x half> %a) #0 {
@@ -1090,15 +1085,15 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 {
;}
; CHECK-LABEL: test_sin(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
@@ -1107,15 +1102,15 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
}
; CHECK-LABEL: test_cos(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
@@ -1166,15 +1161,15 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
;}
; CHECK-LABEL: test_fma(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fma_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fma_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fma_param_2];
;
-; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
@@ -1183,9 +1178,9 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret
@@ -1195,15 +1190,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
}
; CHECK-LABEL: test_fabs(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
+; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fabs(<2 x half> %a) #0 {
@@ -1212,19 +1207,19 @@ define <2 x half> @test_fabs(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_minnum(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_minnum_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_minnum_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
@@ -1233,19 +1228,19 @@ define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_maxnum(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_maxnum_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_maxnum_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
@@ -1254,23 +1249,17 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_copysign(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
+; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
+; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
@@ -1279,24 +1268,20 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_copysign_f32(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
@@ -1306,26 +1291,22 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
}
; CHECK-LABEL: test_copysign_f64(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
@@ -1335,24 +1316,18 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
}
; CHECK-LABEL: test_copysign_extended(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
+; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
+; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
+; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
+; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]]
; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]];
; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]];
; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
@@ -1364,11 +1339,11 @@ define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
}
; CHECK-LABEL: test_floor(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_floor_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_floor(<2 x half> %a) #0 {
@@ -1377,11 +1352,11 @@ define <2 x half> @test_floor(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_ceil(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_ceil_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_ceil(<2 x half> %a) #0 {
@@ -1390,11 +1365,11 @@ define <2 x half> @test_ceil(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_trunc(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_trunc_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_trunc(<2 x half> %a) #0 {
@@ -1403,11 +1378,11 @@ define <2 x half> @test_trunc(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_rint(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_rint_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_rint(<2 x half> %a) #0 {
@@ -1416,11 +1391,11 @@ define <2 x half> @test_rint(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_nearbyint(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_nearbyint_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
@@ -1429,11 +1404,11 @@ define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_roundeven(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_roundeven_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_roundeven_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_roundeven(<2 x half> %a) #0 {
@@ -1456,15 +1431,15 @@ define <2 x half> @test_round(<2 x half> %a) #0 {
}
; CHECK-LABEL: test_fmuladd(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2];
+; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmuladd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmuladd_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fmuladd_param_2];
;
-; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]];
;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
@@ -1473,9 +1448,9 @@ define <2 x half> @test_round(<2 x half> %a) #0 {
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
@@ -1485,16 +1460,16 @@ define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
}
; CHECK-LABEL: test_shufflevector(
-; CHECK: mov.b32 {%h1, %h2}, %hh1;
-; CHECK: mov.b32 %hh2, {%h2, %h1};
+; CHECK: mov.b32 {%rs1, %rs2}, %r1;
+; CHECK: mov.b32 %r2, {%rs2, %rs1};
define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
%s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
ret <2 x half> %s
}
; CHECK-LABEL: test_insertelement(
-; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1;
-; CHECK: mov.b32 %hh2, {%h2, %h1};
+; CHECK: mov.b32 {%rs2, %tmp_hi}, %r1;
+; CHECK: mov.b32 %r2, {%rs2, %rs1};
define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
%i = insertelement <2 x half> %a, half %x, i64 1
ret <2 x half> %i
diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll
index d9a3b95ef341e..92acc43914b1e 100644
--- a/llvm/test/CodeGen/NVPTX/half.ll
+++ b/llvm/test/CodeGen/NVPTX/half.ll
@@ -7,7 +7,7 @@
define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_load_store
-; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
%val = load half, ptr addrspace(1) %in
store half %val, ptr addrspace(1) %out
@@ -16,7 +16,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_bitcast_from_half
-; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
%val = load half, ptr addrspace(1) %in
%val_int = bitcast half %val to i16
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
index ff2f60c04d370..a157616db9fb4 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK %s
-; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
; CHECK-LABEL: test_isspacep
define i1 @test_isspacep_shared_cluster(ptr %p) {
diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
index 67fabbac1d4e9..e6681fe744845 100644
--- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
+++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
@@ -28,8 +28,8 @@
"i16": "r",
"i32": "r",
"i64": "rd",
- "half": "h",
- "<2 x half>": "hh",
+ "half": "rs",
+ "<2 x half>": "r",
"float": "f",
"double": "fd",
}
diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
index ac33e3e1dc443..5d0db6f80a83d 100644
--- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
+++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -15,7 +15,7 @@ define i32 @ld_global(ptr addrspace(1) %ptr) {
define half @ld_global_v2f16(ptr addrspace(1) %ptr) {
; Load of v2f16 is weird. We consider it to be a legal type, which happens to be
; loaded/stored as a 32-bit scalar.
-; CHECK: ld.global.nc.b32
+; CHECK: ld.global.nc.u32
%a = load <2 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <2 x half> %a, i32 0
%v2 = extractelement <2 x half> %a, i32 1
@@ -28,7 +28,7 @@ define half @ld_global_v4f16(ptr addrspace(1) %ptr) {
; Larger f16 vectors may be split into individual f16 elements and multiple
; loads/stores may be vectorized using f16 element type. Practically it's
; limited to v4 variant only.
-; CHECK: ld.global.nc.v4.b16
+; CHECK: ld.global.nc.v4.u16
%a = load <4 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <4 x half> %a, i32 0
%v2 = extractelement <4 x half> %a, i32 1
@@ -44,7 +44,7 @@ define half @ld_global_v4f16(ptr addrspace(1) %ptr) {
define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
; Larger vectors are, again, loaded as v4i32. PTX has no v8 variants of loads/stores,
; so load/store vectorizer has to convert v8f16 -> v4 x v2f16.
-; CHECK: ld.global.nc.v4.b32
+; CHECK: ld.global.nc.v4.u32
%a = load <8 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <8 x half> %a, i32 0
%v2 = extractelement <8 x half> %a, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index c152f835afe07..66f0954c34c83 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldu.global.p.p1i8(ptr addrspace(1) %ptr, i32 %align)
declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -15,6 +16,7 @@ declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldg.global.p.p1i8(ptr addrspace(1) %ptr, i32 %align)
declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -48,6 +50,14 @@ define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
ret i64 %val
}
+; CHECK-LABEL: test_ldu_p
+define ptr @test_ldu_p(ptr addrspace(1) %ptr) {
+ ; CHECK: ldu.global.u64
+ %val = tail call ptr @llvm.nvvm.ldu.global.p.p1i8(ptr addrspace(1) %ptr, i32 8)
+ ret ptr %val
+}
+
+
; CHECK-LABEL: test_ldu_f32
define float @test_ldu_f32(ptr addrspace(1) %ptr) {
; CHECK: ldu.global.f32
@@ -64,14 +74,14 @@ define double @test_ldu_f64(ptr addrspace(1) %ptr) {
; CHECK-LABEL: test_ldu_f16
define half @test_ldu_f16(ptr addrspace(1) %ptr) {
- ; CHECK: ldu.global.b16
+ ; CHECK: ldu.global.u16
%val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
ret half %val
}
; CHECK-LABEL: test_ldu_v2f16
define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
- ; CHECK: ldu.global.b32
+ ; CHECK: ldu.global.u32
%val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
@@ -104,6 +114,13 @@ define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
ret i64 %val
}
+; CHECK-LABEL: test_ldg_p
+define ptr @test_ldg_p(ptr addrspace(1) %ptr) {
+ ; CHECK: ld.global.nc.u64
+ %val = tail call ptr @llvm.nvvm.ldg.global.p.p1i8(ptr addrspace(1) %ptr, i32 8)
+ ret ptr %val
+}
+
; CHECK-LABEL: test_ldg_f32
define float @test_ldg_f32(ptr addrspace(1) %ptr) {
; CHECK: ld.global.nc.f32
@@ -120,14 +137,14 @@ define double @test_ldg_f64(ptr addrspace(1) %ptr) {
; CHECK-LABEL: test_ldg_f16
define half @test_ldg_f16(ptr addrspace(1) %ptr) {
- ; CHECK: ld.global.nc.b16
+ ; CHECK: ld.global.nc.u16
%val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
ret half %val
}
; CHECK-LABEL: test_ldg_v2f16
define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) {
- ; CHECK: ld.global.nc.b32
+ ; CHECK: ld.global.nc.u32
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
ret <2 x half> %val
}
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index f2ff7e3fd7afb..b05fbaea17087 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -384,13 +384,13 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f16(
; CHECK-NEXT: .param .b32 test_f16_param_0
-; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0];
+; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b16 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f16,
-; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define half @test_f16(half %a) {
@@ -401,13 +401,13 @@ define half @test_f16(half %a) {
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v2f16(
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
-; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0];
+; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2f16,
-; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define <2 x half> @test_v2f16(<2 x half> %a) {
@@ -418,17 +418,17 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
-; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
-; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
+; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0];
+; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
+; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v3f16,
-; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4];
+; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
@@ -441,14 +441,12 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
; CHECK-LABEL: test_v4f16(
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
-; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
-; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
+; CHECK: st.param.v2.b32 [param0+0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v4f16,
-; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
; CHECK: ret;
define <4 x half> @test_v4f16(<4 x half> %a) {
@@ -459,17 +457,17 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
-; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
+; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
+; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
+; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v5f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8];
+; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
@@ -482,16 +480,12 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
; CHECK-LABEL: test_v8f16(
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
-; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
-; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
-; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]];
-; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]];
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
+; CHECK: st.param.v4.b32 [param0+0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v8f16,
-; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
define <8 x half> @test_v8f16(<8 x half> %a) {
@@ -502,9 +496,9 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK:.func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
-; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
-; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
+; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
+; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
+; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.v4.b16 [param0+8],
@@ -512,9 +506,9 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_v9f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
-; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16];
+; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
+; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
@@ -915,13 +909,13 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_f16(
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
+; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0+0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_f16,
-; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_f16 @test_s_f16(%s_f16 %a) {
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
index 7a3b29b20c4e9..fa138f3d0936e 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
@@ -92,10 +92,10 @@ declare half @callee_f16()
define half @check_f16() {
; PTX-LABEL: check_f16
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
- ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0];
+ ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0+0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%rs[0-9]+]], [[LD]];
; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]];
; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];
@@ -155,10 +155,10 @@ declare <2 x half> @callee_vec_f16()
define <2 x half> @check_vec_f16() {
; PTX-LABEL: check_vec_f16
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
- ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0];
+ ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
diff --git a/llvm/test/CodeGen/NVPTX/wmma.py b/llvm/test/CodeGen/NVPTX/wmma.py
index 928abe8795a7b..b7153d684671f 100644
--- a/llvm/test/CodeGen/NVPTX/wmma.py
+++ b/llvm/test/CodeGen/NVPTX/wmma.py
@@ -118,7 +118,7 @@ def __init__(self, ptx_type):
}[ptx_type]
self.ptx_reg_pattern = {
- "f16": "%hh[0-9]+",
+ "f16": "%r[0-9]+",
"f32": "%f[0-9]+",
"f64": "%fd[0-9]+",
}.get(ptx_type, "%r[0-9]+")
More information about the llvm-commits mailing list