[llvm] r303082 - [NVPTX] Don't flag StoreParam/LoadParam memory chain operands as ReadMem/WriteMem (PR32146)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 15 10:17:44 PDT 2017
Author: rksimon
Date: Mon May 15 12:17:44 2017
New Revision: 303082
URL: http://llvm.org/viewvc/llvm-project?rev=303082&view=rev
Log:
[NVPTX] Don't flag StoreParam/LoadParam memory chain operands as ReadMem/WriteMem (PR32146)
Follow-up to D33147.
NVPTXTargetLowering::LowerCall was trusting the default argument values. The StoreParam nodes are now flagged as WriteMem only, and the LoadParam node as ReadMem only.
Fixes another 17 of the NVPTX '-verify-machineinstrs with EXPENSIVE_CHECKS' errors in PR32146.
Differential Revision: https://reviews.llvm.org/D33189
Modified:
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/trunk/test/CodeGen/NVPTX/bug17709.ll
llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll
llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/trunk/test/CodeGen/NVPTX/fma.ll
llvm/trunk/test/CodeGen/NVPTX/i8-param.ll
llvm/trunk/test/CodeGen/NVPTX/param-load-store.ll
llvm/trunk/test/CodeGen/NVPTX/simple-call.ll
llvm/trunk/test/CodeGen/NVPTX/vector-call.ll
llvm/trunk/test/CodeGen/NVPTX/zeroext-32bit.ll
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Mon May 15 12:17:44 2017
@@ -1549,7 +1549,7 @@ SDValue NVPTXTargetLowering::LowerCall(T
Chain = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
TheStoreType, MachinePointerInfo(), EltAlign,
- /* Volatile */ false, /* ReadMem */ true,
+ /* Volatile */ false, /* ReadMem */ false,
/* WriteMem */ true, /* Size */ 0);
InFlag = Chain.getValue(1);
@@ -1611,7 +1611,7 @@ SDValue NVPTXTargetLowering::LowerCall(T
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, elemtype,
MachinePointerInfo(), /* Align */ 0,
- /* Volatile */ false, /* ReadMem */ true,
+ /* Volatile */ false, /* ReadMem */ false,
/* WriteMem */ true, /* Size */ 0);
InFlag = Chain.getValue(1);
@@ -1799,7 +1799,7 @@ SDValue NVPTXTargetLowering::LowerCall(T
SDValue RetVal = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
MachinePointerInfo(), EltAlign, /* Volatile */ false,
- /* ReadMem */ true, /* WriteMem */ true, /* Size */ 0);
+ /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0);
for (unsigned j = 0; j < NumElts; ++j) {
SDValue Ret = RetVal.getValue(j);
Modified: llvm/trunk/test/CodeGen/NVPTX/bug17709.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug17709.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug17709.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/bug17709.ll Mon May 15 12:17:44 2017
@@ -1,26 +1,26 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-
-; ModuleID = '__kernelgen_main_module'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-target triple = "nvptx64-nvidia-cuda"
-
-define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
-entry:
- ;unreachable
- %t0 = insertvalue {double, double} undef, double 1.0, 0
- %t1 = insertvalue {double, double} %t0, double 1.0, 1
- ret { double, double } %t1
-}
-
-%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
-%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
-
-; CHECK: .visible .entry __kernelgen_main
-define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
-entry:
- %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
- ret void
-}
-
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+; ModuleID = '__kernelgen_main_module'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
+entry:
+ ;unreachable
+ %t0 = insertvalue {double, double} undef, double 1.0, 0
+ %t1 = insertvalue {double, double} %t0, double 1.0, 1
+ ret { double, double } %t1
+}
+
+%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
+%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
+
+; CHECK: .visible .entry __kernelgen_main
+define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
+entry:
+ %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
+ ret void
+}
+
Modified: llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll Mon May 15 12:17:44 2017
@@ -1,1078 +1,1079 @@
-; ## Full FP16 support enabled by default.
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -disable-fp-elim \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
-; ## FP16 support explicitly disabled.
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
-; ## FP16 is not supported by hardware.
-; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
-; RUN: -disable-post-ra -disable-fp-elim \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-
-; CHECK-LABEL: test_ret_const(
-; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_ret_const() #0 {
- ret half 1.0
-}
-
-; CHECK-LABEL: test_fadd(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
-; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fadd(half %a, half %b) #0 {
- %r = fadd half %a, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_fadd_v1f16(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
-; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
- %r = fadd <1 x half> %a, %b
- ret <1 x half> %r
-}
-
-; Check that we can lower fadd with immediate arguments.
-; CHECK-LABEL: test_fadd_imm_0(
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
-; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fadd_imm_0(half %b) #0 {
- %r = fadd half 1.0, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_fadd_imm_1(
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
-; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
-; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fadd_imm_1(half %a) #0 {
- %r = fadd half %a, 1.0
- ret half %r
-}
-
-; CHECK-LABEL: test_fsub(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
-; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fsub(half %a, half %b) #0 {
- %r = fsub half %a, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_fneg(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
-; CHECK-F16-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
-; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
-; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fneg(half %a) #0 {
- %r = fsub half 0.0, %a
- ret half %r
-}
-
-; CHECK-LABEL: test_fmul(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
-; CHECK-F16-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fmul(half %a, half %b) #0 {
- %r = fmul half %a, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_fdiv(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
-; CHECK-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
-; CHECK-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
-; CHECK-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
-; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_fdiv(half %a, half %b) #0 {
- %r = fdiv half %a, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_frem(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
-; CHECK-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
-; CHECK-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
-; CHECK-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
-; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
-; CHECK-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
-; CHECK-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
-; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_frem(half %a, half %b) #0 {
- %r = frem half %a, %b
- ret half %r
-}
-
-; CHECK-LABEL: test_store(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
-; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
-; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
-; CHECK-NEXT: ret;
-define void @test_store(half %a, half* %b) #0 {
- store half %a, half* %b
- ret void
-}
-
-; CHECK-LABEL: test_load(
-; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
-; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_load(half* %a) #0 {
- %r = load half, half* %a
- ret half %r
-}
-
-; CHECK-LABEL: .visible .func test_halfp0a1(
-; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
-; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
-; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
-; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
-; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
-; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
-; CHECK: ret
-define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
- %1 = load half, half * %from , align 1
- store half %1, half * %to , align 1
- ret void
-}
-
-declare half @test_callee(half %a, half %b) #0
-
-; CHECK-LABEL: test_call(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
-; CHECK: {
-; CHECK-DAG: .param .b32 param0;
-; CHECK-DAG: .param .b32 param1;
-; CHECK-DAG: st.param.b16 [param0+0], [[A]];
-; CHECK-DAG: st.param.b16 [param1+0], [[B]];
-; CHECK-DAG: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_call(half %a, half %b) #0 {
- %r = call half @test_callee(half %a, half %b)
- ret half %r
-}
-
-; CHECK-LABEL: test_call_flipped(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
-; CHECK: {
-; CHECK-DAG: .param .b32 param0;
-; CHECK-DAG: .param .b32 param1;
-; CHECK-DAG: st.param.b16 [param0+0], [[B]];
-; CHECK-DAG: st.param.b16 [param1+0], [[A]];
-; CHECK-DAG: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_call_flipped(half %a, half %b) #0 {
- %r = call half @test_callee(half %b, half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_tailcall_flipped(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
-; CHECK: {
-; CHECK-DAG: .param .b32 param0;
-; CHECK-DAG: .param .b32 param1;
-; CHECK-DAG: st.param.b16 [param0+0], [[B]];
-; CHECK-DAG: st.param.b16 [param1+0], [[A]];
-; CHECK-DAG: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_tailcall_flipped(half %a, half %b) #0 {
- %r = tail call half @test_callee(half %b, half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_select(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
-; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
-; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
- %r = select i1 %c, half %a, half %b
- ret half %r
-}
-
-; CHECK-LABEL: test_select_cc(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
-; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
-; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
-; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
-; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
- %cc = fcmp une half %c, %d
- %r = select i1 %cc, half %a, half %b
- ret half %r
-}
-
-; CHECK-LABEL: test_select_cc_f32_f16(
-; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
-; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
-; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
-; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
-; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
-; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
- %cc = fcmp une half %c, %d
- %r = select i1 %cc, float %a, float %b
- ret float %r
-}
-
-; CHECK-LABEL: test_select_cc_f16_f32(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
-; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
-; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
-; CHECK-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
-; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
- %cc = fcmp une float %c, %d
- %r = select i1 %cc, half %a, half %b
- ret half %r
-}
-
-; CHECK-LABEL: test_fcmp_une(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
-; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_une(half %a, half %b) #0 {
- %r = fcmp une half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ueq(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
-; CHECK-F16: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ueq(half %a, half %b) #0 {
- %r = fcmp ueq half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ugt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
-; CHECK-F16: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ugt(half %a, half %b) #0 {
- %r = fcmp ugt half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_uge(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
-; CHECK-F16: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_uge(half %a, half %b) #0 {
- %r = fcmp uge half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ult(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
-; CHECK-F16: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ult(half %a, half %b) #0 {
- %r = fcmp ult half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ule(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
-; CHECK-F16: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ule(half %a, half %b) #0 {
- %r = fcmp ule half %a, %b
- ret i1 %r
-}
-
-
-; CHECK-LABEL: test_fcmp_uno(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
-; CHECK-F16: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_uno(half %a, half %b) #0 {
- %r = fcmp uno half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_one(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
-; CHECK-F16: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_one(half %a, half %b) #0 {
- %r = fcmp one half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_oeq(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
-; CHECK-F16: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_oeq(half %a, half %b) #0 {
- %r = fcmp oeq half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ogt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
-; CHECK-F16: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ogt(half %a, half %b) #0 {
- %r = fcmp ogt half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_oge(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
-; CHECK-F16: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_oge(half %a, half %b) #0 {
- %r = fcmp oge half %a, %b
- ret i1 %r
-}
-
-; XCHECK-LABEL: test_fcmp_olt(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
-; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_olt(half %a, half %b) #0 {
- %r = fcmp olt half %a, %b
- ret i1 %r
-}
-
-; XCHECK-LABEL: test_fcmp_ole(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
-; CHECK-F16: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ole(half %a, half %b) #0 {
- %r = fcmp ole half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_fcmp_ord(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
-; CHECK-F16: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i1 @test_fcmp_ord(half %a, half %b) #0 {
- %r = fcmp ord half %a, %b
- ret i1 %r
-}
-
-; CHECK-LABEL: test_br_cc(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
-; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
-; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
-; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
-; CHECK-NEXT: @[[PRED]] bra [[LABEL:LBB.*]];
-; CHECK: st.u32 [%[[C]]],
-; CHECK: [[LABEL]]:
-; CHECK: st.u32 [%[[D]]],
-; CHECK: ret;
-define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
- %c = fcmp uge half %a, %b
- br i1 %c, label %then, label %else
-then:
- store i32 0, i32* %p1
- ret void
-else:
- store i32 0, i32* %p2
- ret void
-}
-
-; CHECK-LABEL: test_phi(
-; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
-; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
-; CHECK: [[LOOP:LBB[0-9_]+]]:
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
-; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
-; CHECK: {
-; CHECK: st.param.b64 [param0+0], %[[P1]];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_dummy
-; CHECK: }
-; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: @[[PRED]] bra [[LOOP]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_phi(half* %p1) #0 {
-entry:
- %a = load half, half* %p1
- br label %loop
-loop:
- %r = phi half [%a, %entry], [%b, %loop]
- %b = load half, half* %p1
- %c = call i1 @test_dummy(half* %p1)
- br i1 %c, label %loop, label %return
-return:
- ret half %r
-}
-declare i1 @test_dummy(half* %p1) #0
-
-; CHECK-LABEL: test_fptosi_i32(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
-; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i32 @test_fptosi_i32(half %a) #0 {
- %r = fptosi half %a to i32
- ret i32 %r
-}
-
-; CHECK-LABEL: test_fptosi_i64(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
-; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
-; CHECK: st.param.b64 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i64 @test_fptosi_i64(half %a) #0 {
- %r = fptosi half %a to i64
- ret i64 %r
-}
-
-; CHECK-LABEL: test_fptoui_i32(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
-; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i32 @test_fptoui_i32(half %a) #0 {
- %r = fptoui half %a to i32
- ret i32 %r
-}
-
-; CHECK-LABEL: test_fptoui_i64(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
-; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
-; CHECK: st.param.b64 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i64 @test_fptoui_i64(half %a) #0 {
- %r = fptoui half %a to i64
- ret i64 %r
-}
-
-; CHECK-LABEL: test_uitofp_i32(
-; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
-; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_uitofp_i32(i32 %a) #0 {
- %r = uitofp i32 %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_uitofp_i64(
-; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
-; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_uitofp_i64(i64 %a) #0 {
- %r = uitofp i64 %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_sitofp_i32(
-; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
-; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_sitofp_i32(i32 %a) #0 {
- %r = sitofp i32 %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_sitofp_i64(
-; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
-; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_sitofp_i64(i64 %a) #0 {
- %r = sitofp i64 %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_uitofp_i32_fadd(
-; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
-; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
-; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
-; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
- %c = uitofp i32 %a to half
- %r = fadd half %b, %c
- ret half %r
-}
-
-; CHECK-LABEL: test_sitofp_i32_fadd(
-; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
-; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
-; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
-; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
-; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
-; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
- %c = sitofp i32 %a to half
- %r = fadd half %b, %c
- ret half %r
-}
-
-; CHECK-LABEL: test_fptrunc_float(
-; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_fptrunc_float(float %a) #0 {
- %r = fptrunc float %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_fptrunc_double(
-; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
-; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_fptrunc_double(double %a) #0 {
- %r = fptrunc double %a to half
- ret half %r
-}
-
-; CHECK-LABEL: test_fpext_float(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
-; CHECK: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
-; CHECK: st.param.f32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define float @test_fpext_float(half %a) #0 {
- %r = fpext half %a to float
- ret float %r
-}
-
-; CHECK-LABEL: test_fpext_double(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
-; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
-; CHECK: st.param.f64 [func_retval0+0], [[R]];
-; CHECK: ret;
-define double @test_fpext_double(half %a) #0 {
- %r = fpext half %a to double
- ret double %r
-}
-
-
-; CHECK-LABEL: test_bitcast_halftoi16(
-; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
-; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
-; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i16 @test_bitcast_halftoi16(half %a) #0 {
- %r = bitcast half %a to i16
- ret i16 %r
-}
-
-; CHECK-LABEL: test_bitcast_i16tohalf(
-; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
-; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
-; CHECK: st.param.b16 [func_retval0+0], [[AH]];
-; CHECK: ret;
-define half @test_bitcast_i16tohalf(i16 %a) #0 {
- %r = bitcast i16 %a to half
- ret half %r
-}
-
-
-declare half @llvm.sqrt.f16(half %a) #0
-declare half @llvm.powi.f16(half %a, i32 %b) #0
-declare half @llvm.sin.f16(half %a) #0
-declare half @llvm.cos.f16(half %a) #0
-declare half @llvm.pow.f16(half %a, half %b) #0
-declare half @llvm.exp.f16(half %a) #0
-declare half @llvm.exp2.f16(half %a) #0
-declare half @llvm.log.f16(half %a) #0
-declare half @llvm.log10.f16(half %a) #0
-declare half @llvm.log2.f16(half %a) #0
-declare half @llvm.fma.f16(half %a, half %b, half %c) #0
-declare half @llvm.fabs.f16(half %a) #0
-declare half @llvm.minnum.f16(half %a, half %b) #0
-declare half @llvm.maxnum.f16(half %a, half %b) #0
-declare half @llvm.copysign.f16(half %a, half %b) #0
-declare half @llvm.floor.f16(half %a) #0
-declare half @llvm.ceil.f16(half %a) #0
-declare half @llvm.trunc.f16(half %a) #0
-declare half @llvm.rint.f16(half %a) #0
-declare half @llvm.nearbyint.f16(half %a) #0
-declare half @llvm.round.f16(half %a) #0
-declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
-
-; CHECK-LABEL: test_sqrt(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
-; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_sqrt(half %a) #0 {
- %r = call half @llvm.sqrt.f16(half %a)
- ret half %r
-}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_powi(
-;define half @test_powi(half %a, i32 %b) #0 {
-; %r = call half @llvm.powi.f16(half %a, i32 %b)
-; ret half %r
-;}
-
-; CHECK-LABEL: test_sin(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
-; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_sin(half %a) #0 #1 {
- %r = call half @llvm.sin.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_cos(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
-; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_cos(half %a) #0 #1 {
- %r = call half @llvm.cos.f16(half %a)
- ret half %r
-}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_pow(
-;define half @test_pow(half %a, half %b) #0 {
-; %r = call half @llvm.pow.f16(half %a, half %b)
-; ret half %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_exp(
-;define half @test_exp(half %a) #0 {
-; %r = call half @llvm.exp.f16(half %a)
-; ret half %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_exp2(
-;define half @test_exp2(half %a) #0 {
-; %r = call half @llvm.exp2.f16(half %a)
-; ret half %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log(
-;define half @test_log(half %a) #0 {
-; %r = call half @llvm.log.f16(half %a)
-; ret half %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log10(
-;define half @test_log10(half %a) #0 {
-; %r = call half @llvm.log10.f16(half %a)
-; ret half %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log2(
-;define half @test_log2(half %a) #0 {
-; %r = call half @llvm.log2.f16(half %a)
-; ret half %r
-;}
-
-; CHECK-LABEL: test_fma(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
-; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
-; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret
-define half @test_fma(half %a, half %b, half %c) #0 {
- %r = call half @llvm.fma.f16(half %a, half %b, half %c)
- ret half %r
-}
-
-; CHECK-LABEL: test_fabs(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
-; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK: abs.f32 [[RF:%f[0-9]+]], [[AF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_fabs(half %a) #0 {
- %r = call half @llvm.fabs.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_minnum(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
-; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_minnum(half %a, half %b) #0 {
- %r = call half @llvm.minnum.f16(half %a, half %b)
- ret half %r
-}
-
-; CHECK-LABEL: test_maxnum(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
-; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
-; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
-; CHECK: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
-; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_maxnum(half %a, half %b) #0 {
- %r = call half @llvm.maxnum.f16(half %a, half %b)
- ret half %r
-}
-
-; CHECK-LABEL: test_copysign(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
-; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
-; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
-; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_copysign(half %a, half %b) #0 {
- %r = call half @llvm.copysign.f16(half %a, half %b)
- ret half %r
-}
-
-; CHECK-LABEL: test_copysign_f32(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
-; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
-; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
-; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
-; CHECK-DAG: shr.u32 [[BX1:%r[0-9]+]], [[BX0]], 16;
-; CHECK-DAG: cvt.u16.u32 [[BX2:%rs[0-9]+]], [[BX1]];
-; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_copysign_f32(half %a, float %b) #0 {
- %tb = fptrunc float %b to half
- %r = call half @llvm.copysign.f16(half %a, half %tb)
- ret half %r
-}
-
-; CHECK-LABEL: test_copysign_f64(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
-; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
-; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
-; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
-; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
-; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
-; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_copysign_f64(half %a, double %b) #0 {
- %tb = fptrunc double %b to half
- %r = call half @llvm.copysign.f16(half %a, half %tb)
- ret half %r
-}
-
-; CHECK-LABEL: test_copysign_extended(
-; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
-; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
-; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
-; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
-; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
-; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
-; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
-; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
-; CHECK: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
-; CHECK: st.param.f32 [func_retval0+0], [[XR]];
-; CHECK: ret;
-define float @test_copysign_extended(half %a, half %b) #0 {
- %r = call half @llvm.copysign.f16(half %a, half %b)
- %xr = fpext half %r to float
- ret float %xr
-}
-
-; CHECK-LABEL: test_floor(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
-; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_floor(half %a) #0 {
- %r = call half @llvm.floor.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_ceil(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
-; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_ceil(half %a) #0 {
- %r = call half @llvm.ceil.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_trunc(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
-; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_trunc(half %a) #0 {
- %r = call half @llvm.trunc.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_rint(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_rint(half %a) #0 {
- %r = call half @llvm.rint.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_nearbyint(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_nearbyint(half %a) #0 {
- %r = call half @llvm.nearbyint.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_round(
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];
-; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_round(half %a) #0 {
- %r = call half @llvm.round.f16(half %a)
- ret half %r
-}
-
-; CHECK-LABEL: test_fmuladd(
-; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
-; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
-; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
-; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
-; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
-; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_fmuladd(half %a, half %b, half %c) #0 {
- %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
- ret half %r
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
+; ## Full FP16 support enabled by default.
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
+; ## FP16 support explicitly disabled.
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
+; ## FP16 is not supported by hardware.
+; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
+; RUN: -disable-post-ra -disable-fp-elim -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: test_ret_const(
+; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_ret_const() #0 {
+ ret half 1.0
+}
+
+; CHECK-LABEL: test_fadd(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
+; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fadd(half %a, half %b) #0 {
+ %r = fadd half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fadd_v1f16(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
+; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
+ %r = fadd <1 x half> %a, %b
+ ret <1 x half> %r
+}
+
+; Check that we can lower fadd with immediate arguments.
+; CHECK-LABEL: test_fadd_imm_0(
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
+; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
+; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fadd_imm_0(half %b) #0 {
+ %r = fadd half 1.0, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fadd_imm_1(
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
+; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
+; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fadd_imm_1(half %a) #0 {
+ %r = fadd half %a, 1.0
+ ret half %r
+}
+
+; CHECK-LABEL: test_fsub(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
+; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fsub(half %a, half %b) #0 {
+ %r = fsub half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fneg(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
+; CHECK-F16-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
+; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
+; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fneg(half %a) #0 {
+ %r = fsub half 0.0, %a
+ ret half %r
+}
+
+; CHECK-LABEL: test_fmul(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
+; CHECK-F16-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fmul(half %a, half %b) #0 {
+ %r = fmul half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fdiv(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
+; CHECK-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
+; CHECK-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
+; CHECK-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
+; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_fdiv(half %a, half %b) #0 {
+ %r = fdiv half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_frem(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
+; CHECK-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
+; CHECK-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
+; CHECK-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
+; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
+; CHECK-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
+; CHECK-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
+; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_frem(half %a, half %b) #0 {
+ %r = frem half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_store(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
+; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
+; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
+; CHECK-NEXT: ret;
+define void @test_store(half %a, half* %b) #0 {
+ store half %a, half* %b
+ ret void
+}
+
+; CHECK-LABEL: test_load(
+; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
+; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_load(half* %a) #0 {
+ %r = load half, half* %a
+ ret half %r
+}
+
+; CHECK-LABEL: .visible .func test_halfp0a1(
+; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
+; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
+; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
+; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
+; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
+; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
+; CHECK: ret
+define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
+ %1 = load half, half * %from , align 1
+ store half %1, half * %to , align 1
+ ret void
+}
+
+declare half @test_callee(half %a, half %b) #0
+
+; CHECK-LABEL: test_call(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
+; CHECK: {
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 param1;
+; CHECK-DAG: st.param.b16 [param0+0], [[A]];
+; CHECK-DAG: st.param.b16 [param1+0], [[B]];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_call(half %a, half %b) #0 {
+ %r = call half @test_callee(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_call_flipped(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
+; CHECK: {
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 param1;
+; CHECK-DAG: st.param.b16 [param0+0], [[B]];
+; CHECK-DAG: st.param.b16 [param1+0], [[A]];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_call_flipped(half %a, half %b) #0 {
+ %r = call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_tailcall_flipped(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
+; CHECK: {
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 param1;
+; CHECK-DAG: st.param.b16 [param0+0], [[B]];
+; CHECK-DAG: st.param.b16 [param1+0], [[A]];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_tailcall_flipped(half %a, half %b) #0 {
+ %r = tail call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_select(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
+; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
+; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
+ %r = select i1 %c, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_select_cc(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
+; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
+; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
+; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
+; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
+ %cc = fcmp une half %c, %d
+ %r = select i1 %cc, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_select_cc_f32_f16(
+; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
+; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
+; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
+; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
+; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
+; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
+ %cc = fcmp une half %c, %d
+ %r = select i1 %cc, float %a, float %b
+ ret float %r
+}
+
+; CHECK-LABEL: test_select_cc_f16_f32(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
+; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
+; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
+; CHECK-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
+; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
+ %cc = fcmp une float %c, %d
+ %r = select i1 %cc, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fcmp_une(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
+; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_une(half %a, half %b) #0 {
+ %r = fcmp une half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ueq(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
+; CHECK-F16: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ueq(half %a, half %b) #0 {
+ %r = fcmp ueq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ugt(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
+; CHECK-F16: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ugt(half %a, half %b) #0 {
+ %r = fcmp ugt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_uge(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
+; CHECK-F16: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_uge(half %a, half %b) #0 {
+ %r = fcmp uge half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ult(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
+; CHECK-F16: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ult(half %a, half %b) #0 {
+ %r = fcmp ult half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ule(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
+; CHECK-F16: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ule(half %a, half %b) #0 {
+ %r = fcmp ule half %a, %b
+ ret i1 %r
+}
+
+
+; CHECK-LABEL: test_fcmp_uno(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
+; CHECK-F16: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_uno(half %a, half %b) #0 {
+ %r = fcmp uno half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_one(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
+; CHECK-F16: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_one(half %a, half %b) #0 {
+ %r = fcmp one half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oeq(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
+; CHECK-F16: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_oeq(half %a, half %b) #0 {
+ %r = fcmp oeq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ogt(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
+; CHECK-F16: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ogt(half %a, half %b) #0 {
+ %r = fcmp ogt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oge(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
+; CHECK-F16: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_oge(half %a, half %b) #0 {
+ %r = fcmp oge half %a, %b
+ ret i1 %r
+}
+
+; XCHECK-LABEL: test_fcmp_olt(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
+; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_olt(half %a, half %b) #0 {
+ %r = fcmp olt half %a, %b
+ ret i1 %r
+}
+
+; XCHECK-LABEL: test_fcmp_ole(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
+; CHECK-F16: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ole(half %a, half %b) #0 {
+ %r = fcmp ole half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ord(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
+; CHECK-F16: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i1 @test_fcmp_ord(half %a, half %b) #0 {
+ %r = fcmp ord half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_br_cc(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
+; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
+; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
+; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
+; CHECK-NEXT: @[[PRED]] bra [[LABEL:LBB.*]];
+; CHECK: st.u32 [%[[C]]],
+; CHECK: [[LABEL]]:
+; CHECK: st.u32 [%[[D]]],
+; CHECK: ret;
+define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
+ %c = fcmp uge half %a, %b
+ br i1 %c, label %then, label %else
+then:
+ store i32 0, i32* %p1
+ ret void
+else:
+ store i32 0, i32* %p2
+ ret void
+}
+
+; CHECK-LABEL: test_phi(
+; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
+; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
+; CHECK: [[LOOP:LBB[0-9_]+]]:
+; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
+; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
+; CHECK: {
+; CHECK: st.param.b64 [param0+0], %[[P1]];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_dummy
+; CHECK: }
+; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
+; CHECK: @[[PRED]] bra [[LOOP]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_phi(half* %p1) #0 {
+entry:
+ %a = load half, half* %p1
+ br label %loop
+loop:
+ %r = phi half [%a, %entry], [%b, %loop]
+ %b = load half, half* %p1
+ %c = call i1 @test_dummy(half* %p1)
+ br i1 %c, label %loop, label %return
+return:
+ ret half %r
+}
+declare i1 @test_dummy(half* %p1) #0
+
+; CHECK-LABEL: test_fptosi_i32(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
+; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i32 @test_fptosi_i32(half %a) #0 {
+ %r = fptosi half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptosi_i64(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
+; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
+; CHECK: st.param.b64 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i64 @test_fptosi_i64(half %a) #0 {
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_fptoui_i32(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
+; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i32 @test_fptoui_i32(half %a) #0 {
+ %r = fptoui half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptoui_i64(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
+; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
+; CHECK: st.param.b64 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i64 @test_fptoui_i64(half %a) #0 {
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_uitofp_i32(
+; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
+; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_uitofp_i32(i32 %a) #0 {
+ %r = uitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_uitofp_i64(
+; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
+; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_uitofp_i64(i64 %a) #0 {
+ %r = uitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i32(
+; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
+; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_sitofp_i32(i32 %a) #0 {
+ %r = sitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i64(
+; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
+; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_sitofp_i64(i64 %a) #0 {
+ %r = sitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_uitofp_i32_fadd(
+; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
+; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
+; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
+; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
+ %c = uitofp i32 %a to half
+ %r = fadd half %b, %c
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i32_fadd(
+; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
+; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
+; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
+; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
+; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
+; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
+ %c = sitofp i32 %a to half
+ %r = fadd half %b, %c
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_float(
+; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_fptrunc_float(float %a) #0 {
+ %r = fptrunc float %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_double(
+; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
+; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_fptrunc_double(double %a) #0 {
+ %r = fptrunc double %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fpext_float(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
+; CHECK: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
+; CHECK: st.param.f32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define float @test_fpext_float(half %a) #0 {
+ %r = fpext half %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: test_fpext_double(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
+; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
+; CHECK: st.param.f64 [func_retval0+0], [[R]];
+; CHECK: ret;
+define double @test_fpext_double(half %a) #0 {
+ %r = fpext half %a to double
+ ret double %r
+}
+
+
+; CHECK-LABEL: test_bitcast_halftoi16(
+; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
+; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
+; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i16 @test_bitcast_halftoi16(half %a) #0 {
+ %r = bitcast half %a to i16
+ ret i16 %r
+}
+
+; CHECK-LABEL: test_bitcast_i16tohalf(
+; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
+; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
+; CHECK: st.param.b16 [func_retval0+0], [[AH]];
+; CHECK: ret;
+define half @test_bitcast_i16tohalf(i16 %a) #0 {
+ %r = bitcast i16 %a to half
+ ret half %r
+}
+
+
+declare half @llvm.sqrt.f16(half %a) #0
+declare half @llvm.powi.f16(half %a, i32 %b) #0
+declare half @llvm.sin.f16(half %a) #0
+declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.pow.f16(half %a, half %b) #0
+declare half @llvm.exp.f16(half %a) #0
+declare half @llvm.exp2.f16(half %a) #0
+declare half @llvm.log.f16(half %a) #0
+declare half @llvm.log10.f16(half %a) #0
+declare half @llvm.log2.f16(half %a) #0
+declare half @llvm.fma.f16(half %a, half %b, half %c) #0
+declare half @llvm.fabs.f16(half %a) #0
+declare half @llvm.minnum.f16(half %a, half %b) #0
+declare half @llvm.maxnum.f16(half %a, half %b) #0
+declare half @llvm.copysign.f16(half %a, half %b) #0
+declare half @llvm.floor.f16(half %a) #0
+declare half @llvm.ceil.f16(half %a) #0
+declare half @llvm.trunc.f16(half %a) #0
+declare half @llvm.rint.f16(half %a) #0
+declare half @llvm.nearbyint.f16(half %a) #0
+declare half @llvm.round.f16(half %a) #0
+declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
+
+; CHECK-LABEL: test_sqrt(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
+; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_sqrt(half %a) #0 {
+ %r = call half @llvm.sqrt.f16(half %a)
+ ret half %r
+}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_powi(
+;define half @test_powi(half %a, i32 %b) #0 {
+; %r = call half @llvm.powi.f16(half %a, i32 %b)
+; ret half %r
+;}
+
+; CHECK-LABEL: test_sin(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
+; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_sin(half %a) #0 #1 {
+ %r = call half @llvm.sin.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_cos(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
+; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_cos(half %a) #0 #1 {
+ %r = call half @llvm.cos.f16(half %a)
+ ret half %r
+}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_pow(
+;define half @test_pow(half %a, half %b) #0 {
+; %r = call half @llvm.pow.f16(half %a, half %b)
+; ret half %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_exp(
+;define half @test_exp(half %a) #0 {
+; %r = call half @llvm.exp.f16(half %a)
+; ret half %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_exp2(
+;define half @test_exp2(half %a) #0 {
+; %r = call half @llvm.exp2.f16(half %a)
+; ret half %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log(
+;define half @test_log(half %a) #0 {
+; %r = call half @llvm.log.f16(half %a)
+; ret half %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log10(
+;define half @test_log10(half %a) #0 {
+; %r = call half @llvm.log10.f16(half %a)
+; ret half %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log2(
+;define half @test_log2(half %a) #0 {
+; %r = call half @llvm.log2.f16(half %a)
+; ret half %r
+;}
+
+; CHECK-LABEL: test_fma(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
+; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
+; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret
+define half @test_fma(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fma.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fabs(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
+; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK: abs.f32 [[RF:%f[0-9]+]], [[AF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_fabs(half %a) #0 {
+ %r = call half @llvm.fabs.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_minnum(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
+; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_minnum(half %a, half %b) #0 {
+ %r = call half @llvm.minnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_maxnum(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
+; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
+; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
+; CHECK: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
+; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_maxnum(half %a, half %b) #0 {
+ %r = call half @llvm.maxnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign(
+; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
+; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
+; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
+; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
+; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
+; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
+; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_copysign(half %a, half %b) #0 {
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign_f32(
+; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
+; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
+; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
+; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
+; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
+; CHECK-DAG: shr.u32 [[BX1:%r[0-9]+]], [[BX0]], 16;
+; CHECK-DAG: cvt.u16.u32 [[BX2:%rs[0-9]+]], [[BX1]];
+; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
+; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_copysign_f32(half %a, float %b) #0 {
+ %tb = fptrunc float %b to half
+ %r = call half @llvm.copysign.f16(half %a, half %tb)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign_f64(
+; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
+; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
+; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
+; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
+; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
+; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
+; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
+; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
+; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_copysign_f64(half %a, double %b) #0 {
+ %tb = fptrunc double %b to half
+ %r = call half @llvm.copysign.f16(half %a, half %tb)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign_extended(
+; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
+; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
+; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
+; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
+; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
+; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
+; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
+; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
+; CHECK: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
+; CHECK: st.param.f32 [func_retval0+0], [[XR]];
+; CHECK: ret;
+define float @test_copysign_extended(half %a, half %b) #0 {
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ %xr = fpext half %r to float
+ ret float %xr
+}
+
+; CHECK-LABEL: test_floor(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
+; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_floor(half %a) #0 {
+ %r = call half @llvm.floor.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_ceil(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
+; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_ceil(half %a) #0 {
+ %r = call half @llvm.ceil.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_trunc(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
+; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_trunc(half %a) #0 {
+ %r = call half @llvm.trunc.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_rint(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_rint(half %a) #0 {
+ %r = call half @llvm.rint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_nearbyint(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_nearbyint(half %a) #0 {
+ %r = call half @llvm.nearbyint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_round(
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];
+; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_round(half %a) #0 {
+ %r = call half @llvm.round.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fmuladd(
+; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
+; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
+; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
+; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
+; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_fmuladd(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "unsafe-fp-math" = "true" }
Modified: llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll Mon May 15 12:17:44 2017
@@ -1,1426 +1,1427 @@
-; ## Full FP16 support enabled by default.
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -disable-fp-elim \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
-; ## FP16 support explicitly disabled.
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
-; ## FP16 is not supported by hardware.
-; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
-; RUN: -disable-post-ra -disable-fp-elim \
-; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-
-; CHECK-LABEL: test_ret_const(
-; CHECK: mov.u32 [[T:%r[0-9+]]], 1073757184;
-; CHECK: mov.b32 [[R:%hh[0-9+]]], [[T]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_ret_const() #0 {
- ret <2 x half> <half 1.0, half 2.0>
-}
-
-; CHECK-LABEL: test_extract_0(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
-; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_extract_0(<2 x half> %a) #0 {
- %e = extractelement <2 x half> %a, i32 0
- ret half %e
-}
-
-; CHECK-LABEL: test_extract_1(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
-; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_extract_1(<2 x half> %a) #0 {
- %e = extractelement <2 x half> %a, i32 1
- ret half %e
-}
-
-; CHECK-LABEL: test_extract_i(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
-; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
-; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
-; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK: ret;
-define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
- %e = extractelement <2 x half> %a, i64 %idx
- ret half %e
-}
-
-; CHECK-LABEL: test_fadd(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
-;
-; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
- %r = fadd <2 x half> %a, %b
- ret <2 x half> %r
-}
-
-; Check that we can lower fadd with immediate arguments.
-; CHECK-LABEL: test_fadd_imm_0(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
-;
-; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184;
-; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]];
-; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
-; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
- %r = fadd <2 x half> <half 1.0, half 2.0>, %a
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fadd_imm_1(
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
-;
-; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184;
-; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]];
-; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
-; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
- %r = fadd <2 x half> %a, <half 1.0, half 2.0>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fsub(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
-;
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
-; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
- %r = fsub <2 x half> %a, %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fneg(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
-;
-; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0;
-; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]];
-; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
-; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
-; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fneg(<2 x half> %a) #0 {
- %r = fsub <2 x half> <half 0.0, half 0.0>, %a
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fmul(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
-; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
- %r = fmul <2 x half> %a, %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fdiv(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
-; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
-; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
-; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
- %r = fdiv <2 x half> %a, %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_frem(
-; -- Load two f16x2 inputs and split them into f16 elements
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
-; -- Split into elements
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; -- promote to f32.
-; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
-; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
-; -- frem(a[0],b[0]).
-; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
-; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
-; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
-; -- frem(a[1],b[1]).
-; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
-; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
-; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
-; -- convert back to f16.
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; -- merge into f16x2 and return it.
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
- %r = frem <2 x half> %a, %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: .func test_ldst_v2f16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
-; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]]
-; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
-; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
-; CHECK: ret;
-define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
- %t1 = load <2 x half>, <2 x half>* %a
- store <2 x half> %t1, <2 x half>* %b, align 16
- ret void
-}
-
-; CHECK-LABEL: .func test_ldst_v3f16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1];
-; -- v3 is inconvenient to capture as it's lowered as ld.b64 plus a fair
-; number of bit-shifting instructions that may change at LLVM's whim.
-; So we only verify that we issue the correct number of writes at the
-; correct offsets, but not the values we write.
-; CHECK-DAG: ld.u64
-; CHECK-DAG: st.u32 [%[[B]]],
-; CHECK-DAG: st.b16 [%[[B]]+4],
-; CHECK: ret;
-define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
- %t1 = load <3 x half>, <3 x half>* %a
- store <3 x half> %t1, <3 x half>* %b, align 16
- ret void
-}
-
-; CHECK-LABEL: .func test_ldst_v4f16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
-; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
-; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: ret;
-define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
- %t1 = load <4 x half>, <4 x half>* %a
- store <4 x half> %t1, <4 x half>* %b, align 16
- ret void
-}
-
-; CHECK-LABEL: .func test_ldst_v8f16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1];
-; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
-; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: ret;
-define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
- %t1 = load <8 x half>, <8 x half>* %a
- store <8 x half> %t1, <8 x half>* %b, align 16
- ret void
-}
-
-declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
-
-; CHECK-LABEL: test_call(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1];
-; CHECK: {
-; CHECK-DAG: .param .align 4 .b8 param0[4];
-; CHECK-DAG: .param .align 4 .b8 param1[4];
-; CHECK-DAG: st.param.b32 [param0+0], [[A]];
-; CHECK-DAG: st.param.b32 [param1+0], [[B]];
-; CHECK-DAG: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_call_flipped(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1];
-; CHECK: {
-; CHECK-DAG: .param .align 4 .b8 param0[4];
-; CHECK-DAG: .param .align 4 .b8 param1[4];
-; CHECK-DAG: st.param.b32 [param0+0], [[B]];
-; CHECK-DAG: st.param.b32 [param1+0], [[A]];
-; CHECK-DAG: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_tailcall_flipped(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
-; CHECK: {
-; CHECK-DAG: .param .align 4 .b8 param0[4];
-; CHECK-DAG: .param .align 4 .b8 param1[4];
-; CHECK-DAG: st.param.b32 [param0+0], [[B]];
-; CHECK-DAG: st.param.b32 [param1+0], [[A]];
-; CHECK-DAG: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
- %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_select(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1];
-; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
-; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
-; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
- %r = select i1 %c, <2 x half> %a, <2 x half> %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_select_cc(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2];
-; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3];
-;
-; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
-;
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
-; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
-; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
-;
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
-; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
- %cc = fcmp une <2 x half> %c, %d
- %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_select_cc_f32_f16(
-; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
-; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
-; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
-;
-; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
-; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
-; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
-; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
-;
-; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]];
-; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]];
-; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
- <2 x half> %c, <2 x half> %d) #0 {
- %cc = fcmp une <2 x half> %c, %d
- %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
- ret <2 x float> %r
-}
-
-; CHECK-LABEL: test_select_cc_f16_f32(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
-; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
-; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
-; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
-; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
-; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
- <2 x float> %c, <2 x float> %d) #0 {
- %cc = fcmp une <2 x float> %c, %d
- %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fcmp_une(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
-; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp une <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ueq(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
-; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ueq <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ugt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
-; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ugt <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_uge(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
-; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp uge <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ult(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
-; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ult <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ule(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
-; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ule <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-
-; CHECK-LABEL: test_fcmp_uno(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
-; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp uno <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_one(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
-; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp one <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_oeq(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
-; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp oeq <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ogt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
-; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ogt <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_oge(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
-; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp oge <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_olt(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
-; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp olt <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; XCHECK-LABEL: test_fcmp_ole(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
-; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ole <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fcmp_ord(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
-; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
-; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
-; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
-; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
-; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-NEXT: ret;
-define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
- %r = fcmp ord <2 x half> %a, %b
- ret <2 x i1> %r
-}
-
-; CHECK-LABEL: test_fptosi_i32(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
-; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
-; CHECK: ret;
-define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
- %r = fptosi <2 x half> %a to <2 x i32>
- ret <2 x i32> %r
-}
-
-; CHECK-LABEL: test_fptosi_i64(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
-; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
-; CHECK: ret;
-define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
- %r = fptosi <2 x half> %a to <2 x i64>
- ret <2 x i64> %r
-}
-
-; CHECK-LABEL: test_fptoui_2xi32(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
-; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
-; CHECK: ret;
-define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
- %r = fptoui <2 x half> %a to <2 x i32>
- ret <2 x i32> %r
-}
-
-; CHECK-LABEL: test_fptoui_2xi64(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
-; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
-; CHECK: ret;
-define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
- %r = fptoui <2 x half> %a to <2 x i64>
- ret <2 x i64> %r
-}
-
-; CHECK-LABEL: test_uitofp_2xi32(
-; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
-; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
- %r = uitofp <2 x i32> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_uitofp_2xi64(
-; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
-; CHECK-DAG: cvt.rn.f32.u64 [[F0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f32.u64 [[F1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
- %r = uitofp <2 x i64> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_sitofp_2xi32(
-; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
-; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
- %r = sitofp <2 x i32> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_sitofp_2xi64(
-; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
-; CHECK-DAG: cvt.rn.f32.s64 [[F0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f32.s64 [[F1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
- %r = sitofp <2 x i64> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_uitofp_2xi32_fadd(
-; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
-; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
-
-; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
-; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
-; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
-; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
- %c = uitofp <2 x i32> %a to <2 x half>
- %r = fadd <2 x half> %b, %c
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_sitofp_2xi32_fadd(
-; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
-; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
-;
-; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
-; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
-; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
-; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
- %c = sitofp <2 x i32> %a to <2 x half>
- %r = fadd <2 x half> %b, %c
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fptrunc_2xfloat(
-; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
- %r = fptrunc <2 x float> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fptrunc_2xdouble(
-; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
-; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
- %r = fptrunc <2 x double> %a to <2 x half>
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fpext_2xfloat(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
-; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK: ret;
-define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
- %r = fpext <2 x half> %a to <2 x float>
- ret <2 x float> %r
-}
-
-; CHECK-LABEL: test_fpext_2xdouble(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
-; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK: ret;
-define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
- %r = fpext <2 x half> %a to <2 x double>
- ret <2 x double> %r
-}
-
-
-; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
-; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
-; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]]
-; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16
-; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]]
-; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
-; CHECK: ret;
-define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
- %r = bitcast <2 x half> %a to <2 x i16>
- ret <2 x i16> %r
-}
-
-; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
-; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
-; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
-; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
-; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
-; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
- %r = bitcast <2 x i16> %a to <2 x half>
- ret <2 x half> %r
-}
-
-
-declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0
-declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
-declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
-declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
-declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
-declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
-declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
-declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
-
-; CHECK-LABEL: test_sqrt(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_sqrt(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_powi(
-;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
-; %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b)
-; ret <2 x half> %r
-;}
-
-; CHECK-LABEL: test_sin(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
- %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_cos(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
- %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_pow(
-;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
-; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
-; ret <2 x half> %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_exp(
-;define <2 x half> @test_exp(<2 x half> %a) #0 {
-; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
-; ret <2 x half> %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_exp2(
-;define <2 x half> @test_exp2(<2 x half> %a) #0 {
-; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
-; ret <2 x half> %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log(
-;define <2 x half> @test_log(<2 x half> %a) #0 {
-; %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
-; ret <2 x half> %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log10(
-;define <2 x half> @test_log10(<2 x half> %a) #0 {
-; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
-; ret <2 x half> %r
-;}
-
-;;; Can't do this yet: requires libcall.
-; XCHECK-LABEL: test_log2(
-;define <2 x half> @test_log2(<2 x half> %a) #0 {
-; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
-; ret <2 x half> %r
-;}
-
-; CHECK-LABEL: test_fma(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
-;
-; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
-; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret
-define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
- %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fabs(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
-; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_fabs(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_minnum(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
-; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
-; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
-; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_maxnum(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
-; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
-; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
-; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_copysign(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_copysign_f32(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
-; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
-; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
-; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16;
-; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16;
-; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]];
-; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]];
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
- %tb = fptrunc <2 x float> %b to <2 x half>
- %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_copysign_f64(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
-; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
-; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
-; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
-; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
-; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
-; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
- %tb = fptrunc <2 x double> %b to <2 x half>
- %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_copysign_extended(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
-; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
-; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
-; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
-; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
-; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
-; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
-; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
-; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
-; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
-; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
-; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]];
-; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]];
-; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
-; CHECK: ret;
-define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
- %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
- %xr = fpext <2 x half> %r to <2 x float>
- ret <2 x float> %xr
-}
-
-; CHECK-LABEL: test_floor(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_floor(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.floor.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_ceil(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_ceil(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_trunc(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_trunc(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_rint(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_rint(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_nearbyint(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_round(
-; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_round_param_0];
-; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
-; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_round(<2 x half> %a) #0 {
- %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
- ret <2 x half> %r
-}
-
-; CHECK-LABEL: test_fmuladd(
-; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1];
-; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2];
-;
-; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
-;
-; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
-; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
-; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
-; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
-; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
-; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
-; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
-; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
- %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
- ret <2 x half> %r
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
+; ## Full FP16 support enabled by default.
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
+; ## FP16 support explicitly disabled.
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
+; RUN: -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
+; ## FP16 is not supported by hardware.
+; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
+; RUN: -disable-post-ra -disable-fp-elim -verify-machineinstrs \
+; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: test_ret_const(
+; CHECK: mov.u32 [[T:%r[0-9]+]], 1073757184;
+; CHECK: mov.b32 [[R:%hh[0-9]+]], [[T]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_ret_const() #0 {
+ ret <2 x half> <half 1.0, half 2.0>
+}
+
+; CHECK-LABEL: test_extract_0(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
+; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_extract_0(<2 x half> %a) #0 {
+ %e = extractelement <2 x half> %a, i32 0
+ ret half %e
+}
+
+; CHECK-LABEL: test_extract_1(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
+; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_extract_1(<2 x half> %a) #0 {
+ %e = extractelement <2 x half> %a, i32 1
+ ret half %e
+}
+
+; CHECK-LABEL: test_extract_i(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
+; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
+; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
+; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
+; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK: ret;
+define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
+ %e = extractelement <2 x half> %a, i64 %idx
+ ret half %e
+}
+
+; CHECK-LABEL: test_fadd(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
+;
+; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
+; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fadd <2 x half> %a, %b
+ ret <2 x half> %r
+}
+
+; Check that we can lower fadd with immediate arguments.
+; CHECK-LABEL: test_fadd_imm_0(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
+;
+; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
+; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
+; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
+; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
+ %r = fadd <2 x half> <half 1.0, half 2.0>, %a
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fadd_imm_1(
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
+;
+; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
+; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
+; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
+; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
+ %r = fadd <2 x half> %a, <half 1.0, half 2.0>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fsub(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
+;
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
+; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
+; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fsub <2 x half> %a, %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fneg(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
+;
+; CHECK-F16: mov.u32 [[I0:%r[0-9]+]], 0;
+; CHECK-F16: mov.b32 [[IHH0:%hh[0-9]+]], [[I0]];
+; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
+; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
+; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fneg(<2 x half> %a) #0 {
+ %r = fsub <2 x half> <half 0.0, half 0.0>, %a
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fmul(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
+; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
+; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fmul <2 x half> %a, %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fdiv(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
+; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
+; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
+; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
+; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fdiv <2 x half> %a, %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_frem(
+; -- Load two f16x2 inputs and split them into f16 elements
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
+; -- Split into elements
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; -- promote to f32.
+; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
+; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
+; -- frem(a[0],b[0]).
+; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
+; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
+; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
+; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
+; -- frem(a[1],b[1]).
+; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
+; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
+; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
+; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
+; -- convert back to f16.
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; -- merge into f16x2 and return it.
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
+ %r = frem <2 x half> %a, %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: .func test_ldst_v2f16(
+; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
+; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
+; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]]
+; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
+; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
+; CHECK: ret;
+define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
+ %t1 = load <2 x half>, <2 x half>* %a
+ store <2 x half> %t1, <2 x half>* %b, align 16
+ ret void
+}
+
+; CHECK-LABEL: .func test_ldst_v3f16(
+; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0];
+; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1];
+; -- v3 is inconvenient to capture as it's lowered as ld.b64 + a fair
+; number of bit-shifting instructions that may change at LLVM's whim.
+; So we only verify that we issue the correct number of writes at the
+; correct offsets, but not the values we write.
+; CHECK-DAG: ld.u64
+; CHECK-DAG: st.u32 [%[[B]]],
+; CHECK-DAG: st.b16 [%[[B]]+4],
+; CHECK: ret;
+define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
+ %t1 = load <3 x half>, <3 x half>* %a
+ store <3 x half> %t1, <3 x half>* %b, align 16
+ ret void
+}
+
+; CHECK-LABEL: .func test_ldst_v4f16(
+; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
+; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
+; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
+; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: ret;
+define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
+ %t1 = load <4 x half>, <4 x half>* %a
+ store <4 x half> %t1, <4 x half>* %b, align 16
+ ret void
+}
+
+; CHECK-LABEL: .func test_ldst_v8f16(
+; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0];
+; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1];
+; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
+; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: ret;
+define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
+ %t1 = load <8 x half>, <8 x half>* %a
+ store <8 x half> %t1, <8 x half>* %b, align 16
+ ret void
+}
+
+declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
+
+; CHECK-LABEL: test_call(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1];
+; CHECK: {
+; CHECK-DAG: .param .align 4 .b8 param0[4];
+; CHECK-DAG: .param .align 4 .b8 param1[4];
+; CHECK-DAG: st.param.b32 [param0+0], [[A]];
+; CHECK-DAG: st.param.b32 [param1+0], [[B]];
+; CHECK-DAG: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_call_flipped(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1];
+; CHECK: {
+; CHECK-DAG: .param .align 4 .b8 param0[4];
+; CHECK-DAG: .param .align 4 .b8 param1[4];
+; CHECK-DAG: st.param.b32 [param0+0], [[B]];
+; CHECK-DAG: st.param.b32 [param1+0], [[A]];
+; CHECK-DAG: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_tailcall_flipped(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
+; CHECK: {
+; CHECK-DAG: .param .align 4 .b8 param0[4];
+; CHECK-DAG: .param .align 4 .b8 param1[4];
+; CHECK-DAG: st.param.b32 [param0+0], [[B]];
+; CHECK-DAG: st.param.b32 [param1+0], [[A]];
+; CHECK-DAG: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK: );
+; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
+ %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_select(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1];
+; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
+; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
+; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
+ %r = select i1 %c, <2 x half> %a, <2 x half> %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_select_cc(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2];
+; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3];
+;
+; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
+;
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
+; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
+; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
+;
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
+; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
+ %cc = fcmp une <2 x half> %c, %d
+ %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_select_cc_f32_f16(
+; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
+; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
+; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
+;
+; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
+; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
+; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
+; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
+;
+; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]];
+; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]];
+; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
+ <2 x half> %c, <2 x half> %d) #0 {
+ %cc = fcmp une <2 x half> %c, %d
+ %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
+ ret <2 x float> %r
+}
+
+; CHECK-LABEL: test_select_cc_f16_f32(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
+; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
+; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
+; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
+; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
+; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
+ <2 x float> %c, <2 x float> %d) #0 {
+ %cc = fcmp une <2 x float> %c, %d
+ %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fcmp_une(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
+; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp une <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ueq(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
+; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ueq <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ugt(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
+; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ugt <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_uge(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
+; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp uge <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ult(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
+; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ult <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ule(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
+; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ule <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+
+; CHECK-LABEL: test_fcmp_uno(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
+; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp uno <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_one(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
+; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp one <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_oeq(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
+; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp oeq <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ogt(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
+; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ogt <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_oge(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
+; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp oge <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_olt(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
+; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp olt <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ole(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
+; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ole <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fcmp_ord(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
+; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
+; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
+; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
+; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
+; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-NEXT: ret;
+define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
+ %r = fcmp ord <2 x half> %a, %b
+ ret <2 x i1> %r
+}
+
+; CHECK-LABEL: test_fptosi_i32(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
+; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
+; CHECK: ret;
+define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
+ %r = fptosi <2 x half> %a to <2 x i32>
+ ret <2 x i32> %r
+}
+
+; CHECK-LABEL: test_fptosi_i64(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
+; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
+; CHECK: ret;
+define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
+ %r = fptosi <2 x half> %a to <2 x i64>
+ ret <2 x i64> %r
+}
+
+; CHECK-LABEL: test_fptoui_2xi32(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
+; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
+; CHECK: ret;
+define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
+ %r = fptoui <2 x half> %a to <2 x i32>
+ ret <2 x i32> %r
+}
+
+; CHECK-LABEL: test_fptoui_2xi64(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
+; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
+; CHECK: ret;
+define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
+ %r = fptoui <2 x half> %a to <2 x i64>
+ ret <2 x i64> %r
+}
+
+; CHECK-LABEL: test_uitofp_2xi32(
+; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
+; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
+ %r = uitofp <2 x i32> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_uitofp_2xi64(
+; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
+; CHECK-DAG: cvt.rn.f32.u64 [[F0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f32.u64 [[F1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
+ %r = uitofp <2 x i64> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_sitofp_2xi32(
+; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
+; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
+ %r = sitofp <2 x i32> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_sitofp_2xi64(
+; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
+; CHECK-DAG: cvt.rn.f32.s64 [[F0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f32.s64 [[F1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
+ %r = sitofp <2 x i64> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_uitofp_2xi32_fadd(
+; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
+; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
+;
+; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
+; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
+; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
+; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
+ %c = uitofp <2 x i32> %a to <2 x half>
+ %r = fadd <2 x half> %b, %c
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_sitofp_2xi32_fadd(
+; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
+; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
+;
+; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
+; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
+;
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
+; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
+; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
+ %c = sitofp <2 x i32> %a to <2 x half>
+ %r = fadd <2 x half> %b, %c
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fptrunc_2xfloat(
+; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
+ %r = fptrunc <2 x float> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fptrunc_2xdouble(
+; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
+; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
+ %r = fptrunc <2 x double> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fpext_2xfloat(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
+; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK: ret;
+define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
+ %r = fpext <2 x half> %a to <2 x float>
+ ret <2 x float> %r
+}
+
+; CHECK-LABEL: test_fpext_2xdouble(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
+; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK: ret;
+define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
+ %r = fpext <2 x half> %a to <2 x double>
+ ret <2 x double> %r
+}
+
+
+; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
+; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
+; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]]
+; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16
+; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]]
+; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
+; CHECK: ret;
+define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
+ %r = bitcast <2 x half> %a to <2 x i16>
+ ret <2 x i16> %r
+}
+
+; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
+; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
+; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
+; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
+; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
+; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
+ %r = bitcast <2 x i16> %a to <2 x half>
+ ret <2 x half> %r
+}
+
+
+declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0
+declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
+declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
+declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
+declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
+declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
+declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
+declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
+
+; CHECK-LABEL: test_sqrt(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
+; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_sqrt(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_powi(
+;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
+; %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b)
+; ret <2 x half> %r
+;}
+
+; CHECK-LABEL: test_sin(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
+; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
+ %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_cos(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
+; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
+ %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_pow(
+;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
+; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
+; ret <2 x half> %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_exp(
+;define <2 x half> @test_exp(<2 x half> %a) #0 {
+; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
+; ret <2 x half> %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_exp2(
+;define <2 x half> @test_exp2(<2 x half> %a) #0 {
+; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
+; ret <2 x half> %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log(
+;define <2 x half> @test_log(<2 x half> %a) #0 {
+; %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
+; ret <2 x half> %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log10(
+;define <2 x half> @test_log10(<2 x half> %a) #0 {
+; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
+; ret <2 x half> %r
+;}
+
+;;; Can't do this yet: requires libcall.
+; XCHECK-LABEL: test_log2(
+;define <2 x half> @test_log2(<2 x half> %a) #0 {
+; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
+; ret <2 x half> %r
+;}
+
+; CHECK-LABEL: test_fma(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
+; The F16 checks expect the whole <2 x half> fma as one packed f16x2 instruction.
+; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
+; The NOF16 checks expect each lane unpacked, extended to f32, fused, and rounded back.
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
+; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
+; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
+ %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fabs(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
+; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
+; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_fabs(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_minnum(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
+; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
+; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
+; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_maxnum(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
+; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
+; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
+; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
+; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
+; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_copysign(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
+; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
+; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
+; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
+; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
+; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
+; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
+; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_copysign_f32(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
+; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
+; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
+; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
+; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
+; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
+; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16;
+; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16;
+; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]];
+; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]];
+; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
+; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
+; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
+ %tb = fptrunc <2 x float> %b to <2 x half>
+ %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_copysign_f64(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
+; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
+; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
+; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
+; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
+; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
+; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
+; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
+; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
+; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
+; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
+; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
+; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
+; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
+; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
+ %tb = fptrunc <2 x double> %b to <2 x half>
+ %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_copysign_extended(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
+; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
+; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
+; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
+; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
+; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
+; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
+; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
+; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
+; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
+; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
+; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
+; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
+; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]];
+; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]];
+; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
+; CHECK: ret;
+define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
+ %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
+ %xr = fpext <2 x half> %r to <2 x float>
+ ret <2 x float> %xr
+}
+
+; CHECK-LABEL: test_floor(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_floor(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.floor.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_ceil(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_ceil(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_trunc(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_trunc(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_rint(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_rint(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_nearbyint(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_round(
+; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_round_param_0];
+; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
+; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
+; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_round(<2 x half> %a) #0 {
+ %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
+ ret <2 x half> %r
+}
+
+; CHECK-LABEL: test_fmuladd(
+; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];
+; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1];
+; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2];
+; The F16 checks expect fmuladd to be emitted as one packed f16x2 fma.
+; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
+; The NOF16 checks expect each lane unpacked, extended to f32, fused, and rounded back.
+; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
+; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
+; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
+; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
+; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
+; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
+; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
+; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
+; Both variants leave the packed result in R for the return-value store.
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
+ %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
+ ret <2 x half> %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "unsafe-fp-math" = "true" }
Modified: llvm/trunk/test/CodeGen/NVPTX/fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/fma.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/fma.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/fma.ll Mon May 15 12:17:44 2017
@@ -1,42 +1,42 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
-
-declare float @dummy_f32(float, float) #0
-declare double @dummy_f64(double, double) #0
-
-define ptx_device float @t1_f32(float %x, float %y, float %z) {
-; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fmul float %x, %y
- %b = fadd float %a, %z
- ret float %b
-}
-
-define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
-; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fmul float %x, %y
- %b = fadd float %a, %z
- %c = fadd float %a, %w
- %d = call float @dummy_f32(float %b, float %c)
- ret float %d
-}
-
-define ptx_device double @t1_f64(double %x, double %y, double %z) {
-; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fmul double %x, %y
- %b = fadd double %a, %z
- ret double %b
-}
-
-define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
-; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fmul double %x, %y
- %b = fadd double %a, %z
- %c = fadd double %a, %w
- %d = call double @dummy_f64(double %b, double %c)
- ret double %d
-}
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | FileCheck %s
+
+declare float @dummy_f32(float, float) #0
+declare double @dummy_f64(double, double) #0
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul float %x, %y
+ %b = fadd float %a, %z
+ ret float %b
+}
+
+define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul float %x, %y
+ %b = fadd float %a, %z
+ %c = fadd float %a, %w
+ %d = call float @dummy_f32(float %b, float %c)
+ ret float %d
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ ret double %b
+}
+
+define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ %c = fadd double %a, %w
+ %d = call double @dummy_f64(double %b, double %c)
+ ret double %d
+}
Modified: llvm/trunk/test/CodeGen/NVPTX/i8-param.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/i8-param.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/i8-param.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/i8-param.ll Mon May 15 12:17:44 2017
@@ -1,23 +1,23 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-
-; CHECK: .visible .func (.param .b32 func_retval0) callee
-define i8 @callee(i8 %a) {
-; CHECK: ld.param.u8
- %ret = add i8 %a, 42
-; CHECK: st.param.b32
- ret i8 %ret
-}
-
-; CHECK: .visible .func caller
-define void @caller(i8* %a) {
-; CHECK: ld.u8
- %val = load i8, i8* %a
- %ret = tail call i8 @callee(i8 %val)
-; CHECK: ld.param.b32
- store i8 %ret, i8* %a
- ret void
-}
-
-
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; CHECK: .visible .func (.param .b32 func_retval0) callee
+define i8 @callee(i8 %a) {
+; CHECK: ld.param.u8
+ %ret = add i8 %a, 42
+; CHECK: st.param.b32
+ ret i8 %ret
+}
+
+; CHECK: .visible .func caller
+define void @caller(i8* %a) {
+; CHECK: ld.u8
+ %val = load i8, i8* %a
+ %ret = tail call i8 @callee(i8 %val)
+; CHECK: ld.param.b32
+ store i8 %ret, i8* %a
+ ret void
+}
+
+
Modified: llvm/trunk/test/CodeGen/NVPTX/param-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/param-load-store.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/param-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/param-load-store.ll Mon May 15 12:17:44 2017
@@ -1,939 +1,939 @@
-; Verifies correctness of load/store of parameters and return values.
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | FileCheck %s
-
-%s_i1 = type { i1 }
-%s_i8 = type { i8 }
-%s_i16 = type { i16 }
-%s_f16 = type { half }
-%s_i32 = type { i32 }
-%s_f32 = type { float }
-%s_i64 = type { i64 }
-%s_f64 = type { double }
-
-; More complicated types. i64 is used to increase natural alignment
-; requirement for the type.
-%s_i32x4 = type { i32, i32, i32, i32, i64}
-%s_i32f32 = type { i32, float, i32, float, i64}
-%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
-%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
-%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
-; All scalar parameters must be at least 32 bits in size.
-; i1 is loaded/stored as i8.
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i1(
-; CHECK-NEXT: .param .b32 test_i1_param_0
-; CHECK: ld.param.u8 [[A8:%r[0-9]+]], [test_i1_param_0];
-; CHECK: and.b32 [[A:%r[0-9]+]], [[A8]], 1;
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[A]]
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni
-; CHECK-NEXT: test_i1,
-; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK: ret;
-define i1 @test_i1(i1 %a) {
- %r = tail call i1 @test_i1(i1 %a);
- ret i1 %r;
-}
-
-; Signed i1 is a somewhat special case. We only care about one bit and
-; then us neg.s32 to convert it to 32-bit -1 if it's set.
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i1s(
-; CHECK-NEXT: .param .b32 test_i1s_param_0
-; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
-; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
-; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
-; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[A]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni
-; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
-; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
-; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define signext i1 @test_i1s(i1 signext %a) {
- %r = tail call signext i1 @test_i1s(i1 signext %a);
- ret i1 %r;
-}
-
-; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_v3i1(
-; CHECK-NEXT: .param .align 4 .b8 test_v3i1_param_0[4]
-; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
-; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i1_param_0]
-; CHECK: .param .align 4 .b8 param0[4];
-; CHECK-DAG: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i1,
-; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
-; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}
-; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
-; CHECK-NEXT: ret;
-define <3 x i1> @test_v3i1(<3 x i1> %a) {
- %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
- ret <3 x i1> %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_v4i1(
-; CHECK-NEXT: .param .align 4 .b8 test_v4i1_param_0[4]
-; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i1_param_0]
-; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK: test_v4i1,
-; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]};
-; CHECK-NEXT: ret;
-define <4 x i1> @test_v4i1(<4 x i1> %a) {
- %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
- ret <4 x i1> %r;
-}
-
-; CHECK: .func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v5i1(
-; CHECK-NEXT: .param .align 8 .b8 test_v5i1_param_0[8]
-; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
-; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i1_param_0]
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i1,
-; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
-; CHECK-NEXT: ret;
-define <5 x i1> @test_v5i1(<5 x i1> %a) {
- %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
- ret <5 x i1> %r;
-}
-
-; Unsigned i8 is loaded directly into 32-bit register.
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i8(
-; CHECK-NEXT: .param .b32 test_i8_param_0
-; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
-; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
-; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[A]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i8,
-; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i8 @test_i8(i8 %a) {
- %r = tail call i8 @test_i8(i8 %a);
- ret i8 %r;
-}
-
-; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i8s(
-; CHECK-NEXT: .param .b32 test_i8s_param_0
-; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
-; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[A]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i8s,
-; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
-; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
-; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
-; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define signext i8 @test_i8s(i8 signext %a) {
- %r = tail call signext i8 @test_i8s(i8 signext %a);
- ret i8 %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_v3i8(
-; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
-; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
-; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
-; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b8 [param0+2], [[E2]];
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i8,
-; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
-; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
-; CHECK-NEXT: ret;
-define <3 x i8> @test_v3i8(<3 x i8> %a) {
- %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
- ret <3 x i8> %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_v4i8(
-; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
-; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
-; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i8,
-; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-NEXT: ret;
-define <4 x i8> @test_v4i8(<4 x i8> %a) {
- %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
- ret <4 x i8> %r;
-}
-
-; CHECK: .func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v5i8(
-; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
-; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
-; CHECK-DAG ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i8,
-; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
-; CHECK-NEXT: ret;
-define <5 x i8> @test_v5i8(<5 x i8> %a) {
- %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
- ret <5 x i8> %r;
-}
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i16(
-; CHECK-NEXT: .param .b32 test_i16_param_0
-; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
-; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[E32]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i16,
-; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
-; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i16 @test_i16(i16 %a) {
- %r = tail call i16 @test_i16(i16 %a);
- ret i16 %r;
-}
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i16s(
-; CHECK-NEXT: .param .b32 test_i16s_param_0
-; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
-; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[E32]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i16s,
-; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
-; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define signext i16 @test_i16s(i16 signext %a) {
- %r = tail call signext i16 @test_i16s(i16 signext %a);
- ret i16 %r;
-}
-
-; CHECK: .func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v3i16(
-; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
-; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
-; CHECK-DAG: ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b16 [param0+4], [[E2]];
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i16,
-; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
-; CHECK-NEXT: ret;
-define <3 x i16> @test_v3i16(<3 x i16> %a) {
- %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
- ret <3 x i16> %r;
-}
-
-; CHECK: .func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v4i16(
-; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
-; CHECK: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i16,
-; CHECK: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-NEXT: ret;
-define <4 x i16> @test_v4i16(<4 x i16> %a) {
- %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
- ret <4 x i16> %r;
-}
-
-; CHECK: .func (.param .align 16 .b8 func_retval0[16])
-; CHECK-LABEL: test_v5i16(
-; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
-; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
-; CHECK-DAG ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
-; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i16,
-; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
-; CHECK-NEXT: ret;
-define <5 x i16> @test_v5i16(<5 x i16> %a) {
- %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
- ret <5 x i16> %r;
-}
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_f16(
-; CHECK-NEXT: .param .b32 test_f16_param_0
-; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b16 [param0+0], [[E]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_f16,
-; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
-; CHECK: st.param.b16 [func_retval0+0], [[R]]
-; CHECK-NEXT: ret;
-define half @test_f16(half %a) {
- %r = tail call half @test_f16(half %a);
- ret half %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_v2f16(
-; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
-; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0];
-; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0+0], [[E]];
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v2f16,
-; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
-; CHECK: st.param.b32 [func_retval0+0], [[R]]
-; CHECK-NEXT: ret;
-define <2 x half> @test_v2f16(<2 x half> %a) {
- %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
- ret <2 x half> %r;
-}
-
-; CHECK:.func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v3f16(
-; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
-; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
-; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK: test_v3f16,
-; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4];
-; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
-; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
-; CHECK: ret;
-define <3 x half> @test_v3f16(<3 x half> %a) {
- %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
- ret <3 x half> %r;
-}
-
-; CHECK:.func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_v4f16(
-; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
-; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
-; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
-; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK: test_v4f16,
-; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
-; CHECK: ret;
-define <4 x half> @test_v4f16(<4 x half> %a) {
- %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
- ret <4 x half> %r;
-}
-
-; CHECK:.func (.param .align 16 .b8 func_retval0[16])
-; CHECK-LABEL: test_v5f16(
-; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
-; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
-; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0+0],
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
-; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK: test_v5f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
-; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
-; CHECK: ret;
-define <5 x half> @test_v5f16(<5 x half> %a) {
- %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
- ret <5 x half> %r;
-}
-
-; CHECK:.func (.param .align 16 .b8 func_retval0[16])
-; CHECK-LABEL: test_v8f16(
-; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
-; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
-; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
-; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
-; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]];
-; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
-; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK: test_v8f16,
-; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
-; CHECK: ret;
-define <8 x half> @test_v8f16(<8 x half> %a) {
- %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
- ret <8 x half> %r;
-}
-
-; CHECK:.func (.param .align 32 .b8 func_retval0[32])
-; CHECK-LABEL: test_v9f16(
-; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
-; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
-; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
-; CHECK: .param .align 32 .b8 param0[32];
-; CHECK-DAG: st.param.v4.b16 [param0+0],
-; CHECK-DAG: st.param.v4.b16 [param0+8],
-; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
-; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK: test_v9f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
-; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
-; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
-; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
-; CHECK: ret;
-define <9 x half> @test_v9f16(<9 x half> %a) {
- %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
- ret <9 x half> %r;
-}
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_i32(
-; CHECK-NEXT: .param .b32 test_i32_param_0
-; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0+0], [[E]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i32,
-; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i32 @test_i32(i32 %a) {
- %r = tail call i32 @test_i32(i32 %a);
- ret i32 %r;
-}
-
-; CHECK: .func (.param .align 16 .b8 func_retval0[16])
-; CHECK-LABEL: test_v3i32(
-; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
-; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
-; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i32,
-; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
-; CHECK-NEXT: ret;
-define <3 x i32> @test_v3i32(<3 x i32> %a) {
- %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
- ret <3 x i32> %r;
-}
-
-; CHECK: .func (.param .align 16 .b8 func_retval0[16])
-; CHECK-LABEL: test_v4i32(
-; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
-; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i32,
-; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
-; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHCK-NEXT: ret;
-define <4 x i32> @test_v4i32(<4 x i32> %a) {
- %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
- ret <4 x i32> %r;
-}
-
-; CHECK: .func (.param .align 32 .b8 func_retval0[32])
-; CHECK-LABEL: test_v5i32(
-; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
-; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
-; CHECK-DAG ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
-; CHECK: .param .align 32 .b8 param0[32];
-; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
-; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i32,
-; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
-; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
-; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
-; CHECK-NEXT: ret;
-define <5 x i32> @test_v5i32(<5 x i32> %a) {
- %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
- ret <5 x i32> %r;
-}
-
-; CHECK: .func (.param .b32 func_retval0)
-; CHECK-LABEL: test_f32(
-; CHECK-NEXT: .param .b32 test_f32_param_0
-; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
-; CHECK: .param .b32 param0;
-; CHECK: st.param.f32 [param0+0], [[E]];
-; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_f32,
-; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
-; CHECK: st.param.f32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define float @test_f32(float %a) {
- %r = tail call float @test_f32(float %a);
- ret float %r;
-}
-
-; CHECK: .func (.param .b64 func_retval0)
-; CHECK-LABEL: test_i64(
-; CHECK-NEXT: .param .b64 test_i64_param_0
-; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
-; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0+0], [[E]];
-; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i64,
-; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
-; CHECK: st.param.b64 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define i64 @test_i64(i64 %a) {
- %r = tail call i64 @test_i64(i64 %a);
- ret i64 %r;
-}
-
-; CHECK: .func (.param .align 32 .b8 func_retval0[32])
-; CHECK-LABEL: test_v3i64(
-; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
-; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
-; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
-; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b64 [param0+16], [[E2]];
-; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i64,
-; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
-; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
-; CHECK-NEXT: ret;
-define <3 x i64> @test_v3i64(<3 x i64> %a) {
- %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
- ret <3 x i64> %r;
-}
-
-; For i64 vector loads are limited by PTX to 2 elements.
-; CHECK: .func (.param .align 32 .b8 func_retval0[32])
-; CHECK-LABEL: test_v4i64(
-; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
-; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
-; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
-; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
-; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i64,
-; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
-; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
-; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-NEXT: ret;
-define <4 x i64> @test_v4i64(<4 x i64> %a) {
- %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
- ret <4 x i64> %r;
-}
-
-; Aggregates, on the other hand, do not get extended.
-
-; CHECK: .func (.param .align 1 .b8 func_retval0[1])
-; CHECK-LABEL: test_s_i1(
-; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
-; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
-; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0+0], [[A]]
-; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i1,
-; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
-; CHECK: st.param.b8 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_i1 @test_s_i1(%s_i1 %a) {
- %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
- ret %s_i1 %r;
-}
-
-; CHECK: .func (.param .align 1 .b8 func_retval0[1])
-; CHECK-LABEL: test_s_i8(
-; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
-; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
-; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0+0], [[A]]
-; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i8,
-; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
-; CHECK: st.param.b8 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_i8 @test_s_i8(%s_i8 %a) {
- %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
- ret %s_i8 %r;
-}
-
-; CHECK: .func (.param .align 2 .b8 func_retval0[2])
-; CHECK-LABEL: test_s_i16(
-; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
-; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
-; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0+0], [[A]]
-; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i16,
-; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_i16 @test_s_i16(%s_i16 %a) {
- %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
- ret %s_i16 %r;
-}
-
-; CHECK: .func (.param .align 2 .b8 func_retval0[2])
-; CHECK-LABEL: test_s_f16(
-; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
-; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
-; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0+0], [[A]]
-; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_f16,
-; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
-; CHECK: st.param.b16 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_f16 @test_s_f16(%s_f16 %a) {
- %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
- ret %s_f16 %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_s_i32(
-; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
-; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
-; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0+0], [[E]];
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32,
-; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
-; CHECK: st.param.b32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_i32 @test_s_i32(%s_i32 %a) {
- %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
- ret %s_i32 %r;
-}
-
-; CHECK: .func (.param .align 4 .b8 func_retval0[4])
-; CHECK-LABEL: test_s_f32(
-; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
-; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
-; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.f32 [param0+0], [[E]];
-; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_f32,
-; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
-; CHECK: st.param.f32 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_f32 @test_s_f32(%s_f32 %a) {
- %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
- ret %s_f32 %r;
-}
-
-; CHECK: .func (.param .align 8 .b8 func_retval0[8])
-; CHECK-LABEL: test_s_i64(
-; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
-; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
-; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.b64 [param0+0], [[E]];
-; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i64,
-; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
-; CHECK: st.param.b64 [func_retval0+0], [[R]];
-; CHECK-NEXT: ret;
-define %s_i64 @test_s_i64(%s_i64 %a) {
- %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
- ret %s_i64 %r;
-}
-
-; Fields that have different types, but identical sizes are not vectorized.
-; CHECK: .func (.param .align 8 .b8 func_retval0[24])
-; CHECK-LABEL: test_s_i32f32(
-; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
-; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
-; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
-; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
-; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
-; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
-; CHECK: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0+0], [[E0]];
-; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
-; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
-; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
-; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32f32,
-; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
-; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
-; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
-; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
-; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]];
-; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
-; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
-; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
-; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
-; CHECK: ret;
-define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
- %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
- ret %s_i32f32 %r;
-}
-
-; We do vectorize consecutive fields with matching types.
-; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
-; CHECK-LABEL: test_s_i32x4(
-; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
-; CHECK-DAG: ld.param.u64 [[RD1:%rd[0-9]+]], [test_s_i32x4_param_0+16];
-; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
-; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
-; CHECK: .param .align 8 .b8 param0[24];
-; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
-; CHECK: st.param.b64 [param0+16], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32x4,
-; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
-; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
-; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
-; CHECK: ret;
-
-define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
- %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
- ret %s_i32x4 %r;
-}
-
-; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
-; CHECK-LABEL: test_s_i1i32x4(
-; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
-; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
-; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
-; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
-; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
-; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
-; CHECK: .param .align 8 .b8 param0[32];
-; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b8 [param0+8], [[E2]];
-; CHECK: st.param.b32 [param0+12], [[E3]];
-; CHECK: st.param.b32 [param0+16], [[E4]];
-; CHECK: st.param.b64 [param0+24], [[E5]];
-; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK: test_s_i1i32x4,
-; CHECK: (
-; CHECK: param0
-; CHECK: );
-; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
-; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
-; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
-; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
-; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK: st.param.b8 [func_retval0+8], [[RE2]];
-; CHECK: st.param.b32 [func_retval0+12], [[RE3]];
-; CHECK: st.param.b32 [func_retval0+16], [[RE4]];
-; CHECK: st.param.b64 [func_retval0+24], [[RE5]];
-; CHECK: ret;
-
-define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
- %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
- ret %s_i8i32x4 %r;
-}
-
-; -- All loads/stores from parameters aligned by one must be done one
-; -- byte at a time.
-; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25])
-; CHECK-LABEL: test_s_i1i32x4p(
-; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
-; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
-; --- TODO
-; --- Unaligned parameter store/ return value load is broken in both nvcc
-; --- and llvm and needs to be fixed.
-; CHECK: .param .align 1 .b8 param0[25];
-; CHECK-DAG: st.param.b32 [param0+0],
-; CHECK-DAG: st.param.b32 [param0+4],
-; CHECK-DAG: st.param.b8 [param0+8],
-; CHECK-DAG: st.param.b32 [param0+9],
-; CHECK-DAG: st.param.b32 [param0+13],
-; CHECK-DAG: st.param.b64 [param0+17],
-; CHECK: .param .align 1 .b8 retval0[25];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i1i32x4p,
-; CHECK-DAG: ld.param.b32 %r41, [retval0+0];
-; CHECK-DAG: ld.param.b32 %r42, [retval0+4];
-; CHECK-DAG: ld.param.b8 %rs2, [retval0+8];
-; CHECK-DAG: ld.param.b32 %r43, [retval0+9];
-; CHECK-DAG: ld.param.b32 %r44, [retval0+13];
-; CHECK-DAG: ld.param.b64 %rd23, [retval0+17];
-; CHECK-DAG: st.param.b32 [func_retval0+0],
-; CHECK-DAG: st.param.b32 [func_retval0+4],
-; CHECK-DAG: st.param.b8 [func_retval0+8],
-; CHECK-DAG: st.param.b32 [func_retval0+9],
-; CHECK-DAG: st.param.b32 [func_retval0+13],
-; CHECK-DAG: st.param.b64 [func_retval0+17],
-
-define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
- %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
- ret %s_i8i32x4p %r;
-}
-
-; Check that we can vectorize loads that span multiple aggregate fields.
-; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80])
-; CHECK-LABEL: test_s_crossfield(
-; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80]
-; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
-; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
-; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
-; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
-; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
-; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
-; CHECK: .param .align 16 .b8 param0[80];
-; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
-; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
-; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
-; CHECK: st.param.b32 [param0+64], [[E15]];
-; CHECK: .param .align 16 .b8 retval0[80];
-; CHECK: call.uni (retval0),
-; CHECK: test_s_crossfield,
-; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
-; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
-; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
-; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
-; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
-; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
-; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
-; CHECK: st.param.b32 [func_retval0+8], [[RE2]];
-; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
-; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
-; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
-; CHECK: st.param.b32 [func_retval0+64], [[RE15]];
-; CHECK: ret;
-
-define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
- %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
- ret %s_crossfield %r;
-}
+; Verifies correctness of load/store of parameters and return values.
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck %s
+
+%s_i1 = type { i1 }
+%s_i8 = type { i8 }
+%s_i16 = type { i16 }
+%s_f16 = type { half }
+%s_i32 = type { i32 }
+%s_f32 = type { float }
+%s_i64 = type { i64 }
+%s_f64 = type { double }
+
+; More complicated types. i64 is used to increase natural alignment
+; requirement for the type.
+%s_i32x4 = type { i32, i32, i32, i32, i64}
+%s_i32f32 = type { i32, float, i32, float, i64}
+%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
+%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
+%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
+; All scalar parameters must be at least 32 bits in size.
+; i1 is loaded/stored as i8.
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i1(
+; CHECK-NEXT: .param .b32 test_i1_param_0
+; CHECK: ld.param.u8 [[A8:%r[0-9]+]], [test_i1_param_0];
+; CHECK: and.b32 [[A:%r[0-9]+]], [[A8]], 1;
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[A]]
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni
+; CHECK-NEXT: test_i1,
+; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
+; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i1 @test_i1(i1 %a) {
+ %r = tail call i1 @test_i1(i1 %a);
+ ret i1 %r;
+}
+
+; Signed i1 is a somewhat special case. We only care about one bit and
+; then use neg.s32 to convert it to 32-bit -1 if it's set.
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i1s(
+; CHECK-NEXT: .param .b32 test_i1s_param_0
+; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
+; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
+; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[A]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni
+; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
+; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
+; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define signext i1 @test_i1s(i1 signext %a) {
+ %r = tail call signext i1 @test_i1s(i1 signext %a);
+ ret i1 %r;
+}
+
+; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_v3i1(
+; CHECK-NEXT: .param .align 4 .b8 test_v3i1_param_0[4]
+; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
+; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i1_param_0]
+; CHECK: .param .align 4 .b8 param0[4];
+; CHECK-DAG: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v3i1,
+; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
+; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}
+; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
+; CHECK-NEXT: ret;
+define <3 x i1> @test_v3i1(<3 x i1> %a) {
+ %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
+ ret <3 x i1> %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_v4i1(
+; CHECK-NEXT: .param .align 4 .b8 test_v4i1_param_0[4]
+; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i1_param_0]
+; CHECK: .param .align 4 .b8 param0[4];
+; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK: test_v4i1,
+; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]};
+; CHECK-NEXT: ret;
+define <4 x i1> @test_v4i1(<4 x i1> %a) {
+ %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
+ ret <4 x i1> %r;
+}
+
+; CHECK: .func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v5i1(
+; CHECK-NEXT: .param .align 8 .b8 test_v5i1_param_0[8]
+; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
+; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i1_param_0]
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v5i1,
+; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
+; CHECK-NEXT: ret;
+define <5 x i1> @test_v5i1(<5 x i1> %a) {
+ %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
+ ret <5 x i1> %r;
+}
+
+; Unsigned i8 is loaded into a 16-bit register, which is then zero-extended to i32.
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i8(
+; CHECK-NEXT: .param .b32 test_i8_param_0
+; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
+; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[A]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK: test_i8,
+; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
+; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i8 @test_i8(i8 %a) {
+ %r = tail call i8 @test_i8(i8 %a);
+ ret i8 %r;
+}
+
+; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i8s(
+; CHECK-NEXT: .param .b32 test_i8s_param_0
+; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
+; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[A]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK: test_i8s,
+; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
+; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
+; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
+; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define signext i8 @test_i8s(i8 signext %a) {
+ %r = tail call signext i8 @test_i8s(i8 signext %a);
+ ret i8 %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_v3i8(
+; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
+; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
+; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
+; CHECK: .param .align 4 .b8 param0[4];
+; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b8 [param0+2], [[E2]];
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v3i8,
+; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
+; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
+; CHECK-NEXT: ret;
+define <3 x i8> @test_v3i8(<3 x i8> %a) {
+ %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
+ ret <3 x i8> %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_v4i8(
+; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
+; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
+; CHECK: .param .align 4 .b8 param0[4];
+; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v4i8,
+; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-NEXT: ret;
+define <4 x i8> @test_v4i8(<4 x i8> %a) {
+ %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
+ ret <4 x i8> %r;
+}
+
+; CHECK: .func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v5i8(
+; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
+; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
+; CHECK-DAG ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v5i8,
+; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
+; CHECK-NEXT: ret;
+define <5 x i8> @test_v5i8(<5 x i8> %a) {
+ %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
+ ret <5 x i8> %r;
+}
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i16(
+; CHECK-NEXT: .param .b32 test_i16_param_0
+; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
+; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[E32]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_i16,
+; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
+; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i16 @test_i16(i16 %a) {
+ %r = tail call i16 @test_i16(i16 %a);
+ ret i16 %r;
+}
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i16s(
+; CHECK-NEXT: .param .b32 test_i16s_param_0
+; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
+; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[E32]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_i16s,
+; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
+; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define signext i16 @test_i16s(i16 signext %a) {
+ %r = tail call signext i16 @test_i16s(i16 signext %a);
+ ret i16 %r;
+}
+
+; CHECK: .func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v3i16(
+; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
+; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
+; CHECK-DAG: ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b16 [param0+4], [[E2]];
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v3i16,
+; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
+; CHECK-NEXT: ret;
+define <3 x i16> @test_v3i16(<3 x i16> %a) {
+ %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
+ ret <3 x i16> %r;
+}
+
+; CHECK: .func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v4i16(
+; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
+; CHECK: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v4i16,
+; CHECK: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-NEXT: ret;
+define <4 x i16> @test_v4i16(<4 x i16> %a) {
+ %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
+ ret <4 x i16> %r;
+}
+
+; CHECK: .func (.param .align 16 .b8 func_retval0[16])
+; CHECK-LABEL: test_v5i16(
+; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
+; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
+; CHECK-DAG ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
+; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v5i16,
+; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
+; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
+; CHECK-NEXT: ret;
+define <5 x i16> @test_v5i16(<5 x i16> %a) {
+ %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
+ ret <5 x i16> %r;
+}
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_f16(
+; CHECK-NEXT: .param .b32 test_f16_param_0
+; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b16 [param0+0], [[E]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_f16,
+; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK: st.param.b16 [func_retval0+0], [[R]]
+; CHECK-NEXT: ret;
+define half @test_f16(half %a) {
+ %r = tail call half @test_f16(half %a);
+ ret half %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_v2f16(
+; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
+; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0];
+; CHECK: .param .align 4 .b8 param0[4];
+; CHECK: st.param.b32 [param0+0], [[E]];
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v2f16,
+; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
+; CHECK: st.param.b32 [func_retval0+0], [[R]]
+; CHECK-NEXT: ret;
+define <2 x half> @test_v2f16(<2 x half> %a) {
+ %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
+ ret <2 x half> %r;
+}
+
+; CHECK:.func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v3f16(
+; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
+; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
+; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
+; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK: test_v3f16,
+; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4];
+; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
+; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
+; CHECK: ret;
+define <3 x half> @test_v3f16(<3 x half> %a) {
+ %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
+ ret <3 x half> %r;
+}
+
+; CHECK:.func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_v4f16(
+; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
+; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
+; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
+; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK: test_v4f16,
+; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
+; CHECK: ret;
+define <4 x half> @test_v4f16(<4 x half> %a) {
+ %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
+ ret <4 x half> %r;
+}
+
+; CHECK:.func (.param .align 16 .b8 func_retval0[16])
+; CHECK-LABEL: test_v5f16(
+; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
+; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
+; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
+; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK-DAG: st.param.v4.b16 [param0+0],
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
+; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: call.uni (retval0),
+; CHECK: test_v5f16,
+; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8];
+; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
+; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
+; CHECK: ret;
+define <5 x half> @test_v5f16(<5 x half> %a) {
+ %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
+ ret <5 x half> %r;
+}
+
+; CHECK:.func (.param .align 16 .b8 func_retval0[16])
+; CHECK-LABEL: test_v8f16(
+; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
+; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
+; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
+; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
+; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]];
+; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]];
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
+; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: call.uni (retval0),
+; CHECK: test_v8f16,
+; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
+; CHECK: ret;
+define <8 x half> @test_v8f16(<8 x half> %a) {
+ %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
+ ret <8 x half> %r;
+}
+
+; CHECK:.func (.param .align 32 .b8 func_retval0[32])
+; CHECK-LABEL: test_v9f16(
+; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
+; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
+; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
+; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
+; CHECK: .param .align 32 .b8 param0[32];
+; CHECK-DAG: st.param.v4.b16 [param0+0],
+; CHECK-DAG: st.param.v4.b16 [param0+8],
+; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
+; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK: call.uni (retval0),
+; CHECK: test_v9f16,
+; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
+; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16];
+; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
+; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
+; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
+; CHECK: ret;
+define <9 x half> @test_v9f16(<9 x half> %a) {
+ %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
+ ret <9 x half> %r;
+}
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_i32(
+; CHECK-NEXT: .param .b32 test_i32_param_0
+; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.b32 [param0+0], [[E]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_i32,
+; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i32 @test_i32(i32 %a) {
+ %r = tail call i32 @test_i32(i32 %a);
+ ret i32 %r;
+}
+
+; CHECK: .func (.param .align 16 .b8 func_retval0[16])
+; CHECK-LABEL: test_v3i32(
+; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
+; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
+; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b32 [param0+8], [[E2]];
+; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v3i32,
+; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
+; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
+; CHECK-NEXT: ret;
+define <3 x i32> @test_v3i32(<3 x i32> %a) {
+ %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
+ ret <3 x i32> %r;
+}
+
+; CHECK: .func (.param .align 16 .b8 func_retval0[16])
+; CHECK-LABEL: test_v4i32(
+; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
+; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v4i32,
+; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
+; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHCK-NEXT: ret;
+define <4 x i32> @test_v4i32(<4 x i32> %a) {
+ %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
+ ret <4 x i32> %r;
+}
+
+; CHECK: .func (.param .align 32 .b8 func_retval0[32])
+; CHECK-LABEL: test_v5i32(
+; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
+; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
+; CHECK-DAG ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
+; CHECK: .param .align 32 .b8 param0[32];
+; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
+; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v5i32,
+; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
+; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
+; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
+; CHECK-NEXT: ret;
+define <5 x i32> @test_v5i32(<5 x i32> %a) {
+ %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
+ ret <5 x i32> %r;
+}
+
+; CHECK: .func (.param .b32 func_retval0)
+; CHECK-LABEL: test_f32(
+; CHECK-NEXT: .param .b32 test_f32_param_0
+; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
+; CHECK: .param .b32 param0;
+; CHECK: st.param.f32 [param0+0], [[E]];
+; CHECK: .param .b32 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_f32,
+; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
+; CHECK: st.param.f32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define float @test_f32(float %a) {
+ %r = tail call float @test_f32(float %a);
+ ret float %r;
+}
+
+; CHECK: .func (.param .b64 func_retval0)
+; CHECK-LABEL: test_i64(
+; CHECK-NEXT: .param .b64 test_i64_param_0
+; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
+; CHECK: .param .b64 param0;
+; CHECK: st.param.b64 [param0+0], [[E]];
+; CHECK: .param .b64 retval0;
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_i64,
+; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
+; CHECK: st.param.b64 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define i64 @test_i64(i64 %a) {
+ %r = tail call i64 @test_i64(i64 %a);
+ ret i64 %r;
+}
+
+; CHECK: .func (.param .align 32 .b8 func_retval0[32])
+; CHECK-LABEL: test_v3i64(
+; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
+; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
+; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
+; CHECK: .param .align 32 .b8 param0[32];
+; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b64 [param0+16], [[E2]];
+; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v3i64,
+; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
+; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
+; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
+; CHECK-NEXT: ret;
+define <3 x i64> @test_v3i64(<3 x i64> %a) {
+ %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
+ ret <3 x i64> %r;
+}
+
+; For i64 vector loads are limited by PTX to 2 elements.
+; CHECK: .func (.param .align 32 .b8 func_retval0[32])
+; CHECK-LABEL: test_v4i64(
+; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
+; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
+; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
+; CHECK: .param .align 32 .b8 param0[32];
+; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
+; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_v4i64,
+; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
+; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
+; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-NEXT: ret;
+define <4 x i64> @test_v4i64(<4 x i64> %a) {
+ %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
+ ret <4 x i64> %r;
+}
+
+; Aggregates, on the other hand, do not get extended.
+
+; CHECK: .func (.param .align 1 .b8 func_retval0[1])
+; CHECK-LABEL: test_s_i1(
+; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
+; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
+; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: st.param.b8 [param0+0], [[A]]
+; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: call.uni
+; CHECK-NEXT: test_s_i1,
+; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
+; CHECK: st.param.b8 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_i1 @test_s_i1(%s_i1 %a) {
+ %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
+ ret %s_i1 %r;
+}
+
+; CHECK: .func (.param .align 1 .b8 func_retval0[1])
+; CHECK-LABEL: test_s_i8(
+; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
+; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
+; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: st.param.b8 [param0+0], [[A]]
+; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: call.uni
+; CHECK-NEXT: test_s_i8,
+; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
+; CHECK: st.param.b8 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_i8 @test_s_i8(%s_i8 %a) {
+ %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
+ ret %s_i8 %r;
+}
+
+; CHECK: .func (.param .align 2 .b8 func_retval0[2])
+; CHECK-LABEL: test_s_i16(
+; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
+; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
+; CHECK: .param .align 2 .b8 param0[2];
+; CHECK: st.param.b16 [param0+0], [[A]]
+; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: call.uni
+; CHECK-NEXT: test_s_i16,
+; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_i16 @test_s_i16(%s_i16 %a) {
+ %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
+ ret %s_i16 %r;
+}
+
+; CHECK: .func (.param .align 2 .b8 func_retval0[2])
+; CHECK-LABEL: test_s_f16(
+; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
+; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
+; CHECK: .param .align 2 .b8 param0[2];
+; CHECK: st.param.b16 [param0+0], [[A]]
+; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: call.uni
+; CHECK-NEXT: test_s_f16,
+; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
+; CHECK: st.param.b16 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_f16 @test_s_f16(%s_f16 %a) {
+ %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
+ ret %s_f16 %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_s_i32(
+; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
+; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
+; CHECK: .param .align 4 .b8 param0[4]
+; CHECK: st.param.b32 [param0+0], [[E]];
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_i32,
+; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_i32 @test_s_i32(%s_i32 %a) {
+ %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
+ ret %s_i32 %r;
+}
+
+; CHECK: .func (.param .align 4 .b8 func_retval0[4])
+; CHECK-LABEL: test_s_f32(
+; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
+; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
+; CHECK: .param .align 4 .b8 param0[4]
+; CHECK: st.param.f32 [param0+0], [[E]];
+; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_f32,
+; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
+; CHECK: st.param.f32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_f32 @test_s_f32(%s_f32 %a) {
+ %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
+ ret %s_f32 %r;
+}
+
+; CHECK: .func (.param .align 8 .b8 func_retval0[8])
+; CHECK-LABEL: test_s_i64(
+; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
+; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
+; CHECK: .param .align 8 .b8 param0[8];
+; CHECK: st.param.b64 [param0+0], [[E]];
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_i64,
+; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
+; CHECK: st.param.b64 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define %s_i64 @test_s_i64(%s_i64 %a) {
+ %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
+ ret %s_i64 %r;
+}
+
+; Fields that have different types, but identical sizes are not vectorized.
+; CHECK: .func (.param .align 8 .b8 func_retval0[24])
+; CHECK-LABEL: test_s_i32f32(
+; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
+; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
+; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
+; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
+; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
+; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
+; CHECK: .param .align 8 .b8 param0[24];
+; CHECK-DAG: st.param.b32 [param0+0], [[E0]];
+; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
+; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
+; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
+; CHECK: .param .align 8 .b8 retval0[24];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_i32f32,
+; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
+; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
+; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
+; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
+; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]];
+; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
+; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
+; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
+; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
+; CHECK: ret;
+define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
+ %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
+ ret %s_i32f32 %r;
+}
+
+; We do vectorize consecutive fields with matching types.
+; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
+; CHECK-LABEL: test_s_i32x4(
+; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
+; CHECK-DAG: ld.param.u64 [[RD1:%rd[0-9]+]], [test_s_i32x4_param_0+16];
+; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
+; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
+; CHECK: .param .align 8 .b8 param0[24];
+; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK: st.param.b64 [param0+16], [[RD1]];
+; CHECK: .param .align 8 .b8 retval0[24];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_i32x4,
+; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
+; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
+; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
+; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
+; CHECK: ret;
+
+define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
+ %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
+ ret %s_i32x4 %r;
+}
+
+; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
+; CHECK-LABEL: test_s_i1i32x4(
+; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
+; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
+; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
+; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
+; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
+; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
+; CHECK: .param .align 8 .b8 param0[32];
+; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b8 [param0+8], [[E2]];
+; CHECK: st.param.b32 [param0+12], [[E3]];
+; CHECK: st.param.b32 [param0+16], [[E4]];
+; CHECK: st.param.b64 [param0+24], [[E5]];
+; CHECK: .param .align 8 .b8 retval0[32];
+; CHECK: call.uni (retval0),
+; CHECK: test_s_i1i32x4,
+; CHECK: (
+; CHECK: param0
+; CHECK: );
+; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
+; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
+; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
+; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
+; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK: st.param.b8 [func_retval0+8], [[RE2]];
+; CHECK: st.param.b32 [func_retval0+12], [[RE3]];
+; CHECK: st.param.b32 [func_retval0+16], [[RE4]];
+; CHECK: st.param.b64 [func_retval0+24], [[RE5]];
+; CHECK: ret;
+
+define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
+ %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
+ ret %s_i8i32x4 %r;
+}
+
+; -- All loads/stores from parameters aligned by one must be done one
+; -- byte at a time.
+; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25])
+; CHECK-LABEL: test_s_i1i32x4p(
+; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
+; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
+; --- TODO
+; --- Unaligned parameter store/ return value load is broken in both nvcc
+; --- and llvm and needs to be fixed.
+; CHECK: .param .align 1 .b8 param0[25];
+; CHECK-DAG: st.param.b32 [param0+0],
+; CHECK-DAG: st.param.b32 [param0+4],
+; CHECK-DAG: st.param.b8 [param0+8],
+; CHECK-DAG: st.param.b32 [param0+9],
+; CHECK-DAG: st.param.b32 [param0+13],
+; CHECK-DAG: st.param.b64 [param0+17],
+; CHECK: .param .align 1 .b8 retval0[25];
+; CHECK: call.uni (retval0),
+; CHECK-NEXT: test_s_i1i32x4p,
+; CHECK-DAG: ld.param.b32 %r41, [retval0+0];
+; CHECK-DAG: ld.param.b32 %r42, [retval0+4];
+; CHECK-DAG: ld.param.b8 %rs2, [retval0+8];
+; CHECK-DAG: ld.param.b32 %r43, [retval0+9];
+; CHECK-DAG: ld.param.b32 %r44, [retval0+13];
+; CHECK-DAG: ld.param.b64 %rd23, [retval0+17];
+; CHECK-DAG: st.param.b32 [func_retval0+0],
+; CHECK-DAG: st.param.b32 [func_retval0+4],
+; CHECK-DAG: st.param.b8 [func_retval0+8],
+; CHECK-DAG: st.param.b32 [func_retval0+9],
+; CHECK-DAG: st.param.b32 [func_retval0+13],
+; CHECK-DAG: st.param.b64 [func_retval0+17],
+
+define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
+ %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
+ ret %s_i8i32x4p %r;
+}
+
+; Check that we can vectorize loads that span multiple aggregate fields.
+; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80])
+; CHECK-LABEL: test_s_crossfield(
+; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80]
+; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
+; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
+; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
+; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
+; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
+; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
+; CHECK: .param .align 16 .b8 param0[80];
+; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
+; CHECK: st.param.b32 [param0+8], [[E2]];
+; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
+; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
+; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
+; CHECK: st.param.b32 [param0+64], [[E15]];
+; CHECK: .param .align 16 .b8 retval0[80];
+; CHECK: call.uni (retval0),
+; CHECK: test_s_crossfield,
+; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
+; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
+; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
+; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
+; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
+; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
+; CHECK: st.param.b32 [func_retval0+8], [[RE2]];
+; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
+; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
+; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
+; CHECK: st.param.b32 [func_retval0+64], [[RE15]];
+; CHECK: ret;
+
+define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
+ %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
+ ret %s_crossfield %r;
+}
Modified: llvm/trunk/test/CodeGen/NVPTX/simple-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/simple-call.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/simple-call.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/simple-call.ll Mon May 15 12:17:44 2017
@@ -1,26 +1,26 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-
-
-
-; CHECK: .func ({{.*}}) device_func
-define float @device_func(float %a) noinline {
- %ret = fmul float %a, %a
- ret float %ret
-}
-
-; CHECK: .entry kernel_func
-define void @kernel_func(float* %a) {
- %val = load float, float* %a
-; CHECK: call.uni (retval0),
-; CHECK: device_func,
- %mul = call float @device_func(float %val)
- store float %mul, float* %a
- ret void
-}
-
-
-
-!nvvm.annotations = !{!1}
-
-!1 = !{void (float*)* @kernel_func, !"kernel", i32 1}
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+
+
+; CHECK: .func ({{.*}}) device_func
+define float @device_func(float %a) noinline {
+ %ret = fmul float %a, %a
+ ret float %ret
+}
+
+; CHECK: .entry kernel_func
+define void @kernel_func(float* %a) {
+ %val = load float, float* %a
+; CHECK: call.uni (retval0),
+; CHECK: device_func,
+ %mul = call float @device_func(float %val)
+ store float %mul, float* %a
+ ret void
+}
+
+
+
+!nvvm.annotations = !{!1}
+
+!1 = !{void (float*)* @kernel_func, !"kernel", i32 1}
Modified: llvm/trunk/test/CodeGen/NVPTX/vector-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/vector-call.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/vector-call.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/vector-call.ll Mon May 15 12:17:44 2017
@@ -1,30 +1,30 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-
-target triple = "nvptx-unknown-cuda"
-
-declare void @bar(<4 x i32>)
-
-; CHECK-LABEL: .func foo(
-; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [foo_param_0];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: call.uni
-; CHECK: ret;
-define void @foo(<4 x i32> %a) {
- tail call void @bar(<4 x i32> %a)
- ret void
-}
-
-; CHECK-LABEL: .func foo3(
-; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo3_param_0];
-; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [foo3_param_0+8];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
-; CHECK: call.uni
-; CHECK: ret;
-declare void @bar3(<3 x i32>)
-define void @foo3(<3 x i32> %a) {
- tail call void @bar3(<3 x i32> %a)
- ret void
-}
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+target triple = "nvptx-unknown-cuda"
+
+declare void @bar(<4 x i32>)
+
+; CHECK-LABEL: .func foo(
+; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [foo_param_0];
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
+; CHECK: call.uni
+; CHECK: ret;
+define void @foo(<4 x i32> %a) {
+ tail call void @bar(<4 x i32> %a)
+ ret void
+}
+
+; CHECK-LABEL: .func foo3(
+; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo3_param_0];
+; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [foo3_param_0+8];
+; CHECK: .param .align 16 .b8 param0[16];
+; CHECK-DAG: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
+; CHECK: call.uni
+; CHECK: ret;
+declare void @bar3(<3 x i32>)
+define void @foo3(<3 x i32> %a) {
+ tail call void @bar3(<3 x i32> %a)
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/NVPTX/zeroext-32bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/zeroext-32bit.ll?rev=303082&r1=303081&r2=303082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/zeroext-32bit.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/zeroext-32bit.ll Mon May 15 12:17:44 2017
@@ -1,26 +1,26 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
-
-; The zeroext attribute below should be silently ignored because
-; we can pass a 32-bit integer across a function call without
-; needing to extend it.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-target triple = "nvptx64-unknown-cuda"
-
-; CHECK-LABEL: .visible .func zeroext_test
-; CHECK-NOT: cvt.u32.u16
-define void @zeroext_test() {
- tail call void @call1(i32 zeroext 0)
- ret void
-}
-
-declare void @call1(i32 zeroext)
-
-; CHECK-LABEL: .visible .func signext_test
-; CHECK-NOT: cvt.s32.s16
-define void @signext_test() {
- tail call void @call2(i32 zeroext 0)
- ret void
-}
-
-declare void @call2(i32 zeroext)
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
+
+; The zeroext attribute below should be silently ignored because
+; we can pass a 32-bit integer across a function call without
+; needing to extend it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-cuda"
+
+; CHECK-LABEL: .visible .func zeroext_test
+; CHECK-NOT: cvt.u32.u16
+define void @zeroext_test() {
+ tail call void @call1(i32 zeroext 0)
+ ret void
+}
+
+declare void @call1(i32 zeroext)
+
+; CHECK-LABEL: .visible .func signext_test
+; CHECK-NOT: cvt.s32.s16
+define void @signext_test() {
+ tail call void @call2(i32 zeroext 0)
+ ret void
+}
+
+declare void @call2(i32 zeroext)
More information about the llvm-commits
mailing list