[llvm] r303284 - AMDGPU: Fix min3/max3 combines for f16/i16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed May 17 12:25:07 PDT 2017
Author: arsenm
Date: Wed May 17 14:25:06 2017
New Revision: 303284
URL: http://llvm.org/viewvc/llvm-project?rev=303284&view=rev
Log:
AMDGPU: Fix min3/max3 combines for f16/i16
Fix missing instruction definitions for min3/max3.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
llvm/trunk/test/CodeGen/AMDGPU/fmax3.ll
llvm/trunk/test/CodeGen/AMDGPU/fmin3.ll
llvm/trunk/test/CodeGen/AMDGPU/max3.ll
llvm/trunk/test/CodeGen/AMDGPU/min3.ll
llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed May 17 14:25:06 2017
@@ -289,6 +289,10 @@ public:
return getGeneration() >= GFX9;
}
+ bool hasMin3Max3_16() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed May 17 14:25:06 2017
@@ -4491,7 +4491,8 @@ SDValue SITargetLowering::performMinMaxC
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
- VT != MVT::f64) {
+ VT != MVT::f64 &&
+ ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Wed May 17 14:25:06 2017
@@ -300,10 +300,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b
def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
-}
+
+def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+
+def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+} // End SubtargetPredicate = isGFX9
//===----------------------------------------------------------------------===//
@@ -509,6 +518,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>;
defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
+
+defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
+defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
+defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+
+defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
+defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
+defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+
defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmax3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmax3.ll?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmax3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmax3.ll Wed May 17 14:25:06 2017
@@ -1,39 +1,92 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-declare float @llvm.maxnum.f32(float, float) nounwind readnone
-
-; SI-LABEL: {{^}}test_fmax3_olt_0:
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmax3_olt_0_f32:
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
- %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
- %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b)
+ %f1 = call float @llvm.maxnum.f32(float %f0, float %c)
store float %f1, float addrspace(1)* %out, align 4
ret void
}
; Commute operand of second fmax
-; SI-LABEL: {{^}}test_fmax3_olt_1:
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmax3_olt_1_f32:
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
- %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
- %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b)
+ %f1 = call float @llvm.maxnum.f32(float %c, float %f0)
store float %f1, float addrspace(1)* %out, align 4
ret void
}
+
+; GCN-LABEL: {{^}}test_fmax3_olt_0_f16:
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_max_f16_e32
+; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+ %a = load volatile half, half addrspace(1)* %aptr, align 2
+ %b = load volatile half, half addrspace(1)* %bptr, align 2
+ %c = load volatile half, half addrspace(1)* %cptr, align 2
+ %f0 = call half @llvm.maxnum.f16(half %a, half %b)
+ %f1 = call half @llvm.maxnum.f16(half %f0, half %c)
+ store half %f1, half addrspace(1)* %out, align 2
+ ret void
+}
+
+; Commute operand of second fmax
+; GCN-LABEL: {{^}}test_fmax3_olt_1_f16:
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_max_f16_e32
+; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+ %a = load volatile half, half addrspace(1)* %aptr, align 2
+ %b = load volatile half, half addrspace(1)* %bptr, align 2
+ %c = load volatile half, half addrspace(1)* %cptr, align 2
+ %f0 = call half @llvm.maxnum.f16(half %a, half %b)
+ %f1 = call half @llvm.maxnum.f16(half %c, half %f0)
+ store half %f1, half addrspace(1)* %out, align 2
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare float @llvm.maxnum.f32(float, float) #1
+declare half @llvm.maxnum.f16(half, half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin3.ll?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin3.ll Wed May 17 14:25:06 2017
@@ -1,40 +1,90 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare float @llvm.minnum.f32(float, float) nounwind readnone
-
-; SI-LABEL: {{^}}test_fmin3_olt_0:
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}test_fmin3_olt_0_f32:
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
- %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
- %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+ %f0 = call float @llvm.minnum.f32(float %a, float %b)
+ %f1 = call float @llvm.minnum.f32(float %f0, float %c)
store float %f1, float addrspace(1)* %out, align 4
ret void
}
; Commute operand of second fmin
-; SI-LABEL: {{^}}test_fmin3_olt_1:
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmin3_olt_1_f32:
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
- %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
- %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+ %f0 = call float @llvm.minnum.f32(float %a, float %b)
+ %f1 = call float @llvm.minnum.f32(float %c, float %f0)
store float %f1, float addrspace(1)* %out, align 4
ret void
}
+
+; GCN-LABEL: {{^}}test_fmin3_olt_0_f16:
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+
+; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_min_f16_e32
+; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+ %a = load volatile half, half addrspace(1)* %aptr, align 2
+ %b = load volatile half, half addrspace(1)* %bptr, align 2
+ %c = load volatile half, half addrspace(1)* %cptr, align 2
+ %f0 = call half @llvm.minnum.f16(half %a, half %b)
+ %f1 = call half @llvm.minnum.f16(half %f0, half %c)
+ store half %f1, half addrspace(1)* %out, align 2
+ ret void
+}
+
+; Commute operand of second fmin
+; GCN-LABEL: {{^}}test_fmin3_olt_1_f16:
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+
+; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_min_f16_e32
+; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+ %a = load volatile half, half addrspace(1)* %aptr, align 2
+ %b = load volatile half, half addrspace(1)* %bptr, align 2
+ %c = load volatile half, half addrspace(1)* %cptr, align 2
+ %f0 = call half @llvm.minnum.f16(half %a, half %b)
+ %f1 = call half @llvm.minnum.f16(half %c, half %f0)
+ store half %f1, half addrspace(1)* %out, align 2
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare float @llvm.minnum.f32(float, float) #1
+declare half @llvm.minnum.f16(half, half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/max3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/max3.ll?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/max3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/max3.ll Wed May 17 14:25:06 2017
@@ -1,41 +1,94 @@
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imax3_sgt_i32
-; SI: v_max3_i32
-define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_imax3_sgt_i32:
+; GCN: v_max3_i32
+define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
%icmp0 = icmp sgt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp sgt i32 %i0, %c
%i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %out, align 4
+ store i32 %i1, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @v_test_umax3_ugt_i32
-; SI: v_max3_u32
-define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umax3_ugt_i32:
+; GCN: v_max3_u32
+define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
%icmp0 = icmp ugt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ugt i32 %i0, %c
%i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %out, align 4
+ store i32 %i1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_imax3_sgt_i16:
+; SI: v_max3_i32
+
+; VI: v_max_i16
+; VI: v_max_i16
+
+; GFX9: v_max3_i16
+define amdgpu_kernel void @v_test_imax3_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+ %b = load i16, i16 addrspace(1)* %gep1
+ %c = load i16, i16 addrspace(1)* %gep2
+ %icmp0 = icmp sgt i16 %a, %b
+ %i0 = select i1 %icmp0, i16 %a, i16 %b
+ %icmp1 = icmp sgt i16 %i0, %c
+ %i1 = select i1 %icmp1, i16 %i0, i16 %c
+ store i16 %i1, i16 addrspace(1)* %out
ret void
}
+
+; GCN-LABEL: {{^}}v_test_umax3_ugt_i16:
+; SI: v_max3_u32
+
+; VI: v_max_u16
+; VI: v_max_u16
+
+; GFX9: v_max3_u16
+define amdgpu_kernel void @v_test_umax3_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+ %b = load i16, i16 addrspace(1)* %gep1
+ %c = load i16, i16 addrspace(1)* %gep2
+ %icmp0 = icmp ugt i16 %a, %b
+ %i0 = select i1 %icmp0, i16 %a, i16 %b
+ %icmp1 = icmp ugt i16 %i0, %c
+ %i1 = select i1 %icmp1, i16 %i0, i16 %c
+ store i16 %i1, i16 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/min3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/min3.ll?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/min3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/min3.ll Wed May 17 14:25:06 2017
@@ -1,50 +1,50 @@
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imin3_slt_i32
-; SI: v_min3_i32
-define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}v_test_imin3_slt_i32:
+; GCN: v_min3_i32
+define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp slt i32 %i0, %c
%i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ store i32 %i1, i32 addrspace(1)* %outgep
ret void
}
-; FUNC-LABEL: @v_test_umin3_ult_i32
-; SI: v_min3_u32
-define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin3_ult_i32:
+; GCN: v_min3_u32
+define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
%icmp0 = icmp ult i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ult i32 %i0, %c
%i1 = select i1 %icmp1, i32 %i0, i32 %c
- store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ store i32 %i1, i32 addrspace(1)* %outgep
ret void
}
-; FUNC-LABEL: @v_test_umin_umin_umin
-; SI: v_min_i32
-; SI: v_min3_i32
-define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin_umin_umin:
+; GCN: v_min_i32
+; GCN: v_min3_i32
+define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -57,10 +57,10 @@ define amdgpu_kernel void @v_test_umin_u
%outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %d = load i32, i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
+ %d = load i32, i32 addrspace(1)* %gep3
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -71,14 +71,14 @@ define amdgpu_kernel void @v_test_umin_u
%icmp2 = icmp slt i32 %i0, %i1
%i2 = select i1 %icmp2, i32 %i0, i32 %i1
- store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+ store i32 %i2, i32 addrspace(1)* %outgep1
ret void
}
-; FUNC-LABEL: @v_test_umin3_2_uses
-; SI-NOT: v_min3
-define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin3_2_uses:
+; GCN-NOT: v_min3
+define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -91,10 +91,10 @@ define amdgpu_kernel void @v_test_umin3_
%outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
- %a = load i32, i32 addrspace(1)* %gep0, align 4
- %b = load i32, i32 addrspace(1)* %gep1, align 4
- %c = load i32, i32 addrspace(1)* %gep2, align 4
- %d = load i32, i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0
+ %b = load i32, i32 addrspace(1)* %gep1
+ %c = load i32, i32 addrspace(1)* %gep2
+ %d = load i32, i32 addrspace(1)* %gep3
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -105,7 +105,60 @@ define amdgpu_kernel void @v_test_umin3_
%icmp2 = icmp slt i32 %i0, %c
%i2 = select i1 %icmp2, i32 %i0, i32 %c
- store i32 %i2, i32 addrspace(1)* %outgep0, align 4
- store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+ store i32 %i2, i32 addrspace(1)* %outgep0
+ store i32 %i0, i32 addrspace(1)* %outgep1
ret void
}
+
+; GCN-LABEL: {{^}}v_test_imin3_slt_i16:
+; SI: v_min3_i32
+
+; VI: v_min_i16
+; VI: v_min_i16
+
+; GFX9: v_min3_i16
+define amdgpu_kernel void @v_test_imin3_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+ %b = load i16, i16 addrspace(1)* %gep1
+ %c = load i16, i16 addrspace(1)* %gep2
+ %icmp0 = icmp slt i16 %a, %b
+ %i0 = select i1 %icmp0, i16 %a, i16 %b
+ %icmp1 = icmp slt i16 %i0, %c
+ %i1 = select i1 %icmp1, i16 %i0, i16 %c
+ store i16 %i1, i16 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umin3_ult_i16:
+; SI: v_min3_u32
+
+; VI: v_min_u16
+; VI: v_min_u16
+
+; GFX9: v_min3_u16
+define amdgpu_kernel void @v_test_umin3_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+ %b = load i16, i16 addrspace(1)* %gep1
+ %c = load i16, i16 addrspace(1)* %gep2
+ %icmp0 = icmp ult i16 %a, %b
+ %i0 = select i1 %icmp0, i16 %a, i16 %b
+ %icmp1 = icmp ult i16 %i0, %c
+ %i1 = select i1 %icmp1, i16 %i0, i16 %c
+ store i16 %i1, i16 addrspace(1)* %outgep
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s?rev=303284&r1=303283&r2=303284&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s Wed May 17 14:25:06 2017
@@ -35,6 +35,30 @@ v_xad_u32 v1, v2, v3, v4
// GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04]
// NOVI: :1: error: instruction not supported on this GPU
+v_min3_f16 v1, v2, v3, v4
+// GFX9: v_min3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf4,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_min3_i16 v1, v2, v3, v4
+// GFX9: v_min3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf5,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_min3_u16 v1, v2, v3, v4
+// GFX9: v_min3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf6,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_f16 v1, v2, v3, v4
+// GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_i16 v1, v2, v3, v4
+// GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_u16 v1, v2, v3, v4
+// GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
v_med3_f16 v1, v2, v3, v4
// GFX9: v_med3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfa,0xd1,0x02,0x07,0x12,0x04]
// NOVI: :1: error: instruction not supported on this GPU
More information about the llvm-commits
mailing list