[llvm] r222032 - R600/SI: Combine min3/max3 instructions
Matt Arsenault
Matthew.Arsenault at amd.com
Fri Nov 14 12:08:52 PST 2014
Author: arsenm
Date: Fri Nov 14 14:08:52 2014
New Revision: 222032
URL: http://llvm.org/viewvc/llvm-project?rev=222032&view=rev
Log:
R600/SI: Combine min3/max3 instructions
Added:
llvm/trunk/test/CodeGen/R600/fmax3.ll
llvm/trunk/test/CodeGen/R600/fmin3.ll
llvm/trunk/test/CodeGen/R600/max3.ll
llvm/trunk/test/CodeGen/R600/min3.ll
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/R600/SIISelLowering.cpp
llvm/trunk/lib/Target/R600/SIISelLowering.h
llvm/trunk/lib/Target/R600/SIInstructions.td
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Fri Nov 14 14:08:52 2014
@@ -2374,6 +2374,12 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(FMAX3)
+ NODE_NAME_CASE(SMAX3)
+ NODE_NAME_CASE(UMAX3)
+ NODE_NAME_CASE(FMIN3)
+ NODE_NAME_CASE(SMIN3)
+ NODE_NAME_CASE(UMIN3)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h Fri Nov 14 14:08:52 2014
@@ -210,6 +210,12 @@ enum {
FMIN_LEGACY,
SMIN,
UMIN,
+ FMAX3,
+ SMAX3,
+ UMAX3,
+ FMIN3,
+ SMIN3,
+ UMIN3,
URECIP,
DIV_SCALE,
DIV_FMAS,
Modified: llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td Fri Nov 14 14:08:52 2014
@@ -84,7 +84,7 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUIS
[SDNPAssociative]
>;
-// out = min(a, b) a snd b are signed ints
+// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@@ -94,6 +94,37 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN
[SDNPCommutative, SDNPAssociative]
>;
+// FIXME: TableGen doesn't like commutative instructions with more
+// than 2 operands.
+// out = max(a, b, c) a, b and c are floats
+def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b, and c are signed ints
+def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b and c are unsigned ints
+def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are floats
+def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are signed ints
+def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are unsigned ints
+def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Fri Nov 14 14:08:52 2014
@@ -231,6 +231,8 @@ SITargetLowering::SITargetLowering(Targe
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
@@ -1314,6 +1316,61 @@ SDValue SITargetLowering::performSHLPtrC
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
+static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return AMDGPUISD::FMAX3;
+ case AMDGPUISD::SMAX:
+ return AMDGPUISD::SMAX3;
+ case AMDGPUISD::UMAX:
+ return AMDGPUISD::UMAX3;
+ case ISD::FMINNUM:
+ return AMDGPUISD::FMIN3;
+ case AMDGPUISD::SMIN:
+ return AMDGPUISD::SMIN3;
+ case AMDGPUISD::UMIN:
+ return AMDGPUISD::UMIN3;
+ default:
+ llvm_unreachable("Not a min/max opcode");
+ }
+}
+
+SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Opc = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Only do this if the inner op has one use, since otherwise this will just
+ // increase register pressure for no benefit.
+
+ // max(max(a, b), c)
+ if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0.getOperand(0),
+ Op0.getOperand(1),
+ Op1);
+ }
+
+ // max(a, max(b, c))
+ if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0,
+ Op1.getOperand(0),
+ Op1.getOperand(1));
+ }
+
+ return SDValue();
+}
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1341,6 +1398,17 @@ SDValue SITargetLowering::PerformDAGComb
}
break;
}
+ case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::FMINNUM:
+ case AMDGPUISD::SMAX:
+ case AMDGPUISD::SMIN:
+ case AMDGPUISD::UMAX:
+ case AMDGPUISD::UMIN: {
+ if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ getTargetMachine().getOptLevel() > CodeGenOpt::None)
+ return performMin3Max3Combine(N, DCI);
+ break;
+ }
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.h?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.h Fri Nov 14 14:08:52 2014
@@ -59,6 +59,8 @@ class SITargetLowering : public AMDGPUTa
unsigned AS,
DAGCombinerInfo &DCI) const;
+ SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+
public:
SITargetLowering(TargetMachine &tm);
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=222032&r1=222031&r2=222032&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Fri Nov 14 14:08:52 2014
@@ -1573,15 +1573,27 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x
>;
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;
-////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "v_min3_f32", []>;
-////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "v_min3_i32", []>;
-////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "v_min3_u32", []>;
-////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "v_max3_f32", []>;
-////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "v_max3_i32", []>;
-////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "v_max3_u32", []>;
-////def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
-////def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
-////def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
+defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmin3>;
+
+defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmin3
+>;
+defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumin3
+>;
+defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmax3
+>;
+defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmax3
+>;
+defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumax3
+>;
+//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
+//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
+//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
Added: llvm/trunk/test/CodeGen/R600/fmax3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmax3.ll?rev=222032&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmax3.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmax3.ll Fri Nov 14 14:08:52 2014
@@ -0,0 +1,38 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmax
+; SI-LABEL: {{^}}test_fmax3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
Added: llvm/trunk/test/CodeGen/R600/fmin3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmin3.ll?rev=222032&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmin3.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmin3.ll Fri Nov 14 14:08:52 2014
@@ -0,0 +1,38 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmin3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmin
+; SI-LABEL: {{^}}test_fmin3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
Added: llvm/trunk/test/CodeGen/R600/max3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/max3.ll?rev=222032&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/max3.ll (added)
+++ llvm/trunk/test/CodeGen/R600/max3.ll Fri Nov 14 14:08:52 2014
@@ -0,0 +1,41 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax3_sgt_i32
+; SI: v_max3_i32
+define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp sgt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp sgt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umax3_ugt_i32
+; SI: v_max3_u32
+define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ugt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ugt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
Added: llvm/trunk/test/CodeGen/R600/min3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/min3.ll?rev=222032&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/min3.ll (added)
+++ llvm/trunk/test/CodeGen/R600/min3.ll Fri Nov 14 14:08:52 2014
@@ -0,0 +1,111 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin3_slt_i32
+; SI: v_min3_i32
+define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp slt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_ult_i32
+; SI: v_min3_u32
+define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ult i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ult i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_umin_umin
+; SI: v_min_i32
+; SI: v_min3_i32
+define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %i1
+ %i2 = select i1 %icmp2, i32 %i0, i32 %i1
+
+ store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_2_uses
+; SI-NOT: v_min3
+define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %c
+ %i2 = select i1 %icmp2, i32 %i0, i32 %c
+
+ store i32 %i2, i32 addrspace(1)* %outgep0, align 4
+ store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}
More information about the llvm-commits
mailing list