[llvm] r205242 - R600: Compute masked bits for min and max
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Mar 31 12:35:33 PDT 2014
Author: arsenm
Date: Mon Mar 31 14:35:33 2014
New Revision: 205242
URL: http://llvm.org/viewvc/llvm-project?rev=205242&view=rev
Log:
R600: Compute masked bits for min and max
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umax.ll
llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umin.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=205242&r1=205241&r2=205242&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Mon Mar 31 14:35:33 2014
@@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTar
}
}
+/// Computes the known-zero / known-one masks for a two-operand min or max.
+///
+/// A min/max yields one of its two inputs, so a bit is only known in the
+/// result when it is known, with the same value, in both inputs.
+///
+/// \param Op0       First value being compared.
+/// \param Op1       Second value being compared.
+/// \param KnownZero [out] Bits known to be zero in both operands.
+/// \param KnownOne  [out] Bits known to be one in both operands.
+/// \param DAG       Used to query the known bits of each operand.
+/// \param Depth     Recursion depth of the node that owns the operands.
+static void computeMaskedBitsForMinMax(const SDValue Op0,
+                                       const SDValue Op1,
+                                       APInt &KnownZero,
+                                       APInt &KnownOne,
+                                       const SelectionDAG &DAG,
+                                       unsigned Depth) {
+  APInt Op0Zero, Op0One;
+  APInt Op1Zero, Op1One;
+  // The operands sit one level below the node being analyzed. Bump the depth
+  // so that chains of min/max nodes still count against the recursion limit
+  // instead of being walked without bound.
+  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth + 1);
+  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth + 1);
+
+  KnownZero = Op0Zero & Op1Zero;
+  KnownOne = Op0One & Op1One;
+}
+
void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
+ // Fills KnownZero/KnownOne for Op; only the min/max intrinsics and the min/max AMDGPUISD nodes are refined.
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::INTRINSIC_WO_CHAIN: {
+ // FIXME: The intrinsic should just use the node.
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { // operand 0 is read as the intrinsic ID
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2), // the two compared values follow the ID
+ KnownZero, KnownOne, DAG, Depth);
+ break;
+ default:
+ break; // any other intrinsic: both masks stay zeroed
+ }
+ // Done with the intrinsic form whether or not the ID was recognized.
+ break;
+ }
+ case AMDGPUISD::SMAX:
+ case AMDGPUISD::UMAX:
+ case AMDGPUISD::SMIN:
+ case AMDGPUISD::UMIN:
+ computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1), // node form: the compared values are operands 0 and 1
+ KnownZero, KnownOne, DAG, Depth);
+ break;
+ default:
+ break; // every other opcode: nothing known
+ }
}
Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umax.ll?rev=205242&r1=205241&r2=205242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umax.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umax.ll Mon Mar 31 14:35:33 2014
@@ -21,6 +21,21 @@ entry:
ret void
}
+; SI-LABEL: @trunc_zext_umax
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+ %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp2 = zext i8 %tmp5 to i32 ; widen the loaded byte to the intrinsic's i32 operand type
+ %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone ; both inputs have zero bits above bit 7
+ %tmp4 = trunc i32 %tmp3 to i8 ; trunc followed by zext keeps only the low 8 bits of the umax result
+ %tmp6 = zext i8 %tmp4 to i16 ; the checks above this function expect no AND to be emitted for this narrowing
+ store i16 %tmp6, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umax(i32, i32) #1
Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umin.ll?rev=205242&r1=205241&r2=205242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umin.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.umin.ll Mon Mar 31 14:35:33 2014
@@ -21,6 +21,21 @@ entry:
ret void
}
+; SI-LABEL: @trunc_zext_umin
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+ %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp2 = zext i8 %tmp5 to i32 ; widen the loaded byte to the intrinsic's i32 operand type
+ %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone ; both inputs have zero bits above bit 7
+ %tmp4 = trunc i32 %tmp3 to i8 ; trunc followed by zext keeps only the low 8 bits of the umin result
+ %tmp6 = zext i8 %tmp4 to i16 ; the checks above this function expect no AND to be emitted for this narrowing
+ store i16 %tmp6, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umin(i32, i32) #1
More information about the llvm-commits mailing list