[PATCH] D11829: AMDGPU/SI: Re-define AMDGPUISD::CLAMP as always clamping between 0.0 and 1.0
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 7 06:32:36 PDT 2015
tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.
We weren't actually selecting AMDGPUISD::CLAMP with a range other than
0.0 and 1.0, and this range is the only one we really care about since
we can fold it into VOP3 instructions.
Clamping to other ranges is now done with ISD::FMINNUM and ISD::FMAXNUM.
http://reviews.llvm.org/D11829
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
lib/Target/AMDGPU/AMDGPUISelLowering.h
lib/Target/AMDGPU/AMDGPUInstrInfo.td
lib/Target/AMDGPU/AMDGPUInstructions.td
lib/Target/AMDGPU/SIInstructions.td
test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
Index: test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
+++ test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
@@ -65,3 +65,11 @@
store float %clamp, float addrspace(1)* %out, align 4
ret void
}
+; FUNC-LABEL: {{^}}clamp_non_standard:
+; SI-DAG: v_max_f32
+; SI-DAG: v_min_f32
+define void @clamp_non_standard(float addrspace(1)* %out, float %src) {
+ %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float -1.0, float 1.0) readnone
+ store float %clamp, float addrspace(1)* %out, align 4
+ ret void
+}
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2562,9 +2562,9 @@
/********** =================== **********/
def : Pat <
- (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
- (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
+ (f32 (AMDGPUclamp (VOP3Mods0Clamp f32:$src0,
+ i32:$src0_modifiers, i32:$omod))),
+ (V_ADD_F32_e64 $src0_modifiers, (f32 $src0), 0, 0, 1, $omod)
>;
/********** ================================ **********/
Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -453,7 +453,7 @@
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (AMDGPUclamp f32:$src0))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -77,7 +77,7 @@
[]
>;
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp, []>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -232,6 +232,7 @@
// End AMDIL ISD Opcodes
DWORDADDR,
FRACT,
+ /// CLAMP value between 0.0 and 1.0.
CLAMP,
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -912,9 +912,15 @@
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_clamp:
- case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDIL_clamp: { // Legacy name.
+ if (cast<ConstantFPSDNode>(Op.getOperand(2))->isZero() &&
+ cast<ConstantFPSDNode>(Op.getOperand(3))->isExactlyValue(1.0))
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1));
+
+ SDValue Tmp = DAG.getNode(ISD::FMAXNUM, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ return DAG.getNode(ISD::FMINNUM, DL, VT, Tmp, Op.getOperand(3));
+ }
case Intrinsic::AMDGPU_div_scale: {
// 3rd parameter required to be a constant.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D11829.31506.patch
Type: text/x-patch
Size: 3713 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150807/989882f4/attachment.bin>
More information about the llvm-commits
mailing list