[PATCH] D37348: Tighten conditions for converting ISD::CTTZ_ZERO_UNDEF to ISD::CTTZ
Wei Ding via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 1 12:45:23 PDT 2017
wdng updated this revision to Diff 113579.
wdng added a comment.
Just add a custom lowering of ISD::CTTZ to ISD::CTTZ_ZERO_UNDEF.
Repository:
rL LLVM
https://reviews.llvm.org/D37348
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
lib/Target/AMDGPU/AMDGPUISelLowering.h
test/CodeGen/AMDGPU/cttz_zero_undef.ll
Index: test/CodeGen/AMDGPU/cttz_zero_undef.ll
===================================================================
--- test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -76,3 +76,28 @@
store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
ret void
}
+
+; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32_with_select:
+; SI: s_ff1_i32_b32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
+; EG: FFBL_INT {{\*? *}}[[RESULT]]
+; %val is passed directly as a kernel argument. The cttz call passes i1 true as
+; its second operand, and the select picks %cttz when %val != 0 and 32 otherwise.
+; NOTE(review): %ret is never used -- the store below writes %cttz, whereas the
+; v_ variant of this test stores %ret. Confirm whether the select is meant to
+; be dead here.
+define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+ %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+ %cttz_ret = icmp ne i32 %val, 0
+ %ret = select i1 %cttz_ret, i32 %cttz, i32 32
+ store i32 %cttz, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select:
+; SI: v_ffbl_b32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
+; %val is loaded from %arrayidx (an addrspace(1) pointer) instead of being
+; passed as an argument. The select yields %cttz when %val != 0 and 32
+; otherwise, and the selected value %ret is what gets stored to %out.
+define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+ %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+ %cttz_ret = icmp ne i32 %val, 0
+ %ret = select i1 %cttz_ret, i32 %cttz, i32 32
+ store i32 %ret, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -115,6 +115,7 @@
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -382,7 +382,7 @@
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Expand);
}
@@ -1090,6 +1090,7 @@
break;
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
@@ -1763,6 +1764,13 @@
return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
}
+// Custom lowering for ISD::CTTZ: rebuilds the node as ISD::CTTZ_ZERO_UNDEF on
+// the same operand and value type, with no extra handling for a zero input.
+// NOTE(review): ISD::CTTZ is defined for a zero input while CTTZ_ZERO_UNDEF is
+// not, so this presumably relies on what the hardware returns for zero --
+// confirm that is acceptable for every user of CTTZ.
+SDValue AMDGPUTargetLowering:: LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SL, VT, Op.getOperand(0));
+}
+
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37348.113579.patch
Type: text/x-patch
Size: 3359 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170901/3965501c/attachment.bin>
More information about the llvm-commits
mailing list