[llvm] r316037 - AMDGPU : Fix an error for the llvm.cttz implementation.

Wei Ding via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 17 14:49:52 PDT 2017


Author: wdng
Date: Tue Oct 17 14:49:52 2017
New Revision: 316037

URL: http://llvm.org/viewvc/llvm-project?rev=316037&view=rev
Log:
AMDGPU: Fix an error in the llvm.cttz implementation: when lowering cttz, the zero checks on the Hi/Lo halves compared against 1 instead of 0.

Differential Revision: http://reviews.llvm.org/D39014

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=316037&r1=316036&r2=316037&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Oct 17 14:49:52 2017
@@ -2208,9 +2208,8 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_
   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
                                    *DAG.getContext(), MVT::i32);
 
-  SDValue ZeroOrOne = isCtlzOpc(Op.getOpcode()) ? Zero : One;
   SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
-  SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, ZeroOrOne, ISD::SETEQ);
+  SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
 
   SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
   SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
@@ -2233,7 +2232,7 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_
     // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
     // which we probably don't want.
     SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
-    SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, ZeroOrOne, ISD::SETEQ);
+    SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
     SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
 
     // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction

Modified: llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll?rev=316037&r1=316036&r2=316037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll Tue Oct 17 14:49:52 2017
@@ -157,6 +157,7 @@ define amdgpu_kernel void @v_cttz_zero_u
 
 ; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select:
 ; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_ne_u32_e32 vcc, 0
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
   %val = load i32, i32 addrspace(1)* %arrayidx, align 1
@@ -178,6 +179,8 @@ define amdgpu_kernel void @v_cttz_zero_u
 ; SI: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]]
 ; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]]
+; SI: v_cmp_eq_u32_e32 vcc, 0
+; SI: v_cmp_ne_u64_e32 vcc, 0
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind {
   %val = load i64, i64 addrspace(1)* %arrayidx, align 1




More information about the llvm-commits mailing list