[llvm] 2af1640 - [LegalizeDAG][X86][AMDGPU] Use ANY_EXTEND instead of ZERO_EXTEND when promoting ISD::CTTZ/CTTZ_ZERO_UNDEF.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 7 22:26:50 PST 2020
Author: Craig Topper
Date: 2020-02-07T22:25:56-08:00
New Revision: 2af1640f9aa4ebe5d447586bbdad6514312bb814
URL: https://github.com/llvm/llvm-project/commit/2af1640f9aa4ebe5d447586bbdad6514312bb814
DIFF: https://github.com/llvm/llvm-project/commit/2af1640f9aa4ebe5d447586bbdad6514312bb814.diff
LOG: [LegalizeDAG][X86][AMDGPU] Use ANY_EXTEND instead of ZERO_EXTEND when promoting ISD::CTTZ/CTTZ_ZERO_UNDEF.
Summary:
For CTTZ we place a set bit just past where the non-promoted type
stopped so the extended bits won't be used for the count. For
CTTZ_ZERO_UNDEF we don't care what happens if no bits are set in
the original type and we end up counting into the extended bits.
So we can just use ANY_EXTEND for both cases.
This matches what is done in type legalization for these operations.
We make no effort to force the upper bits to zero.
Differential Revision: https://reviews.llvm.org/D74111
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
llvm/test/CodeGen/X86/clz.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5a23573d033c..d03f765a5779 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4342,8 +4342,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
- // Zero extend the argument.
- Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Zero extend the argument unless it's cttz, then use any_extend.
+ if (Node->getOpcode() == ISD::CTTZ ||
+ Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ else
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+
if (Node->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index c5e7a7310e63..626ef156a91e 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -131,7 +131,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i8_with_select:
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
%val = load i8, i8 addrspace(1)* %arrayidx, align 1
@@ -144,7 +144,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noa
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i16_with_select:
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
%val = load i16, i16 addrspace(1)* %arrayidx, align 1
@@ -246,7 +246,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; FUNC-LABEL: {{^}}v_cttz_i8_sel_eq_neg1:
; SI: {{buffer|flat}}_load_ubyte
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
; EG: FFBL_INT
define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index f76f5b10151c..7884f3ebcc0c 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -17,29 +17,25 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i8 @cttz_i8(i8 %x) {
; X32-LABEL: cttz_i8:
; X32: # %bb.0:
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: bsfl %eax, %eax
+; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: bsfl %eax, %eax
+; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i8:
; X32-CLZ: # %bb.0:
-; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-CLZ-NEXT: tzcntl %eax, %eax
+; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: tzcntl %eax, %eax
+; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
@@ -503,17 +499,16 @@ define i8 @cttz_i8_zero_test(i8 %n) {
;
; X32-CLZ-LABEL: cttz_i8_zero_test:
; X32-CLZ: # %bb.0:
-; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-CLZ-NEXT: orl $256, %eax # imm = 0x100
+; X32-CLZ-NEXT: movl $256, %eax # imm = 0x100
+; X32-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: orl $256, %eax # imm = 0x100
-; X64-CLZ-NEXT: tzcntl %eax, %eax
+; X64-CLZ-NEXT: orl $256, %edi # imm = 0x100
+; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
More information about the llvm-commits mailing list