[llvm] r267280 - [CodeGen] When promoting CTTZ operations to a larger type, don't insert a select that detects a zero input in order to return the original bit width instead of the extended one. Instead, just set the first bit in the zero-extended part.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 22 22:20:47 PDT 2016
Author: ctopper
Date: Sat Apr 23 00:20:47 2016
New Revision: 267280
URL: http://llvm.org/viewvc/llvm-project?rev=267280&view=rev
Log:
[CodeGen] When promoting CTTZ operations to a larger type, don't insert a select that detects a zero input in order to return the original bit width instead of the extended one. Instead, just set the first bit in the zero-extended part.
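In scalar terms, the change replaces a compare-and-select fixup with a single OR. A minimal C++ sketch, assuming an i8 input promoted to i32 and the GCC/Clang __builtin_ctz intrinsic (the function name is illustrative, not part of the patch):

    #include <cstdint>

    // Before: count trailing zeros in the wide type, then fix up the zero
    // case with a compare and select (cmp/cmov on x86):
    //   wide   = cttz32(zext(x));           // 32 when x == 0
    //   result = (wide == 32) ? 8 : wide;
    //
    // After: force a bit just above the original 8-bit width, so the count
    // is naturally capped at 8 and no select is needed.
    unsigned cttz8(uint8_t x) {
      return (unsigned)__builtin_ctz((uint32_t)x | 0x100u); // input never zero
    }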
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/test/CodeGen/X86/bmi.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=267280&r1=267279&r2=267280&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Sat Apr 23 00:20:47 2016
@@ -4010,18 +4010,20 @@ void SelectionDAGLegalize::PromoteNode(S
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
// Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
// already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- if (Node->getOpcode() == ISD::CTTZ) {
- // FIXME: This should set a bit in the zero extended value instead.
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT),
- Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT),
- ISD::SETEQ);
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2,
- DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ ||
- Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
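To make the hunk above concrete: for an i8 CTTZ promoted to i32, OVT is i8 and NVT is i32, so getOneBitSet builds a 32-bit constant with only bit 8 set. A sketch of that value (the wrapper function is hypothetical; the APInt call matches the one in the patch):

    #include "llvm/ADT/APInt.h"

    // APInt::getOneBitSet(numBits, BitNo) returns an APInt of width numBits
    // with exactly bit BitNo set; here that is the 32-bit value 0x100.
    llvm::APInt topBitFor8To32() {
      return llvm::APInt::getOneBitSet(/*numBits=*/32, /*BitNo=*/8);
    }

OR'ing this constant into the zero-extended operand guarantees the wide CTTZ input is nonzero and that its result never exceeds 8, which is exactly the i8 semantics for a zero input.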
Modified: llvm/trunk/test/CodeGen/X86/bmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi.ll?rev=267280&r1=267279&r2=267280&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi.ll Sat Apr 23 00:20:47 2016
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s
declare i8 @llvm.cttz.i8(i8, i1)
@@ -10,12 +10,9 @@ define i8 @t1(i8 %x) {
; CHECK-LABEL: t1:
; CHECK: # BB#0:
; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: tzcntl %eax, %ecx
-; CHECK-NEXT: cmpl $32, %ecx
-; CHECK-NEXT: movl $8, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: orl $256, %eax # imm = 0x100
+; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
ret i8 %tmp
}
@@ -25,7 +22,6 @@ define i16 @t2(i16 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntw %di, %ax
; CHECK-NEXT: retq
-;
%tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
ret i16 %tmp
}
@@ -35,7 +31,6 @@ define i32 @t3(i32 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
ret i32 %tmp
}
@@ -45,7 +40,6 @@ define i32 @tzcnt32_load(i32* %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
ret i32 %tmp
@@ -56,7 +50,6 @@ define i64 @t4(i64 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
ret i64 %tmp
}
@@ -67,7 +60,6 @@ define i8 @t5(i8 %x) {
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
}
@@ -77,7 +69,6 @@ define i16 @t6(i16 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntw %di, %ax
; CHECK-NEXT: retq
-;
%tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
ret i16 %tmp
}
@@ -87,7 +78,6 @@ define i32 @t7(i32 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
ret i32 %tmp
}
@@ -97,7 +87,6 @@ define i64 @t8(i64 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: tzcntq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
ret i64 %tmp
}
@@ -107,7 +96,6 @@ define i32 @andn32(i32 %x, i32 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: andnl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y, %tmp1
ret i32 %tmp2
@@ -118,7 +106,6 @@ define i32 @andn32_load(i32 %x, i32* %y)
; CHECK: # BB#0:
; CHECK-NEXT: andnl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y1, %tmp1
@@ -130,7 +117,6 @@ define i64 @andn64(i64 %x, i64 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: andnq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp1 = xor i64 %x, -1
%tmp2 = and i64 %tmp1, %y
ret i64 %tmp2
@@ -143,7 +129,6 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
; CHECK-NEXT: andnl %esi, %edi, %eax
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%notx = xor i32 %x, -1
%and = and i32 %notx, %y
%cmp = icmp eq i32 %and, 0
@@ -158,7 +143,6 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, %y
%cmp = icmp eq i32 %and, %y
ret i1 %cmp
@@ -171,7 +155,6 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
-;
%and = and i32 %y, %x
%cmp = icmp ne i32 %and, %y
ret i1 %cmp
@@ -184,7 +167,6 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, %y
%cmp = icmp eq i32 %y, %and
ret i1 %cmp
@@ -197,7 +179,6 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
-;
%and = and i32 %y, %x
%cmp = icmp ne i32 %y, %and
ret i1 %cmp
@@ -212,7 +193,6 @@ define i1 @and_cmp_const(i32 %x) {
; CHECK-NEXT: cmpl $43, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%and = and i32 %x, 43
%cmp = icmp eq i32 %and, 43
ret i1 %cmp
@@ -225,7 +205,6 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64
; CHECK-NEXT: andnq %rsi, %rdi, %rax
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%notx = xor i64 %x, -1
%and = and i64 %y, %notx
%cmp = icmp eq i64 %and, 0
@@ -240,7 +219,6 @@ define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; CHECK-NEXT: testb %sil, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
-;
%noty = xor i8 %y, -1
%and = and i8 %x, %noty
%cmp = icmp eq i8 %and, 0
@@ -252,7 +230,6 @@ define i32 @bextr32(i32 %x, i32 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: bextrl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -262,7 +239,6 @@ define i32 @bextr32_load(i32* %x, i32 %y
; CHECK: # BB#0:
; CHECK-NEXT: bextrl %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -276,7 +252,6 @@ define i32 @bextr32b(i32 %x) uwtable s
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, %edi, %eax
; CHECK-NEXT: retq
-;
%1 = lshr i32 %x, 4
%2 = and i32 %1, 4095
ret i32 %2
@@ -288,7 +263,6 @@ define i32 @bextr32b_load(i32* %x) uwta
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, (%rdi), %eax
; CHECK-NEXT: retq
-;
%1 = load i32, i32* %x
%2 = lshr i32 %1, 4
%3 = and i32 %2, 4095
@@ -300,7 +274,6 @@ define i64 @bextr64(i64 %x, i64 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -313,7 +286,6 @@ define i64 @bextr64b(i64 %x) uwtable s
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, %edi, %eax
; CHECK-NEXT: retq
-;
%1 = lshr i64 %x, 4
%2 = and i64 %1, 4095
ret i64 %2
@@ -325,7 +297,6 @@ define i64 @bextr64b_load(i64* %x) {
; CHECK-NEXT: movl $3076, %eax # imm = 0xC04
; CHECK-NEXT: bextrl %eax, (%rdi), %eax
; CHECK-NEXT: retq
-;
%1 = load i64, i64* %x, align 8
%2 = lshr i64 %1, 4
%3 = and i64 %2, 4095
@@ -339,7 +310,6 @@ define i32 @non_bextr32(i32 %x) {
; CHECK-NEXT: andl $111, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%shr = lshr i32 %x, 2
%and = and i32 %shr, 111
@@ -353,7 +323,6 @@ define i64 @non_bextr64(i64 %x) {
; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%shr = lshr i64 %x, 2
%and = and i64 %shr, 8589934590
@@ -365,7 +334,6 @@ define i32 @bzhi32(i32 %x, i32 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -375,7 +343,6 @@ define i32 @bzhi32_load(i32* %x, i32 %y)
; CHECK: # BB#0:
; CHECK-NEXT: bzhil %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -388,7 +355,6 @@ define i64 @bzhi64(i64 %x, i64 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -400,7 +366,6 @@ define i32 @bzhi32b(i32 %x, i8 zeroext %
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i32
%shl = shl i32 1, %conv
@@ -414,7 +379,6 @@ define i32 @bzhi32b_load(i32* %w, i8 zer
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, (%rdi), %eax
; CHECK-NEXT: retq
-;
entry:
%x = load i32, i32* %w
%conv = zext i8 %index to i32
@@ -429,7 +393,6 @@ define i32 @bzhi32c(i32 %x, i8 zeroext %
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhil %esi, %edi, %eax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i32
%shl = shl i32 1, %conv
@@ -443,7 +406,6 @@ define i64 @bzhi64b(i64 %x, i8 zeroext %
; CHECK: # BB#0: # %entry
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%conv = zext i8 %index to i64
%shl = shl i64 1, %conv
@@ -458,7 +420,6 @@ define i64 @bzhi64_constant_mask(i64 %x)
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: bzhiq %rax, %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%and = and i64 %x, 4611686018427387903
ret i64 %and
@@ -470,7 +431,6 @@ define i64 @bzhi64_small_constant_mask(i
; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
-;
entry:
%and = and i64 %x, 2147483647
ret i64 %and
@@ -481,7 +441,6 @@ define i32 @blsi32(i32 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsil %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -492,7 +451,6 @@ define i32 @blsi32_load(i32* %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsil (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 0, %x1
%tmp2 = and i32 %x1, %tmp
@@ -504,7 +462,6 @@ define i64 @blsi64(i64 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsiq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 0, %x
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -515,7 +472,6 @@ define i32 @blsmsk32(i32 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsmskl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 %x, 1
%tmp2 = xor i32 %x, %tmp
ret i32 %tmp2
@@ -526,7 +482,6 @@ define i32 @blsmsk32_load(i32* %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsmskl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = xor i32 %x1, %tmp
@@ -538,7 +493,6 @@ define i64 @blsmsk64(i64 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsmskq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 %x, 1
%tmp2 = xor i64 %tmp, %x
ret i64 %tmp2
@@ -549,7 +503,6 @@ define i32 @blsr32(i32 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsrl %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -560,7 +513,6 @@ define i32 @blsr32_load(i32* %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsrl (%rdi), %eax
; CHECK-NEXT: retq
-;
%x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = and i32 %x1, %tmp
@@ -572,7 +524,6 @@ define i64 @blsr64(i64 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: blsrq %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = sub i64 %x, 1
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -583,7 +534,6 @@ define i32 @pdep32(i32 %x, i32 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: pdepl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -593,7 +543,6 @@ define i32 @pdep32_load(i32 %x, i32* %y)
; CHECK: # BB#0:
; CHECK-NEXT: pdepl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -606,7 +555,6 @@ define i64 @pdep64(i64 %x, i64 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: pdepq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -618,7 +566,6 @@ define i32 @pext32(i32 %x, i32 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: pextl %esi, %edi, %eax
; CHECK-NEXT: retq
-;
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -628,7 +575,6 @@ define i32 @pext32_load(i32 %x, i32* %y)
; CHECK: # BB#0:
; CHECK-NEXT: pextl (%rsi), %edi, %eax
; CHECK-NEXT: retq
-;
%y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -641,7 +587,6 @@ define i64 @pext64(i64 %x, i64 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: pextq %rsi, %rdi, %rax
; CHECK-NEXT: retq
-;
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
ret i64 %tmp
}
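As a sanity check on the equivalence the updated t1 test relies on, a small standalone harness (hypothetical, not part of the commit) can compare the new lowering against a reference over every i8 value:

    #include <cassert>
    #include <cstdint>

    // Reference cttz for i8: position of the lowest set bit, 8 if x == 0.
    static unsigned refCttz8(uint8_t x) {
      unsigned n = 0;
      while (n < 8 && !(x & (1u << n)))
        ++n;
      return n;
    }

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        assert((unsigned)__builtin_ctz(x | 0x100u) == refCttz8((uint8_t)x));
      return 0;
    }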