[llvm] r275950 - AVX-512: Fixed BT instruction selection.
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 19 00:14:21 PDT 2016
Author: delena
Date: Tue Jul 19 02:14:21 2016
New Revision: 275950
URL: http://llvm.org/viewvc/llvm-project?rev=275950&view=rev
Log:
AVX-512: Fixed BT instruction selection.
The condition expression (a >> n) & 1 is converted to a "bt a, n" instruction. This works on all Intel targets.
On AVX-512, however, it was broken because the expression is rewritten as (truncate (a >> n) to i1).
I added the new sequence (truncate (a >> n) to i1) to the BT pattern.
Differential Revision: https://reviews.llvm.org/D22354
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/bt.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=275950&r1=275949&r2=275950&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue Jul 19 02:14:21 2016
@@ -1468,6 +1468,10 @@ SDValue TargetLowering::SimplifySetCC(EV
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ if (MinBits == 1 && C1 == 1)
+ // Invert the condition.
+ return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=275950&r1=275949&r2=275950&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 19 02:14:21 2016
@@ -15010,9 +15010,32 @@ unsigned X86TargetLowering::combineRepea
return 2;
}
+/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
+/// according to equal/not-equal condition code \p CC.
+static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reason.
+ if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType())
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, dl, MVT::i8), BT);
+}
+
/// Result of 'and' is compared against zero. Change to a BT node if possible.
-SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) const {
+static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
@@ -15055,30 +15078,38 @@ SDValue X86TargetLowering::LowerToBT(SDV
}
}
- if (LHS.getNode()) {
- // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (LHS.getValueType() == MVT::i8 ||
- LHS.getValueType() == MVT::i16)
- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (LHS.getValueType() != RHS.getValueType())
- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, dl, MVT::i8), BT);
- }
+ if (LHS.getNode())
+ return getBitTestCondition(LHS, RHS, CC, dl, DAG);
return SDValue();
}
+// Convert (truncate (srl X, N) to i1) to (bt X, N)
+static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
+
+ assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
+ "Expected TRUNCATE to i1 node");
+
+ if (Op.getOperand(0).getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShiftRight = Op.getOperand(0);
+ return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1),
+ CC, dl, DAG);
+}
+
+/// Result of 'and' or 'trunc to i1' is compared against zero.
+/// Change to a BT node if possible.
+SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) const {
+ if (Op.getOpcode() == ISD::AND)
+ return LowerAndToBT(Op, CC, dl, DAG);
+ if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
+ return LowerTruncateToBT(Op, CC, dl, DAG);
+ return SDValue();
+}
+
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
@@ -15606,8 +15637,8 @@ SDValue X86TargetLowering::LowerSETCC(SD
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- isNullConstant(Op1) &&
+ // Lower (trunc (X >> N) to i1) to BT(X, N).
+ if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1) {
@@ -16798,9 +16829,8 @@ SDValue X86TargetLowering::LowerBRCOND(S
// Look pass the truncate if the high bits are known zero.
Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
- // We know the result of AND is compared against zero. Try to match
- // it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ // We know the result is compared against zero. Try to match it to BT.
+ if (Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
Modified: llvm/trunk/test/CodeGen/X86/bt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bt.ll?rev=275950&r1=275949&r2=275950&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bt.ll Tue Jul 19 02:14:21 2016
@@ -1,7 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=PENTIUM4 %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX-512 %s
-
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
; PR3253
; The register+memory form of the BT instruction should be usable on
@@ -21,29 +20,11 @@
; - The and can be commuted.
define void @test2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: test2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB0_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB0_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: test2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: jne .LBB0_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB0_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: test2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB0_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -59,29 +40,11 @@ UnifiedReturnBlock:
}
define void @test2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: test2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB1_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB1_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: test2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: jne .LBB1_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB1_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: test2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB1_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -97,29 +60,11 @@ UnifiedReturnBlock:
}
define void @atest2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: atest2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB2_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB2_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: atest2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: jne .LBB2_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB2_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: atest2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB2_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -135,29 +80,11 @@ UnifiedReturnBlock:
}
define void @atest2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: atest2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB3_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB3_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: atest2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: jne .LBB3_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB3_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: atest2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB3_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -177,12 +104,7 @@ define void @test3(i32 %x, i32 %n) nounw
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB4_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB4_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -202,12 +124,7 @@ define void @test3b(i32 %x, i32 %n) noun
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB5_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB5_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -223,29 +140,11 @@ UnifiedReturnBlock:
}
define void @testne2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: testne2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB6_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB6_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: testne2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB6_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB6_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: testne2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB6_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -261,29 +160,11 @@ UnifiedReturnBlock:
}
define void @testne2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: testne2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB7_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB7_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: testne2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB7_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB7_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: testne2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB7_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -299,29 +180,11 @@ UnifiedReturnBlock:
}
define void @atestne2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: atestne2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB8_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB8_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: atestne2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB8_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB8_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: atestne2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB8_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -337,29 +200,11 @@ UnifiedReturnBlock:
}
define void @atestne2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: atestne2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB9_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB9_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: atestne2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB9_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB9_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: atestne2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB9_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -379,12 +224,7 @@ define void @testne3(i32 %x, i32 %n) nou
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB10_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB10_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -404,12 +244,7 @@ define void @testne3b(i32 %x, i32 %n) no
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB11_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB11_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -425,29 +260,11 @@ UnifiedReturnBlock:
}
define void @query2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: query2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB12_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB12_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: query2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB12_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB12_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: query2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB12_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -463,29 +280,11 @@ UnifiedReturnBlock:
}
define void @query2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: query2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB13_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB13_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: query2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB13_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB13_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: query2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB13_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -501,29 +300,11 @@ UnifiedReturnBlock:
}
define void @aquery2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: aquery2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB14_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB14_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: aquery2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB14_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB14_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: aquery2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB14_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -539,29 +320,11 @@ UnifiedReturnBlock:
}
define void @aquery2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: aquery2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jae .LBB15_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB15_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: aquery2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: testb $1, %dil
-; AVX-512-NEXT: je .LBB15_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB15_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: aquery2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jae .LBB15_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -581,12 +344,7 @@ define void @query3(i32 %x, i32 %n) noun
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB16_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB16_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -606,12 +364,7 @@ define void @query3b(i32 %x, i32 %n) nou
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB17_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB17_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -631,12 +384,7 @@ define void @query3x(i32 %x, i32 %n) nou
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB18_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB18_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -656,12 +404,7 @@ define void @query3bx(i32 %x, i32 %n) no
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB19_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB19_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -677,35 +420,11 @@ UnifiedReturnBlock:
}
define void @queryne2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: queryne2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB20_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB20_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: queryne2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: andl $1, %edi
-; AVX-512-NEXT: kmovw %edi, %k0
-; AVX-512-NEXT: kxnorw %k0, %k0, %k1
-; AVX-512-NEXT: kshiftrw $15, %k1, %k1
-; AVX-512-NEXT: kxorw %k1, %k0, %k0
-; AVX-512-NEXT: kmovw %k0, %eax
-; AVX-512-NEXT: testb %al, %al
-; AVX-512-NEXT: je .LBB20_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB20_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: queryne2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB20_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -721,35 +440,11 @@ UnifiedReturnBlock:
}
define void @queryne2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: queryne2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB21_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB21_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: queryne2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: andl $1, %edi
-; AVX-512-NEXT: kmovw %edi, %k0
-; AVX-512-NEXT: kxnorw %k0, %k0, %k1
-; AVX-512-NEXT: kshiftrw $15, %k1, %k1
-; AVX-512-NEXT: kxorw %k1, %k0, %k0
-; AVX-512-NEXT: kmovw %k0, %eax
-; AVX-512-NEXT: testb %al, %al
-; AVX-512-NEXT: je .LBB21_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB21_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: queryne2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB21_2
+;
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -765,35 +460,11 @@ UnifiedReturnBlock:
}
define void @aqueryne2(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: aqueryne2:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB22_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB22_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: aqueryne2:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: andl $1, %edi
-; AVX-512-NEXT: kmovw %edi, %k0
-; AVX-512-NEXT: kxnorw %k0, %k0, %k1
-; AVX-512-NEXT: kshiftrw $15, %k1, %k1
-; AVX-512-NEXT: kxorw %k1, %k0, %k0
-; AVX-512-NEXT: kmovw %k0, %eax
-; AVX-512-NEXT: testb %al, %al
-; AVX-512-NEXT: je .LBB22_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB22_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: aqueryne2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB22_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -809,35 +480,11 @@ UnifiedReturnBlock:
}
define void @aqueryne2b(i32 %x, i32 %n) nounwind {
-; PENTIUM4-LABEL: aqueryne2b:
-; PENTIUM4: # BB#0: # %entry
-; PENTIUM4-NEXT: btl %esi, %edi
-; PENTIUM4-NEXT: jb .LBB23_2
-; PENTIUM4-NEXT: # BB#1: # %bb
-; PENTIUM4-NEXT: pushq %rax
-; PENTIUM4-NEXT: callq foo
-; PENTIUM4-NEXT: popq %rax
-; PENTIUM4-NEXT: .LBB23_2: # %UnifiedReturnBlock
-; PENTIUM4-NEXT: retq
-;
-; AVX-512-LABEL: aqueryne2b:
-; AVX-512: # BB#0: # %entry
-; AVX-512-NEXT: movl %esi, %ecx
-; AVX-512-NEXT: shrl %cl, %edi
-; AVX-512-NEXT: andl $1, %edi
-; AVX-512-NEXT: kmovw %edi, %k0
-; AVX-512-NEXT: kxnorw %k0, %k0, %k1
-; AVX-512-NEXT: kshiftrw $15, %k1, %k1
-; AVX-512-NEXT: kxorw %k1, %k0, %k0
-; AVX-512-NEXT: kmovw %k0, %eax
-; AVX-512-NEXT: testb %al, %al
-; AVX-512-NEXT: je .LBB23_2
-; AVX-512-NEXT: # BB#1: # %bb
-; AVX-512-NEXT: pushq %rax
-; AVX-512-NEXT: callq foo
-; AVX-512-NEXT: popq %rax
-; AVX-512-NEXT: .LBB23_2: # %UnifiedReturnBlock
-; AVX-512-NEXT: retq
+; CHECK-LABEL: aqueryne2b:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: jb .LBB23_2
+;
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -857,12 +504,7 @@ define void @queryne3(i32 %x, i32 %n) no
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB24_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB24_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -882,12 +524,7 @@ define void @queryne3b(i32 %x, i32 %n) n
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB25_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB25_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -907,12 +544,7 @@ define void @queryne3x(i32 %x, i32 %n) n
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB26_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB26_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -932,12 +564,7 @@ define void @queryne3bx(i32 %x, i32 %n)
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB27_2
-; CHECK-NEXT: # BB#1: # %bb
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .LBB27_2: # %UnifiedReturnBlock
-; CHECK-NEXT: retq
+;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -961,6 +588,7 @@ define zeroext i1 @invert(i32 %flags, i3
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
+;
%neg = xor i32 %flags, -1
%shl = shl i32 1, %flag
%and = and i32 %shl, %neg
More information about the llvm-commits
mailing list