[llvm] 600f2e1 - [X86] Remove SETB_C8r/SETB_C16r pseudo instructions. Use SETB_C32r and EXTRACT_SUBREG instead.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 10:23:12 PST 2020
Author: Craig Topper
Date: 2020-02-06T10:22:24-08:00
New Revision: 600f2e1c4de59a48a765d9ac8eadab2f4307fa30
URL: https://github.com/llvm/llvm-project/commit/600f2e1c4de59a48a765d9ac8eadab2f4307fa30
DIFF: https://github.com/llvm/llvm-project/commit/600f2e1c4de59a48a765d9ac8eadab2f4307fa30.diff
LOG: [X86] Remove SETB_C8r/SETB_C16r pseudo instructions. Use SETB_C32r and EXTRACT_SUBREG instead.
Only 32 and 64 bit SBB are dependency breaking instructions on some
CPUs. The 8 and 16 bit forms have to preserve upper bits of the GPR.
This patch removes the smaller forms and selects the wider form
instead. I had to do this with custom code as the tblgen generated
code glued the eflags copytoreg to the extract_subreg instead of
to the SETB pseudo.
Longer term I think we can remove X86ISD::SETCC_CARRY and use
(X86ISD::SBB zero, zero). We'll want to keep the pseudo and select
(X86ISD::SBB zero, zero) to either a MOV32r0+SBB for targets where
there is no dependency break and SETB_C32/SETB_C64 for targets
that have a dependency break. May want some way to avoid the MOV32r0
if the instruction that produced the carry flag happened to def a
register that we can use for the dependency.
I think the flag copy lowering should be using NEG instead of SUB to
handle SETB. That would avoid the MOV32r0 there. Or maybe it should
use an ADC with -1 to recreate the carry flag and keep the SETB?
That would avoid a MOVZX on the input of the SUB.
Differential Revision: https://reviews.llvm.org/D74024
Added:
Modified:
llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
llvm/test/CodeGen/X86/flags-copy-lowering.mir
llvm/test/CodeGen/X86/sbb.ll
llvm/test/CodeGen/X86/vector-compare-any_of.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index b1d2de29c896..d3db5cab8ec4 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -639,8 +639,6 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
FlagsKilled = true;
switch (MI.getOpcode()) {
- case X86::SETB_C8r:
- case X86::SETB_C16r:
case X86::SETB_C32r:
case X86::SETB_C64r:
// Use custom lowering for arithmetic that is merely extending the
@@ -1057,24 +1055,9 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
unsigned Sub;
switch (SetBI.getOpcode()) {
- case X86::SETB_C8r:
- Sub = X86::SUB8rr;
- break;
-
- case X86::SETB_C16r:
- Sub = X86::SUB16rr;
- break;
-
- case X86::SETB_C32r:
- Sub = X86::SUB32rr;
- break;
-
- case X86::SETB_C64r:
- Sub = X86::SUB64rr;
- break;
-
- default:
- llvm_unreachable("Invalid SETB_C* opcode!");
+ default: llvm_unreachable("Invalid SETB_C* opcode!");
+ case X86::SETB_C32r: Sub = X86::SUB32rr; break;
+ case X86::SETB_C64r: Sub = X86::SUB64rr; break;
}
Register ResultReg = MRI->createVirtualRegister(&SetBRC);
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index b174a0856b2d..9240fd24f31d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5270,6 +5270,35 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
+
+ case X86ISD::SETCC_CARRY: {
+ // We have to do this manually because tblgen will put the eflags copy in
+ // the wrong place if we use an extract_subreg in the pattern.
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue Chain = CurDAG->getEntryNode();
+
+ // Copy flags to the EFLAGS register and glue it to next node.
+ SDValue EFLAGS = CurDAG->getCopyToReg(Chain, dl, X86::EFLAGS,
+ Node->getOperand(1), SDValue());
+ Chain = EFLAGS;
+
+ // Create a 64-bit instruction if the result is 64-bits otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
+ MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ SDValue Result = SDValue(
+ CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
+
+ // For less than 32-bits we need to extract from the 32-bit node.
+ if (VT == MVT::i8 || VT == MVT::i16) {
+ int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
+ Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
+ }
+
+ ReplaceUses(SDValue(Node, 0), Result);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
}
SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ee5ce177e247..43a67cbc42dc 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -308,18 +308,13 @@ def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteADC],
+ hasSideEffects = 0 in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
-def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>;
+def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>;
} // isCodeGenOnly
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e6576b8d552f..371181802abd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4085,10 +4085,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
return ExpandMOVImmSExti8(MIB, *this, Subtarget);
- case X86::SETB_C8r:
- return Expand2AddrUndef(MIB, get(X86::SBB8rr));
- case X86::SETB_C16r:
- return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 38d1eeebeca2..e7793e83b01c 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -12,7 +12,7 @@ define i32 @main() nounwind {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq {{.*}}(%rip), %rax
-; CHECK-NEXT: sbbb %al, %al
+; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: testb $-106, %al
; CHECK-NEXT: jle .LBB0_1
; CHECK-NEXT: # %bb.2: # %if.then
diff --git a/llvm/test/CodeGen/X86/flags-copy-lowering.mir b/llvm/test/CodeGen/X86/flags-copy-lowering.mir
index f8bf3837bdae..ef484824b58b 100644
--- a/llvm/test/CodeGen/X86/flags-copy-lowering.mir
+++ b/llvm/test/CodeGen/X86/flags-copy-lowering.mir
@@ -541,28 +541,8 @@ body: |
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$eflags = COPY %3
- %4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
- MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
- ; CHECK-NOT: $eflags =
- ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
- ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = COPY %[[ZERO]].sub_8bit
- ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
- ; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
-
- $eflags = COPY %3
- %5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
- MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
- ; CHECK-NOT: $eflags =
- ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
- ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = COPY %[[CF_EXT]].sub_16bit
- ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
- ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = COPY %[[ZERO]].sub_16bit
- ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
- ; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
-
- $eflags = COPY %3
- %6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
- MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
+ %4:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
+ MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %4
; CHECK-NOT: $eflags =
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
@@ -570,8 +550,8 @@ body: |
; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
$eflags = COPY %3
- %7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
+ %5:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
+ MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
; CHECK-NOT: $eflags =
; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll
index bd4a62f21699..cc8127cbea23 100644
--- a/llvm/test/CodeGen/X86/sbb.ll
+++ b/llvm/test/CodeGen/X86/sbb.ll
@@ -9,7 +9,8 @@ define i8 @i8_select_0_or_neg1(i8 %x) {
; CHECK-LABEL: i8_select_0_or_neg1:
; CHECK: # %bb.0:
; CHECK-NEXT: negb %dil
-; CHECK-NEXT: sbbb %al, %al
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%cmp = icmp eq i8 %x, 0
%sel = select i1 %cmp, i8 0, i8 -1
@@ -22,7 +23,8 @@ define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
; CHECK-LABEL: i16_select_0_or_neg1_as_math:
; CHECK: # %bb.0:
; CHECK-NEXT: negw %di
-; CHECK-NEXT: sbbw %ax, %ax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cmp = icmp eq i16 %x, 0
%ext = zext i1 %cmp to i16
@@ -90,7 +92,8 @@ define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
; CHECK-LABEL: i16_select_neg1_or_0_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw $1, %di
-; CHECK-NEXT: sbbw %ax, %ax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cmp = icmp ne i16 %x, 0
%sel = select i1 %cmp, i16 0, i16 -1
@@ -103,7 +106,8 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
; CHECK-LABEL: i8_select_neg1_or_0_commuted_as_math:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpb $1, %dil
-; CHECK-NEXT: sbbb %al, %al
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%cmp = icmp ne i8 %x, 0
%ext = zext i1 %cmp to i8
@@ -205,7 +209,8 @@ define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: ult_select_neg1_or_0_sub:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpw %di, %si
-; CHECK-NEXT: sbbw %ax, %ax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cmp = icmp ult i16 %y, %x
%zext = zext i1 %cmp to i16
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 40c63a7171cb..084de61dd086 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -666,7 +666,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
-; SSE-NEXT: sbbw %ax, %ax
+; SSE-NEXT: sbbl %eax, %eax
+; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i16_legal_sext:
@@ -678,7 +679,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: negl %eax
-; AVX1-NEXT: sbbw %ax, %ax
+; AVX1-NEXT: sbbl %eax, %eax
+; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -689,7 +691,8 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: negl %eax
-; AVX2-NEXT: sbbw %ax, %ax
+; AVX2-NEXT: sbbl %eax, %eax
+; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -731,7 +734,8 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
-; SSE-NEXT: sbbb %al, %al
+; SSE-NEXT: sbbl %eax, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_sext:
@@ -739,7 +743,8 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: negl %eax
-; AVX-NEXT: sbbb %al, %al
+; AVX-NEXT: sbbl %eax, %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8_sext:
@@ -778,7 +783,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
-; SSE-NEXT: sbbb %al, %al
+; SSE-NEXT: sbbl %eax, %eax
+; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8_sext:
@@ -790,7 +796,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: negl %eax
-; AVX1-NEXT: sbbb %al, %al
+; AVX1-NEXT: sbbl %eax, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -799,7 +806,8 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: negl %eax
-; AVX2-NEXT: sbbb %al, %al
+; AVX2-NEXT: sbbl %eax, %eax
+; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
More information about the llvm-commits
mailing list