[llvm] 7d626e7 - [X86] Move RDFLAGS/WRFLAGS expansion until after RA
Bill Wendling via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 30 15:32:24 PST 2023
Author: Bill Wendling
Date: 2023-01-30T15:32:16-08:00
New Revision: 7d626e7cbb3ac9b3f43a10df8d8edfab1300b156
URL: https://github.com/llvm/llvm-project/commit/7d626e7cbb3ac9b3f43a10df8d8edfab1300b156
DIFF: https://github.com/llvm/llvm-project/commit/7d626e7cbb3ac9b3f43a10df8d8edfab1300b156.diff
LOG: [X86] Move RDFLAGS/WRFLAGS expansion until after RA
Because the RDFLAGS and WRFLAGS pseudos are expanded before register
allocation (RA), the register allocator may introduce reloads in the
middle of the expanded sequence that reads or writes the EFLAGS
register. This can cause an issue where the stack pointer has already
been adjusted, but the stack offset used by the reload does not account
for that adjustment (see [1]).
To avoid this, expand these pseudos after register allocation.
[1] https://github.com/llvm/llvm-project/issues/59102
Reviewed By: craig.topper, nickdesaulniers, pengfei
Differential Revision: https://reviews.llvm.org/D140045
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.td
llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 965bccdfd0bf3..0d9cf723027e0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37306,41 +37306,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
- case X86::RDFLAGS32:
- case X86::RDFLAGS64: {
- unsigned PushF =
- MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
- unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
- MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
- // Permit reads of the EFLAGS and DF registers without them being defined.
- // This intrinsic exists to read external processor state in flags, such as
- // the trap flag, interrupt flag, and direction flag, none of which are
- // modeled by the backend.
- assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
- "Unexpected register in operand!");
- Push->getOperand(2).setIsUndef();
- assert(Push->getOperand(3).getReg() == X86::DF &&
- "Unexpected register in operand!");
- Push->getOperand(3).setIsUndef();
- BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
-
- case X86::WRFLAGS32:
- case X86::WRFLAGS64: {
- unsigned Push =
- MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
- unsigned PopF =
- MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
- BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(PopF));
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
-
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d650994350cb0..00c29f30e96d7 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5060,6 +5060,45 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+ case X86::RDFLAGS32:
+ case X86::RDFLAGS64: {
+ unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
+ MachineBasicBlock &MBB = *MIB->getParent();
+
+ MachineInstr *NewMI =
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
+ .getInstr();
+
+ // Permit reads of the EFLAGS and DF registers without them being defined.
+ // This intrinsic exists to read external processor state in flags, such as
+ // the trap flag, interrupt flag, and direction flag, none of which are
+ // modeled by the backend.
+ assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
+ "Unexpected register in operand! Should be EFLAGS.");
+ NewMI->getOperand(2).setIsUndef();
+ assert(NewMI->getOperand(3).getReg() == X86::DF &&
+ "Unexpected register in operand! Should be DF.");
+ NewMI->getOperand(3).setIsUndef();
+
+ MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
+ return true;
+ }
+
+ case X86::WRFLAGS32:
+ case X86::WRFLAGS64: {
+ unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
+ MachineBasicBlock &MBB = *MIB->getParent();
+
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(MI.getOperand(0).getReg());
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::POPF64 : X86::POPF32));
+ MI.eraseFromParent();
+ return true;
+ }
+
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
// Using %k0 as the undef input register is a performance heuristic based
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 3012035af11ab..b33e40900c706 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1381,7 +1381,7 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
}
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
SchedRW = [WriteRMW], Defs = [ESP] in {
let Uses = [ESP] in
def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
@@ -1394,7 +1394,7 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
Requires<[In64BitMode]>;
}
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
SchedRW = [WriteRMW] in {
let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
index 5460f36fca670..47aefdbf0e466 100644
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -52,3 +52,167 @@ entry:
call void @llvm.x86.flags.write.u64(i64 %arg)
ret void
}
+
+define i64 @read_flags_reg_pressure() nounwind {
+; CHECK-LABEL: read_flags_reg_pressure:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT: pushfq
+; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+;
+; WIN64-LABEL: read_flags_reg_pressure:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: pushq %r15
+; WIN64-NEXT: pushq %r14
+; WIN64-NEXT: pushq %r13
+; WIN64-NEXT: pushq %r12
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: pushq %rdi
+; WIN64-NEXT: pushq %rbx
+; WIN64-NEXT: subq $16, %rsp
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT: pushfq
+; WIN64-NEXT: popq %rdx
+; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; WIN64-NEXT: addq $16, %rsp
+; WIN64-NEXT: popq %rbx
+; WIN64-NEXT: popq %rdi
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: popq %r12
+; WIN64-NEXT: popq %r13
+; WIN64-NEXT: popq %r14
+; WIN64-NEXT: popq %r15
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: retq
+ %1 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+ %2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 0
+ %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 1
+ %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 2
+ %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 3
+ %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 4
+ %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 5
+ %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 6
+ %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 7
+ %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 8
+ %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 9
+ %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 10
+ %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 11
+ %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 12
+ %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 13
+ %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 14
+ %17 = tail call i64 @llvm.x86.flags.read.u64()
+ tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16)
+ ret i64 %17
+}
+
+define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
+; CHECK-LABEL: write_flags_reg_pressure:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT: pushq %rdx
+; CHECK-NEXT: popfq
+; CHECK-NEXT: movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+;
+; WIN64-LABEL: write_flags_reg_pressure:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: pushq %r15
+; WIN64-NEXT: pushq %r14
+; WIN64-NEXT: pushq %r13
+; WIN64-NEXT: pushq %r12
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: pushq %rdi
+; WIN64-NEXT: pushq %rbx
+; WIN64-NEXT: subq $16, %rsp
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT: pushq %rdx
+; WIN64-NEXT: popfq
+; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: addq $16, %rsp
+; WIN64-NEXT: popq %rbx
+; WIN64-NEXT: popq %rdi
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: popq %r12
+; WIN64-NEXT: popq %r13
+; WIN64-NEXT: popq %r14
+; WIN64-NEXT: popq %r15
+; WIN64-NEXT: popq %rbp
+; WIN64-NEXT: retq
+ %2 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+ %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 0
+ %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 1
+ %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 2
+ %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 3
+ %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 4
+ %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 5
+ %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 6
+ %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 7
+ %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 8
+ %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 9
+ %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 10
+ %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 11
+ %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 12
+ %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 13
+ %17 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 14
+ tail call void @llvm.x86.flags.write.u64(i64 %0)
+ tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16, i64 %17)
+ ret void
+}
More information about the llvm-commits
mailing list