[llvm] 7d626e7 - [X86] Move RDFLAGS/WRFLAGS expansion until after RA

Bill Wendling via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 30 15:32:24 PST 2023


Author: Bill Wendling
Date: 2023-01-30T15:32:16-08:00
New Revision: 7d626e7cbb3ac9b3f43a10df8d8edfab1300b156

URL: https://github.com/llvm/llvm-project/commit/7d626e7cbb3ac9b3f43a10df8d8edfab1300b156
DIFF: https://github.com/llvm/llvm-project/commit/7d626e7cbb3ac9b3f43a10df8d8edfab1300b156.diff

LOG: [X86] Move RDFLAGS/WRFLAGS expansion until after RA

Because the RDFLAGS and WRFLAGS pseudos were expanded before register
allocation (RA), the register allocator could insert reloads in the
middle of the expanded sequences that read and write the EFLAGS
register. This can cause an issue where the stack pointer has already
been adjusted by the sequence, but the stack offset used by the reload
does not account for that adjustment (see [1]).

To avoid this, expand these pseudos after register allocation.

[1] https://github.com/llvm/llvm-project/issues/59102

Reviewed By: craig.topper, nickdesaulniers, pengfei

Differential Revision: https://reviews.llvm.org/D140045

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86InstrInfo.td
    llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 965bccdfd0bf3..0d9cf723027e0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37306,41 +37306,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::CMOV_VK64:
     return EmitLoweredSelect(MI, BB);
 
-  case X86::RDFLAGS32:
-  case X86::RDFLAGS64: {
-    unsigned PushF =
-        MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
-    unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
-    MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
-    // Permit reads of the EFLAGS and DF registers without them being defined.
-    // This intrinsic exists to read external processor state in flags, such as
-    // the trap flag, interrupt flag, and direction flag, none of which are
-    // modeled by the backend.
-    assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
-           "Unexpected register in operand!");
-    Push->getOperand(2).setIsUndef();
-    assert(Push->getOperand(3).getReg() == X86::DF &&
-           "Unexpected register in operand!");
-    Push->getOperand(3).setIsUndef();
-    BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
-
-    MI.eraseFromParent(); // The pseudo is gone now.
-    return BB;
-  }
-
-  case X86::WRFLAGS32:
-  case X86::WRFLAGS64: {
-    unsigned Push =
-        MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
-    unsigned PopF =
-        MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
-    BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
-    BuildMI(*BB, MI, DL, TII->get(PopF));
-
-    MI.eraseFromParent(); // The pseudo is gone now.
-    return BB;
-  }
-
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d650994350cb0..00c29f30e96d7 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5060,6 +5060,45 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     return true;
   }
 
+  case X86::RDFLAGS32:
+  case X86::RDFLAGS64: {
+    unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
+    MachineBasicBlock &MBB = *MIB->getParent();
+
+    MachineInstr *NewMI =
+        BuildMI(MBB, MI, MIB->getDebugLoc(),
+                get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
+            .getInstr();
+
+    // Permit reads of the EFLAGS and DF registers without them being defined.
+    // This intrinsic exists to read external processor state in flags, such as
+    // the trap flag, interrupt flag, and direction flag, none of which are
+    // modeled by the backend.
+    assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
+           "Unexpected register in operand! Should be EFLAGS.");
+    NewMI->getOperand(2).setIsUndef();
+    assert(NewMI->getOperand(3).getReg() == X86::DF &&
+           "Unexpected register in operand! Should be DF.");
+    NewMI->getOperand(3).setIsUndef();
+
+    MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
+    return true;
+  }
+
+  case X86::WRFLAGS32:
+  case X86::WRFLAGS64: {
+    unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
+    MachineBasicBlock &MBB = *MIB->getParent();
+
+    BuildMI(MBB, MI, MIB->getDebugLoc(),
+            get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(MI.getOperand(0).getReg());
+    BuildMI(MBB, MI, MIB->getDebugLoc(),
+            get(Is64Bit ? X86::POPF64 : X86::POPF32));
+    MI.eraseFromParent();
+    return true;
+  }
+
   // KNL does not recognize dependency-breaking idioms for mask registers,
   // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
   // Using %k0 as the undef input register is a performance heuristic based

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 3012035af11ab..b33e40900c706 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1381,7 +1381,7 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
 
 }
 
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
     SchedRW = [WriteRMW], Defs = [ESP] in {
   let Uses = [ESP] in
   def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
@@ -1394,7 +1394,7 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
                 Requires<[In64BitMode]>;
 }
 
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
     SchedRW = [WriteRMW] in {
   let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
   def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),

diff  --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
index 5460f36fca670..47aefdbf0e466 100644
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -52,3 +52,167 @@ entry:
   call void @llvm.x86.flags.write.u64(i64 %arg)
   ret void
 }
+
+define i64 @read_flags_reg_pressure() nounwind {
+; CHECK-LABEL: read_flags_reg_pressure:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    pushfq
+; CHECK-NEXT:    popq %rdx
+; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+;
+; WIN64-LABEL: read_flags_reg_pressure:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %r15
+; WIN64-NEXT:    pushq %r14
+; WIN64-NEXT:    pushq %r13
+; WIN64-NEXT:    pushq %r12
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    pushq %rdi
+; WIN64-NEXT:    pushq %rbx
+; WIN64-NEXT:    subq $16, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    pushfq
+; WIN64-NEXT:    popq %rdx
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; WIN64-NEXT:    addq $16, %rsp
+; WIN64-NEXT:    popq %rbx
+; WIN64-NEXT:    popq %rdi
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    popq %r12
+; WIN64-NEXT:    popq %r13
+; WIN64-NEXT:    popq %r14
+; WIN64-NEXT:    popq %r15
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+  %1 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+  %2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 0
+  %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 1
+  %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 2
+  %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 3
+  %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 4
+  %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 5
+  %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 6
+  %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 7
+  %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 8
+  %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 9
+  %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 10
+  %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 11
+  %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 12
+  %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 13
+  %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 14
+  %17 = tail call i64 @llvm.x86.flags.read.u64()
+  tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16)
+  ret i64 %17
+}
+
+define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
+; CHECK-LABEL: write_flags_reg_pressure:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT:    pushq %rdx
+; CHECK-NEXT:    popfq
+; CHECK-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+;
+; WIN64-LABEL: write_flags_reg_pressure:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %r15
+; WIN64-NEXT:    pushq %r14
+; WIN64-NEXT:    pushq %r13
+; WIN64-NEXT:    pushq %r12
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    pushq %rdi
+; WIN64-NEXT:    pushq %rbx
+; WIN64-NEXT:    subq $16, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    pushq %rdx
+; WIN64-NEXT:    popfq
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    addq $16, %rsp
+; WIN64-NEXT:    popq %rbx
+; WIN64-NEXT:    popq %rdi
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    popq %r12
+; WIN64-NEXT:    popq %r13
+; WIN64-NEXT:    popq %r14
+; WIN64-NEXT:    popq %r15
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+  %2 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+  %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 0
+  %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 1
+  %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 2
+  %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 3
+  %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 4
+  %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 5
+  %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 6
+  %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 7
+  %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 8
+  %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 9
+  %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 10
+  %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 11
+  %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 12
+  %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 13
+  %17 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 14
+  tail call void @llvm.x86.flags.write.u64(i64 %0)
+  tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16, i64 %17)
+  ret void
+}


        


More information about the llvm-commits mailing list