[PATCH] D30661: [x86] Split MXCSR into two pseudo-registers

Andy Kaylor via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 6 09:41:19 PST 2017


andrew.w.kaylor created this revision.

Split MXCSR into two pseudo-registers so that the control bits and the status bits can be modeled separately.  This register cannot be used as an operand of any instruction, so we are free to model it in whatever way is most useful for producing correct code.

This patch only updates the instructions that load and save the entire contents of the register, so both the control and status parts are referenced together here.  A subsequent patch will update floating-point operations to add an implicit use of the control bits and an implicit def of the status bits.  That will guarantee that FP instructions are not hoisted above or sunk below the instructions that set the control bits or read the status bits, and it will do so without causing FP operations to act as barriers to one another.

I will be posting another patch shortly to update the Clang front end to recognize this change in register naming.


Repository:
  rL LLVM

https://reviews.llvm.org/D30661

Files:
  lib/Target/X86/X86InstrFPStack.td
  lib/Target/X86/X86InstrSSE.td
  lib/Target/X86/X86RegisterInfo.td
  test/CodeGen/X86/ipra-reg-usage.ll


Index: lib/Target/X86/X86RegisterInfo.td
===================================================================
--- lib/Target/X86/X86RegisterInfo.td
+++ lib/Target/X86/X86RegisterInfo.td
@@ -255,7 +255,10 @@
 def EFLAGS : X86Reg<"flags", 0>;
 
 // SSE floating point control/status register
-def MXCSR : X86Reg<"mxcsr", 0>;
+// Although MXCSR is actually a single register we model the control bits
+// separately from the status bits in order to avoid unnecessary dependencies.
+def MXCSR_C : X86Reg<"mxcsr_c", 0>;
+def MXCSR_S : X86Reg<"mxcsr_s", 0>;
 
 // Segment registers
 def CS : X86Reg<"cs", 1>;
@@ -496,6 +499,10 @@
   let CopyCost = -1;  // Don't allow copying of status registers.
   let isAllocatable = 0;
 }
+def MXCSCR : RegisterClass<"X86", [i16], 16, (add MXCSR_C, MXCSR_S)> {
+  let CopyCost = -1;  // Don't allow copying of MXCSR.
+  let isAllocatable = 0;
+}
 
 // AVX-512 vector/mask registers.
 def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
Index: lib/Target/X86/X86InstrFPStack.td
===================================================================
--- lib/Target/X86/X86InstrFPStack.td
+++ lib/Target/X86/X86InstrFPStack.td
@@ -667,14 +667,14 @@
 def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
 
 let Predicates = [HasFXSR] in {
-  let Uses = [MXCSR] in {
+  let Uses = [MXCSR_C, MXCSR_S] in {
     def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
                  "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
     def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
                    "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
                    IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
   }
-  let Defs = [MXCSR] in {
+  let Defs = [MXCSR_C, MXCSR_S] in {
     def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
                   "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, 
                   TB;
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3737,21 +3737,21 @@
 // SSE 1 & 2 - Load/Store XCSR register
 //===----------------------------------------------------------------------===//
 
-let Defs = [MXCSR] in
+let Defs = [MXCSR_C, MXCSR_S] in
   def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                  IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>, VEX_WIG;
-let Uses = [MXCSR] in
+let Uses = [MXCSR_C, MXCSR_S] in
   def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                  IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>, VEX_WIG;
 
 let Predicates = [UseSSE1] in {
-  let Defs = [MXCSR] in
+  let Defs = [MXCSR_C, MXCSR_S] in
     def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                   "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                   IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
-  let Uses = [MXCSR] in
+  let Uses = [MXCSR_C, MXCSR_S] in
     def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                   IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
Index: test/CodeGen/X86/ipra-reg-usage.ll
===================================================================
--- test/CodeGen/X86/ipra-reg-usage.ll
+++ test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: CS DS EFLAGS EIP EIZ ES FPSW FS GS IP MXCSR RIP RIZ SS BND0 BND1 BND2 BND3 CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7 CR8 CR9 CR10 CR11 CR12 CR13 CR14 CR15 DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7 DR8 DR9 DR10 DR11 DR12 DR13 DR14 DR15 FP0 FP1 FP2 FP3 FP4 FP5 FP6 FP7 K0 K1 K2 K3 K4 K5 K6 K7 MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7 R11 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 XMM16 XMM17 XMM18 XMM19 XMM20 XMM21 XMM22 XMM23 XMM24 XMM25 XMM26 XMM27 XMM28 XMM29 XMM30 XMM31 YMM0 YMM1 YMM2 YMM3 YMM4 YMM5 YMM6 YMM7 YMM8 YMM9 YMM10 YMM11 YMM12 YMM13 YMM14 YMM15 YMM16 YMM17 YMM18 YMM19 YMM20 YMM21 YMM22 YMM23 YMM24 YMM25 YMM26 YMM27 YMM28 YMM29 YMM30 YMM31 ZMM0 ZMM1 ZMM2 ZMM3 ZMM4 ZMM5 ZMM6 ZMM7 ZMM8 ZMM9 ZMM10 ZMM11 ZMM12 ZMM13 ZMM14 ZMM15 ZMM16 ZMM17 ZMM18 ZMM19 ZMM20 ZMM21 ZMM22 ZMM23 ZMM24 ZMM25 ZMM26 ZMM27 ZMM28 ZMM29 ZMM30 ZMM31 R11B R11D R11W
+; CHECK: foo Clobbered Registers: CS DS EFLAGS EIP EIZ ES FPSW FS GS IP MXCSR_C MXCSR_S RIP RIZ SS BND0 BND1 BND2 BND3 CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7 CR8 CR9 CR10 CR11 CR12 CR13 CR14 CR15 DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7 DR8 DR9 DR10 DR11 DR12 DR13 DR14 DR15 FP0 FP1 FP2 FP3 FP4 FP5 FP6 FP7 K0 K1 K2 K3 K4 K5 K6 K7 MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7 R11 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 XMM16 XMM17 XMM18 XMM19 XMM20 XMM21 XMM22 XMM23 XMM24 XMM25 XMM26 XMM27 XMM28 XMM29 XMM30 XMM31 YMM0 YMM1 YMM2 YMM3 YMM4 YMM5 YMM6 YMM7 YMM8 YMM9 YMM10 YMM11 YMM12 YMM13 YMM14 YMM15 YMM16 YMM17 YMM18 YMM19 YMM20 YMM21 YMM22 YMM23 YMM24 YMM25 YMM26 YMM27 YMM28 YMM29 YMM30 YMM31 ZMM0 ZMM1 ZMM2 ZMM3 ZMM4 ZMM5 ZMM6 ZMM7 ZMM8 ZMM9 ZMM10 ZMM11 ZMM12 ZMM13 ZMM14 ZMM15 ZMM16 ZMM17 ZMM18 ZMM19 ZMM20 ZMM21 ZMM22 ZMM23 ZMM24 ZMM25 ZMM26 ZMM27 ZMM28 ZMM29 ZMM30 ZMM31 R11B R11D R11W
   call void @bar1()
   call void @bar2()
   ret void


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D30661.90718.patch
Type: text/x-patch
Size: 5419 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170306/f6d3a88f/attachment.bin>


More information about the llvm-commits mailing list