[llvm] [X86InstrInfo] support memfold on spillable inline asm (PR #70832)

Nick Desaulniers via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 20 14:05:38 PST 2023


https://github.com/nickdesaulniers updated https://github.com/llvm/llvm-project/pull/70832

>From f157f4cb57eb3c9c22afb82dfdcbd5ff6255024b Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers at google.com>
Date: Tue, 31 Oct 2023 09:54:08 -0700
Subject: [PATCH 1/3] [X86InstrInfo] support memfold on spillable inline asm

This enables -regalloc=greedy to memfold spillable inline asm
MachineOperands.

Because no instruction selection framework marks MachineOperands as
spillable, no language frontend can observe functional changes from this
patch. That will change once instruction selection frameworks are
updated.
---
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  10 +
 llvm/lib/Target/X86/X86InstrInfo.h            |   2 +
 .../CodeGen/X86/inline-asm-rm-exhaustion.mir  | 232 ++++++++++++++++++
 3 files changed, 244 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 3ca7b427ae2067f4..6e09beb3e4de7b32 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10287,5 +10287,15 @@ void X86InstrInfo::genAlternativeCodeSequence(
   }
 }
 
+// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
+void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
+  Ops.append({
+    MachineOperand::CreateImm(1),        // Scale
+    MachineOperand::CreateReg(0, false), // Index
+    MachineOperand::CreateImm(0),        // Disp
+    MachineOperand::CreateReg(0, false), // Segment
+  });
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b0a2d2b890743488..b40ffa2c8889ac7c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -659,6 +659,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
     return false;
   }
 
+  void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const override;
+
 private:
   /// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
   /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
diff --git a/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir b/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir
new file mode 100644
index 0000000000000000..97f0b561bba1c49e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir
@@ -0,0 +1,232 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -start-after=finalize-isel -regalloc=greedy -stop-after=greedy \
+# RUN:   -verify-machineinstrs -verify-regalloc %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
+  target triple = "i386-unknown-linux-gnu"
+
+  define void @input(i32 %0) #0 {
+    call void asm "# $0", "rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(i32 %0)
+    ret void
+  }
+
+  define i32 @output() #0 {
+    %1 = alloca i32, align 4
+    call void asm "# $0", "=*rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %1)
+    %2 = load i32, ptr %1, align 4
+    ret i32 %2
+  }
+
+  define i32 @inout(i32 %0) #0 {
+    %2 = alloca i32, align 4
+    store i32 %0, ptr %2, align 4
+    call void asm "# $0 $1", "=*rm,0,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %2, i32 %0)
+    %3 = load i32, ptr %2, align 4
+    ret i32 %3
+  }
+
+  attributes #0 = { nounwind }
+
+...
+---
+name:            input
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.1):
+    ; CHECK-LABEL: name: input
+    ; CHECK: INLINEASM &"# $0", 8 /* mayload attdialect */, 262190 /* mem:m */, %fixed-stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load (s32) from %fixed-stack.0, align 16)
+    ; CHECK-NEXT: RET 0
+    %0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    INLINEASM &"# $0", 0 /* attdialect */, 1076101129 /* reguse:GR32 spillable */, %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    RET 0
+
+...
+---
+name:            output
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: output
+    ; CHECK: INLINEASM &"# $0", 16 /* maystore attdialect */, 262190 /* mem:m */, %stack.1, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.1)
+    ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
+    ; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %ir.1)
+    ; CHECK-NEXT: $eax = COPY [[MOV32rm]]
+    ; CHECK-NEXT: RET 0, $eax
+    INLINEASM &"# $0", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    MOV32mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.1)
+    $eax = COPY %0
+    RET 0, $eax
+
+...
+---
+name:            inout
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.1):
+    ; CHECK-LABEL: name: inout
+    ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    ; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %stack.0)
+    ; CHECK-NEXT: INLINEASM &"# $0 $1", 24 /* mayload maystore attdialect */, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.0)
+    ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; CHECK-NEXT: MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into %ir.2, align 16)
+    ; CHECK-NEXT: $eax = COPY [[MOV32rm1]]
+    ; CHECK-NEXT: RET 0, $eax
+    %1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 2147483657 /* reguse tiedto:$0 */, %1(tied-def 3), 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2, align 16)
+    $eax = COPY %0
+    RET 0, $eax
+
+...

>From 582160ac25e83b71d97a2f58aed46ce53a4d17c3 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers at google.com>
Date: Fri, 17 Nov 2023 14:11:23 -0800
Subject: [PATCH 2/3] move test

---
 llvm/test/CodeGen/{ => MIR}/X86/inline-asm-rm-exhaustion.mir | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/{ => MIR}/X86/inline-asm-rm-exhaustion.mir (100%)

diff --git a/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-rm-exhaustion.mir
similarity index 100%
rename from llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir
rename to llvm/test/CodeGen/MIR/X86/inline-asm-rm-exhaustion.mir

>From 06c300b3a95d679107e97191a535672ada75190c Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers at google.com>
Date: Mon, 20 Nov 2023 14:04:19 -0800
Subject: [PATCH 3/3] - change getFrameIndexOperands to take the frame index -
 change X86InstrInfo::getFrameIndexOperands to use X86AddressMode

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h |  2 +-
 llvm/lib/CodeGen/TargetInstrInfo.cpp        | 12 +++++-------
 llvm/lib/Target/X86/X86InstrInfo.cpp        | 12 +++++-------
 llvm/lib/Target/X86/X86InstrInfo.h          |  2 +-
 4 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index de065849eaa6ebc3..2b36a723af1d93a8 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2194,7 +2194,7 @@ class TargetInstrInfo : public MCInstrInfo {
   /// INLINEASM ... 262190 /* mem:m */, %stack.0.x.addr, 1, $noreg, 0, $noreg
   /// we would add placeholders for:                     ^  ^       ^  ^
   virtual void
-  getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
+  getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops, int FI) const {
     llvm_unreachable("unknown number of operands necessary");
   }
 
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 5ede36505b5b00cf..36ce2861ee4bf7b7 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -578,18 +578,16 @@ static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
     foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
   }
 
-  // Change the operand from a register to a frame index.
-  MO.ChangeToFrameIndex(FI, MO.getTargetFlags());
-
-  SmallVector<MachineOperand, 4> NewOps;
-  TII.getFrameIndexOperands(NewOps);
+  SmallVector<MachineOperand, 5> NewOps;
+  TII.getFrameIndexOperands(NewOps, FI);
   assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
-  MI->insert(MI->operands_begin() + OpNo + 1, NewOps);
+  MI->removeOperand(OpNo);
+  MI->insert(MI->operands_begin() + OpNo, NewOps);
 
   // Change the previous operand to a MemKind InlineAsm::Flag. The second param
   // is the per-target number of operands that represent the memory operand
   // excluding this one (MD). This includes MO.
-  InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size() + 1);
+  InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size());
   F.setMemConstraint(InlineAsm::ConstraintCode::m);
   MachineOperand &MD = MI->getOperand(OpNo - 1);
   MD.setImm(F);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6e09beb3e4de7b32..2f8e91b97d8ae552 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10288,13 +10288,11 @@ void X86InstrInfo::genAlternativeCodeSequence(
 }
 
 // See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
-void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
-  Ops.append({
-    MachineOperand::CreateImm(1),        // Scale
-    MachineOperand::CreateReg(0, false), // Index
-    MachineOperand::CreateImm(0),        // Disp
-    MachineOperand::CreateReg(0, false), // Segment
-  });
+void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops, int FI) const {
+  X86AddressMode M;
+  M.BaseType = X86AddressMode::FrameIndexBase;
+  M.Base.FrameIndex = FI;
+  M.getFullAddress(Ops);
 }
 
 #define GET_INSTRINFO_HELPERS
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b40ffa2c8889ac7c..cac8036f26e740bb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -659,7 +659,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
     return false;
   }
 
-  void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const override;
+  void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops, int FI) const override;
 
 private:
   /// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.



More information about the llvm-commits mailing list