[llvm] [X86InstrInfo] support memfold on spillable inline asm (PR #70832)

Nick Desaulniers via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 3 13:40:37 PDT 2023


https://github.com/nickdesaulniers updated https://github.com/llvm/llvm-project/pull/70832

>From 4d736be10969728290512f0c74fd88d640fa6f0b Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers at google.com>
Date: Mon, 30 Oct 2023 15:37:24 -0700
Subject: [PATCH 1/2] [TargetInstrInfo] enable foldMemoryOperand for InlineAsm

foldMemoryOperand looks at pairs of instructions (generally a load to
virt reg then use of the virtreg, or def of a virtreg then a store) and
attempts to combine them.  This can reduce register pressure.

A prior commit added the ability to mark such a MachineOperand as
spillable. In terms of INLINEASM, this means that "rm" was used (rather
than just "r") to denote that the INLINEASM may use a memory operand
rather than a register operand. This effectively undoes decisions made
by the instruction selection framework.  Callers will be added in the
register allocation frameworks. This has been tested with all of the
above (which will come as follow up patches).

Thanks to @topperc who suggested this at last years LLVM US Dev Meeting
and @qcolombet who confirmed this was the right approach.
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 10 ++++
 llvm/lib/CodeGen/TargetInstrInfo.cpp        | 62 +++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8e7499ac626a747..45728c97808e9d3 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2186,6 +2186,16 @@ class TargetInstrInfo : public MCInstrInfo {
   // Get the call frame size just before MI.
   unsigned getCallFrameSizeAt(MachineInstr &MI) const;
 
+  /// Fills in the necessary MachineOperands to refer to a frame index.
+  /// The best way to understand this is to print `asm(""::"m"(x));` after
+  /// finalize-isel. Example:
+  /// INLINEASM ... 262190 /* mem:m */, %stack.0.x.addr, 1, $noreg, 0, $noreg
+  /// we would add placeholders for:                     ^  ^       ^  ^
+  virtual void
+  getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
+    llvm_unreachable("unknown number of operands necessary");
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 3013a768bc4d566..5ede36505b5b00c 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -565,6 +565,64 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
   return NewMI;
 }
 
+static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
+                                    const TargetInstrInfo &TII) {
+  MachineOperand &MO = MI->getOperand(OpNo);
+  const VirtRegInfo &RI = AnalyzeVirtRegInBundle(*MI, MO.getReg());
+
+  // If the machine operand is tied, untie it first.
+  if (MO.isTied()) {
+    unsigned TiedTo = MI->findTiedOperandIdx(OpNo);
+    MI->untieRegOperand(OpNo);
+    // Intentional recursion!
+    foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
+  }
+
+  // Change the operand from a register to a frame index.
+  MO.ChangeToFrameIndex(FI, MO.getTargetFlags());
+
+  SmallVector<MachineOperand, 4> NewOps;
+  TII.getFrameIndexOperands(NewOps);
+  assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
+  MI->insert(MI->operands_begin() + OpNo + 1, NewOps);
+
+  // Change the previous operand to a MemKind InlineAsm::Flag. The second param
+  // is the per-target number of operands that represent the memory operand
+  // excluding this one (MD). This includes MO.
+  InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size() + 1);
+  F.setMemConstraint(InlineAsm::ConstraintCode::m);
+  MachineOperand &MD = MI->getOperand(OpNo - 1);
+  MD.setImm(F);
+
+  // Update mayload/maystore metadata.
+  MachineOperand &ExtraMO = MI->getOperand(InlineAsm::MIOp_ExtraInfo);
+  if (RI.Reads)
+    ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
+  if (RI.Writes)
+    ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
+}
+
+// Returns nullptr if not possible to fold.
+static MachineInstr *foldInlineAsmMemOperand(MachineInstr &MI,
+                                             ArrayRef<unsigned> Ops, int FI,
+                                             const TargetInstrInfo &TII) {
+  assert(MI.isInlineAsm() && "wrong opcode");
+  if (Ops.size() > 1)
+    return nullptr;
+  unsigned Op = Ops[0];
+  assert(Op && "should never be first operand");
+  assert(MI.getOperand(Op).isReg() && "shouldn't be folding non-reg operands");
+
+  if (!MI.mayFoldInlineAsmRegOp(Op))
+    return nullptr;
+
+  MachineInstr &NewMI = TII.duplicate(*MI.getParent(), MI.getIterator(), MI);
+
+  foldInlineAsmMemOperand(&NewMI, Op, FI, TII);
+
+  return &NewMI;
+}
+
 MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                  ArrayRef<unsigned> Ops, int FI,
                                                  LiveIntervals *LIS,
@@ -612,6 +670,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
     NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
     if (NewMI)
       MBB->insert(MI, NewMI);
+  } else if (MI.isInlineAsm()) {
+    NewMI = foldInlineAsmMemOperand(MI, Ops, FI, *this);
   } else {
     // Ask the target to do the actual folding.
     NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
@@ -683,6 +743,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
     NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
     if (NewMI)
       NewMI = &*MBB.insert(MI, NewMI);
+  } else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
+    NewMI = foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
   } else {
     // Ask the target to do the actual folding.
     NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);

>From 5617e18b8014d755daabca33385322f640755fe9 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers at google.com>
Date: Tue, 31 Oct 2023 09:54:08 -0700
Subject: [PATCH 2/2] [X86InstrInfo] support memfold on spillable inline asm

This enables -regalloc=greedy to memfold spillable inline asm
MachineOperands.

Because no instruction selection framework marks MachineOperands as
spillable, no language frontend can observe functional changes from this
patch. That will change once instruction selection frameworks are
updated.
---
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  10 +
 llvm/lib/Target/X86/X86InstrInfo.h            |   2 +
 .../CodeGen/X86/inline-asm-rm-exhaustion.mir  | 232 ++++++++++++++++++
 3 files changed, 244 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4c6854da0ada3d2..2e17cd8ac88ff4a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10333,5 +10333,15 @@ void X86InstrInfo::genAlternativeCodeSequence(
   }
 }
 
+// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
+void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
+  Ops.append({
+    MachineOperand::CreateImm(1),        // Scale
+    MachineOperand::CreateReg(0, false), // Index
+    MachineOperand::CreateImm(0),        // Disp
+    MachineOperand::CreateReg(0, false), // Segment
+  });
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e1199e20c318e24..b08bc66d5b72c19 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -648,6 +648,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
     return false;
   }
 
+  void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const override;
+
 private:
   /// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
   /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
diff --git a/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir b/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir
new file mode 100644
index 000000000000000..97f0b561bba1c49
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-rm-exhaustion.mir
@@ -0,0 +1,232 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -start-after=finalize-isel -regalloc=greedy -stop-after=greedy \
+# RUN:   -verify-machineinstrs -verify-regalloc %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
+  target triple = "i386-unknown-linux-gnu"
+
+  define void @input(i32 %0) #0 {
+    call void asm "# $0", "rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(i32 %0)
+    ret void
+  }
+
+  define i32 @output() #0 {
+    %1 = alloca i32, align 4
+    call void asm "# $0", "=*rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %1)
+    %2 = load i32, ptr %1, align 4
+    ret i32 %2
+  }
+
+  define i32 @inout(i32 %0) #0 {
+    %2 = alloca i32, align 4
+    store i32 %0, ptr %2, align 4
+    call void asm "# $0 $1", "=*rm,0,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %2, i32 %0)
+    %3 = load i32, ptr %2, align 4
+    ret i32 %3
+  }
+
+  attributes #0 = { nounwind }
+
+...
+---
+name:            input
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.1):
+    ; CHECK-LABEL: name: input
+    ; CHECK: INLINEASM &"# $0", 8 /* mayload attdialect */, 262190 /* mem:m */, %fixed-stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load (s32) from %fixed-stack.0, align 16)
+    ; CHECK-NEXT: RET 0
+    %0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    INLINEASM &"# $0", 0 /* attdialect */, 1076101129 /* reguse:GR32 spillable */, %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    RET 0
+
+...
+---
+name:            output
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: output
+    ; CHECK: INLINEASM &"# $0", 16 /* maystore attdialect */, 262190 /* mem:m */, %stack.1, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.1)
+    ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
+    ; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %ir.1)
+    ; CHECK-NEXT: $eax = COPY [[MOV32rm]]
+    ; CHECK-NEXT: RET 0, $eax
+    INLINEASM &"# $0", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    MOV32mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.1)
+    $eax = COPY %0
+    RET 0, $eax
+
+...
+---
+name:            inout
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.1):
+    ; CHECK-LABEL: name: inout
+    ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    ; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %stack.0)
+    ; CHECK-NEXT: INLINEASM &"# $0 $1", 24 /* mayload maystore attdialect */, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.0)
+    ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; CHECK-NEXT: MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into %ir.2, align 16)
+    ; CHECK-NEXT: $eax = COPY [[MOV32rm1]]
+    ; CHECK-NEXT: RET 0, $eax
+    %1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
+    INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 2147483657 /* reguse tiedto:$0 */, %1(tied-def 3), 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
+    MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2, align 16)
+    $eax = COPY %0
+    RET 0, $eax
+
+...



More information about the llvm-commits mailing list