[llvm] [MachineOutliner] Preserve regmasks in calls to outlined functions (PR #120940)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 23 00:23:01 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Zhaoxuan Jiang (nocchijiang)

<details>
<summary>Changes</summary>

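When emitting calls to an outlined function, the register masks from the outlined sequence are lost. The AArch64CollectLOH pass, which I plan to move to PreEmitPass2 (positioned after MachineOutliner), relies on accurate register masks. This patch ensures that regmasks are correctly preserved in the outlined calls, maintaining the required accuracy for subsequent passes. If the outlined sequence contains a single distinct regmask, it is attached to the call as-is; if it contains several distinct regmasks, they are merged with a bitwise AND into one newly allocated regmask, which is attached to the call instead.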

---
Full diff: https://github.com/llvm/llvm-project/pull/120940.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/MachineOutliner.cpp (+25) 
- (added) llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir (+94) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 4c5489434c69bb..c01d9b04a89d93 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -1117,6 +1117,7 @@ bool MachineOutliner::outline(
         // instruction. It also updates call site information for moved
         // code.
         SmallSet<Register, 2> UseRegs, DefRegs;
+        SmallPtrSet<const uint32_t *, 2> RegMasks;
         // Copy over the defs in the outlined range.
         // First inst in outlined range <-- Anything that's defined in this
         // ...                           .. range has to be added as an
@@ -1130,6 +1131,12 @@ bool MachineOutliner::outline(
           MachineInstr *MI = &*Iter;
           SmallSet<Register, 2> InstrUseRegs;
           for (MachineOperand &MOP : MI->operands()) {
+            // Collect all regmasks. Merge them in the end.
+            if (MOP.isRegMask()) {
+              RegMasks.insert(MOP.getRegMask());
+              continue;
+            }
+
             // Skip over anything that isn't a register.
             if (!MOP.isReg())
               continue;
@@ -1153,6 +1160,24 @@ bool MachineOutliner::outline(
             MI->getMF()->eraseCallSiteInfo(MI);
         }
 
+        if (!RegMasks.empty()) {
+          if (RegMasks.size() == 1) {
+            CallInst->addOperand(
+                MachineOperand::CreateRegMask(*RegMasks.begin()));
+          } else {
+            uint32_t *RegMask = MF->allocateRegMask();
+            unsigned NumRegs =
+                MF->getSubtarget().getRegisterInfo()->getNumRegs();
+            unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
+            memset(RegMask, UINT32_MAX, Size * sizeof(RegMask[0]));
+            for (const uint32_t *Mask : RegMasks) {
+              for (unsigned I = 0; I < NumRegs; ++I)
+                RegMask[I] &= Mask[I];
+            }
+            CallInst->addOperand(MachineOperand::CreateRegMask(RegMask));
+          }
+        }
+
         for (const Register &I : DefRegs)
           // If it's a def, add it to the call instruction.
           CallInst->addOperand(
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
new file mode 100644
index 00000000000000..047a73f81dd2ae
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
@@ -0,0 +1,94 @@
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=prologepilog -run-pass=machine-outliner %s -o - | FileCheck %s
+--- |
+  declare swiftcc void @bar()
+  declare void @baz(i32, i32, i32) #0
+
+  define void @test_same_regmask() #0 {
+    ret void
+  }
+  define void @test_different_regmasks() #0 {
+    ret void
+  }
+  define void @foo(i32, i32, i32, i32, i32, i32, i32, i32) #0 {
+    ret void
+  }
+
+...
+---
+name:            foo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    RET undef $lr
+
+
+...
+---
+name:            test_same_regmask
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_same_regmask
+  ; CHECK:       bb.1:
+  ; CHECK-NEXT:    BL @OUTLINED_FUNCTION_1, csr_aarch64_aapcs
+
+  bb.0:
+    $sp = frame-setup SUBXri $sp, 16, 0
+
+  bb.1:
+    $w0 = MOVZWi 1, 0
+    $w1 = MOVZWi 2, 0
+    $w2 = MOVZWi 3, 0
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
+    $w0 = MOVZWi 1, 0
+    $w1 = MOVZWi 2, 0
+    $w2 = MOVZWi 3, 0
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
+    $sp = ADDXri $sp, 16, 0
+    RET undef $lr
+
+
+...
+---
+name:            test_different_regmasks
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_different_regmasks
+  ; CHECK:       bb.1:
+  ; CHECK-NEXT:    BL @OUTLINED_FUNCTION_0, CustomRegMask($fp,$lr,$wzr,$wzr_hi,$xzr,$b8,$b9,$b10,$b11,$b12,$b13,$b14,$b15,$d8,$d9,$d10,$d11,$d12,$d13,$d14,$d15,$h8,$h9,$h10,$h11,$h12,$h13,$h14,$h15,$s8,$s9,$s10,$s11,$s12,$s13,$s14,$s15,$w19,$w20,$w22,$w23,$w24,$w25,$w26,$w27,$w28,$w29,$w30,$x19,$x20,$x22,$x23,$x24,$x25,$x26,$x27,$x28,$b8_hi,$b9_hi,$b10_hi,$b11_hi,$b12_hi,$b13_hi,$b14_hi,$b15_hi,$h8_hi,$h9_hi,$h10_hi,$h11_hi,$h12_hi,$h13_hi,$h14_hi,$h15_hi,$s8_hi,$s9_hi,$s10_hi,$s11_hi,$s12_hi,$s13_hi,$s14_hi,$s15_hi,$w19_hi,$w20_hi,$w22_hi,$w23_hi,$w24_hi,$w25_hi,$w26_hi,$w27_hi,$w28_hi,$w29_hi,$w30_hi,$d8_d9,$d9_d10,$d10_d11,$d11_d12,$d12_d13,$d13_d14,$d14_d15,$d8_d9_d10_d11,$d9_d10_d11_d12,$d10_d11_d12_d13,$d11_d12_d13_d14,$d12_d13_d14_d15,$d8_d9_d10,$d9_d10_d11,$d10_d11_d12,$d11_d12_d13,$d12_d13_d14,$d13_d14_d15,$x22_x23_x24_x25_x26_x27_x28_fp,$w22_w23,$w24_w25,$w26_w27,$w28_w29,$x28_fp,$x22_x23,$x24_x25,$x26_x27)
+
+  bb.0:
+    $sp = frame-setup SUBXri $sp, 16, 0
+
+  bb.1:
+    $w0 = MOVZWi 1, 0
+    $w1 = MOVZWi 2, 0
+    $w2 = MOVZWi 3, 0
+    $w3 = MOVZWi 4, 0
+    $w4 = MOVZWi 5, 0
+    $w5 = MOVZWi 6, 0
+    $w6 = MOVZWi 7, 0
+    $w7 = MOVZWi 8, 0
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+    BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    $w0 = MOVZWi 1, 0
+    $w1 = MOVZWi 2, 0
+    $w2 = MOVZWi 3, 0
+    $w3 = MOVZWi 4, 0
+    $w4 = MOVZWi 5, 0
+    $w5 = MOVZWi 6, 0
+    $w6 = MOVZWi 7, 0
+    $w7 = MOVZWi 8, 0
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+    BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    $w0 = MOVZWi 1, 0
+    $w1 = MOVZWi 2, 0
+    $w2 = MOVZWi 3, 0
+    $w3 = MOVZWi 4, 0
+    $w4 = MOVZWi 5, 0
+    $w5 = MOVZWi 6, 0
+    $w6 = MOVZWi 7, 0
+    $w7 = MOVZWi 8, 0
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+    BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    $sp = ADDXri $sp, 16, 0
+    RET undef $lr

``````````

</details>


https://github.com/llvm/llvm-project/pull/120940


More information about the llvm-commits mailing list