[llvm] [PHIElimination] Ensure correct copy emission for `INLINEASM_BR` indirect targets (PR #173396)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 23 11:00:02 PST 2025


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/173396

>From 4fcd78ef5ecaac6518cf7db628c2b8b8fdb2335e Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 23 Dec 2025 17:35:18 +0100
Subject: [PATCH 1/2] [PHIElimination] Introduce test (NFC)

---
 ...sm-different-indirect-target-end-to-end.ll | 35 +++++++
 .../callbr-asm-different-indirect-target.mir  | 91 +++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
 create mode 100644 llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir

diff --git a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
new file mode 100644
index 0000000000000..22999ff8309f3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-- -O1 < %s | FileCheck %s
+
+define i64 @inlineasm_br_different_indirect_target(i1 %cmp) {
+; CHECK-LABEL: inlineasm_br_different_indirect_target:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:  .LBB0_1: # Inline asm indirect target
+; CHECK-NEXT:    # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    # Label of block must be emitted
+; CHECK-NEXT:    # implicit-def: $rax
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %loop.end
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    xorq $9, %rax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:  # %bb.3: # %exit
+; CHECK-NEXT:    retq
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop.end, %loop, %entry
+  %val = phi i64 [ 0, %entry ], [ %val.next, %loop.end ], [ poison, %loop ]
+  br i1 %cmp, label %loop.end, label %loop
+
+loop.end:                                         ; preds = %loop
+  %val.next = xor i64 %val, 9
+  callbr void asm sideeffect "", "!i,~{dirflag},~{fpsr},~{flags}"()
+          to label %exit [label %loop]
+
+exit:                                             ; preds = %loop.end
+  ret i64 %val.next
+}
diff --git a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir
new file mode 100644
index 0000000000000..c7487b8493917
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=x86_64-- -run-pass=livevars,phi-node-elimination %s -o - | FileCheck %s
+# RUN: llc -mtriple=x86_64-- --passes='require<live-vars>,phi-node-elimination' -verify-each -o - %s | FileCheck %s
+
+--- |
+  define i64 @inlineasm_br_different_indirect_target(i1 %cmp) {
+  entry:
+    br label %loop
+
+  loop:                                             ; preds = %loop.end, %loop, %entry
+    %val = phi i64 [ 0, %entry ], [ %val.next, %loop.end ], [ poison, %loop ]
+    br i1 %cmp, label %loop.end, label %loop
+
+  loop.end:                                         ; preds = %loop
+    %val.next = xor i64 %val, 9
+    callbr void asm sideeffect "", "!i,~{dirflag},~{fpsr},~{flags}"()
+            to label %exit [label %loop]
+
+  exit:                                             ; preds = %loop.end
+    ret i64 %val.next
+  }
+...
+---
+name:            inlineasm_br_different_indirect_target
+alignment:       16
+tracksRegLiveness: true
+noPhis:          false
+isSSA:           true
+body:             |
+  ; CHECK-LABEL: name: inlineasm_br_different_indirect_target
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $edi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY killed $edi
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr8 = COPY killed [[COPY]].sub_8bit
+  ; CHECK-NEXT:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[MOV32r0_]], %subreg.sub_32bit
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = COPY killed [[SUBREG_TO_REG]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.loop (inlineasm-br-indirect-target):
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; FIXME: This is a miscompilation, as, for the PHI below, the IMPLICIT_DEF
+  ; happens to be at the beginning of the bb.1 block, redefining previous
+  ; definition of COPY2.
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY2]]
+  ; CHECK-NEXT:   TEST8ri [[COPY1]], 1, implicit-def $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit killed $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.loop.end:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000), %bb.1(0x00000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XOR64ri32_:%[0-9]+]]:gr64 = XOR64ri32 killed [[COPY3]], 9, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = COPY [[XOR64ri32_]]
+  ; CHECK-NEXT:   INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.1, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.exit:
+  ; CHECK-NEXT:   $rax = COPY killed [[XOR64ri32_]]
+  ; CHECK-NEXT:   RET 0, killed $rax
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $edi
+
+    %2:gr32 = COPY killed $edi
+    %3:gr8 = COPY killed %2.sub_8bit
+    %5:gr32 = MOV32r0 implicit-def dead $eflags
+    %4:gr64 = SUBREG_TO_REG 0, killed %5, %subreg.sub_32bit
+
+  bb.1.loop (inlineasm-br-indirect-target):
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+    %0:gr64 = PHI %4, %bb.0, undef %6:gr64, %bb.1, %1, %bb.2
+    TEST8ri %3, 1, implicit-def $eflags
+    JCC_1 %bb.1, 4, implicit killed $eflags
+    JMP_1 %bb.2
+
+  bb.2.loop.end:
+    successors: %bb.3(0x80000000), %bb.1(0x00000000)
+
+    %1:gr64 = XOR64ri32 killed %0, 9, implicit-def dead $eflags
+    INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.1, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+    JMP_1 %bb.3
+
+  bb.3.exit:
+    $rax = COPY killed %1
+    RET 0, killed $rax
+...

>From ccb520c5ec61ec0205ab50be4d48a4c1186a6241 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 23 Dec 2025 19:53:38 +0100
Subject: [PATCH 2/2] [PHIElimination] Ensure correct copy emission for
 INLINEASM_BR indirect targets

When lowering PHI nodes to copies, following
f7a53d82c0902147909f28a9295a9d00b4b27d38, place such copies before an
INLINEASM_BR instruction only if the block in which the copy is inserted
actually contains one. If the block containing the INLINEASM_BR
instruction and any of its indirect jump targets are distinct blocks, a
copy inserted in the indirect target itself continues to be emitted
normally, i.e., right before the terminator.

Fixes: https://github.com/llvm/llvm-project/issues/172824.
---
 llvm/lib/CodeGen/PHIEliminationUtils.cpp       | 18 +++++++++++++++++-
 ...asm-different-indirect-target-end-to-end.ll |  2 +-
 .../callbr-asm-different-indirect-target.mir   |  5 +----
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index f4562f437788e..8abfec9364546 100644
--- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -13,6 +13,19 @@
 
 using namespace llvm;
 
+// Returns true if SuccMBB is an INLINEASM_BR indirect target and MBB contains
+// an INLINEASM_BR, in which case the PHI copy needs specialized placement.
+static bool hasInlineAsmBrToSuccessor(MachineBasicBlock *MBB,
+                                      MachineBasicBlock *SuccMBB) {
+  if (!SuccMBB->isInlineAsmBrIndirectTarget())
+    return false;
+
+  for (const MachineInstr &MI : *MBB)
+    if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+      return true;
+  return false;
+}
+
 // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
 // when following the CFG edge to SuccMBB. This needs to be after any def of
 // SrcReg, but before any subsequent point where control flow might jump out of
@@ -31,8 +44,11 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
   // computeLastInsertPoint, and similarly assumes that there cannot be multiple
   // instructions that are Calls with EHPad successors or INLINEASM_BR in a
   // block.
+  // Note that if the successor is an INLINEASM_BR indirect target but the
+  // current block (which may be the successor itself) contains no INLINEASM_BR,
+  // no INLINEASM_BR-specific handling is needed.
   bool EHPadSuccessor = SuccMBB->isEHPad();
-  if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget())
+  if (!EHPadSuccessor && !hasInlineAsmBrToSuccessor(MBB, SuccMBB))
     return MBB->getFirstTerminator();
 
   // Discover any defs in this basic block.
diff --git a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
index 22999ff8309f3..b9ef6ecf8800b 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target-end-to-end.ll
@@ -4,11 +4,11 @@
 define i64 @inlineasm_br_different_indirect_target(i1 %cmp) {
 ; CHECK-LABEL: inlineasm_br_different_indirect_target:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:  .LBB0_1: # Inline asm indirect target
 ; CHECK-NEXT:    # %loop
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    # Label of block must be emitted
-; CHECK-NEXT:    # implicit-def: $rax
 ; CHECK-NEXT:    testb $1, %dil
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %loop.end
diff --git a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir
index c7487b8493917..eec92f059082f 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir
+++ b/llvm/test/CodeGen/X86/callbr-asm-different-indirect-target.mir
@@ -41,12 +41,9 @@ body:             |
   ; CHECK-NEXT: bb.1.loop (inlineasm-br-indirect-target):
   ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT: {{  $}}
-  ; FIXME: This is a miscompilation, as, for the PHI below, the IMPLICIT_DEF
-  ; happens to be at the beginning of the bb.1 block, redefining previous
-  ; definition of COPY2.
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY2]]
   ; CHECK-NEXT:   TEST8ri [[COPY1]], 1, implicit-def $eflags
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = IMPLICIT_DEF
   ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit killed $eflags
   ; CHECK-NEXT:   JMP_1 %bb.2
   ; CHECK-NEXT: {{  $}}



More information about the llvm-commits mailing list