[llvm] [DAG] Add strictfp implicit def reg after metadata. (PR #168282)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 16 10:45:46 PST 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes:

This prevents a machine verifier error, "Expected implicit register after groups", by adding the strictfp rounding-control implicit-def registers after the !srcloc metadata operand, so that the implicit register operands come last on the INLINEASM instruction.

Fixes #158661
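
For context, a sketch of the INLINEASM operand ordering (based on the updated AMDGPU test below; the register and metadata names are illustrative, not part of the patch): under the strictfp attribute the target's rounding-control registers (e.g. $mode on AMDGPU) are attached to the INLINEASM as implicit defs, and the machine verifier expects implicit register operands to trail the operand groups and the !srcloc metadata, so the defs are now appended after the metadata operand.

```
; Pre-patch order (rejected by -verify-machineinstrs: metadata follows the implicit defs)
INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode, !0
; Post-patch order (implicit defs emitted last, after the !srcloc metadata)
INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
```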

---
Full diff: https://github.com/llvm/llvm-project/pull/168282.diff


6 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp (+7-7) 
- (modified) llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (+7-7) 
- (added) llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll (+17) 
- (modified) llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll (+5-3) 
- (added) llvm/test/CodeGen/ARM/strictfp-inlineasm.ll (+17) 
- (added) llvm/test/CodeGen/X86/strictfp-inlineasm.ll (+27) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index a8661ce629a4f..9837c0ca12990 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
     }
   }
 
-  // Add rounding control registers as implicit def for inline asm.
-  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
-    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-    for (MCPhysReg Reg : RCRegs)
-      Inst.addReg(Reg, RegState::ImplicitDefine);
-  }
-
   if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
     auto *Token = Bundle->Inputs[0].get();
     ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
   if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
     Inst.addMetadata(SrcLoc);
 
+  // Add rounding control registers as implicit def for inline asm.
+  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+    for (MCPhysReg Reg : RCRegs)
+      Inst.addReg(Reg, RegState::ImplicitDefine);
+  }
+
   // All inputs are handled, insert the instruction now
   MIRBuilder.insertInstr(Inst);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 72d0c44889048..52e8449fe510c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       }
     }
 
-    // Add rounding control registers as implicit def for inline asm.
-    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
-      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-      for (MCPhysReg Reg : RCRegs)
-        MIB.addReg(Reg, RegState::ImplicitDefine);
-    }
-
     // GCC inline assembly allows input operands to also be early-clobber
     // output operands (so long as the operand is written only after it's
     // used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     if (MD)
       MIB.addMetadata(MD);
 
+    // Add rounding control registers as implicit def for inline asm.
+    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+      for (MCPhysReg Reg : RCRegs)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+
     MBB->insert(InsertPos, MIB);
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..0bbf31c5c0d73
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, #-1 // =0xffffffff
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index 4b5a49fc0c2e9..562296fce4957 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
   ; SDAG-NEXT: {{  $}}
   ; SDAG-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; SDAG-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; SDAG-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; SDAG-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
   ; SDAG-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
   ; SDAG-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
   ; SDAG-NEXT:   SI_RETURN implicit $vgpr0
@@ -78,11 +78,11 @@ define float @asm_changes_mode(float %x, float %y) #0 {
   ; GISEL-NEXT: {{  $}}
   ; GISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GISEL-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; GISEL-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
   ; GISEL-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
   ; GISEL-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
   ; GISEL-NEXT:   SI_RETURN implicit $vgpr0
-  call void asm sideeffect "; maybe defs mode", ""()
+  call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
   %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
   ret float %val
 }
@@ -90,3 +90,5 @@ define float @asm_changes_mode(float %x, float %y) #0 {
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
 
 attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"  }
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..2d898a87b978d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    mov r0, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    mvn r0, #0
+; CHECK-NEXT:    bx lr
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/X86/strictfp-inlineasm.ll b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..674c12a7e9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
+
+define i32 @foo() strictfp {
+; X86-LABEL: foo:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    movl $-1, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: foo:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    retq
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+
+!0 = !{i64 87}

``````````



https://github.com/llvm/llvm-project/pull/168282

