[llvm] [DAG] Add strictfp implicit def reg after metadata. (PR #168282)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 16 10:45:17 PST 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/168282
This prevents the machine verifier error "Expected implicit register after groups" by adding the strictfp rounding-control implicit defs after the inline asm's metadata operand instead of before it.
Fixes #158661
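
To illustrate the ordering issue (a rough sketch based on the updated AMDGPU test below; the "before" line is reconstructed for illustration, not actual verifier-clean output): for a strictfp function whose inline asm carries !srcloc metadata, the INLINEASM used to be emitted with the rounding-control implicit def ahead of the metadata operand,

  INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode, !0

but the machine verifier only accepts a metadata operand directly after the operand groups, with everything after it being implicit registers. With this change the implicit def is appended last:

  INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode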
From 26199dbbaf785864dd92c3e4a9b47f09f2e92c35 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 16 Nov 2025 18:43:14 +0000
Subject: [PATCH] [DAG] Add strictfp implicit def reg after metadata.
This prevents a machine verifier error where it reports "Expected implicit
register after groups".
Fixes #158661
---
.../CodeGen/GlobalISel/InlineAsmLowering.cpp | 14 +++++-----
.../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 14 +++++-----
.../CodeGen/AArch64/strictfp-inlineasm.ll | 17 ++++++++++++
.../CodeGen/AMDGPU/call-defs-mode-register.ll | 8 +++---
llvm/test/CodeGen/ARM/strictfp-inlineasm.ll | 17 ++++++++++++
llvm/test/CodeGen/X86/strictfp-inlineasm.ll | 27 +++++++++++++++++++
6 files changed, 80 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
create mode 100644 llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
create mode 100644 llvm/test/CodeGen/X86/strictfp-inlineasm.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index a8661ce629a4f..9837c0ca12990 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
     }
   }
 
-  // Add rounding control registers as implicit def for inline asm.
-  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
-    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-    for (MCPhysReg Reg : RCRegs)
-      Inst.addReg(Reg, RegState::ImplicitDefine);
-  }
-
   if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
     auto *Token = Bundle->Inputs[0].get();
     ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
   if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
     Inst.addMetadata(SrcLoc);
 
+  // Add rounding control registers as implicit def for inline asm.
+  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+    for (MCPhysReg Reg : RCRegs)
+      Inst.addReg(Reg, RegState::ImplicitDefine);
+  }
+
   // All inputs are handled, insert the instruction now
   MIRBuilder.insertInstr(Inst);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 72d0c44889048..52e8449fe510c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       }
     }
 
-    // Add rounding control registers as implicit def for inline asm.
-    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
-      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-      for (MCPhysReg Reg : RCRegs)
-        MIB.addReg(Reg, RegState::ImplicitDefine);
-    }
-
     // GCC inline assembly allows input operands to also be early-clobber
     // output operands (so long as the operand is written only after it's
     // used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     if (MD)
       MIB.addMetadata(MD);
 
+    // Add rounding control registers as implicit def for inline asm.
+    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+      for (MCPhysReg Reg : RCRegs)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+
     MBB->insert(InsertPos, MIB);
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..0bbf31c5c0d73
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, #-1 // =0xffffffff
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index 4b5a49fc0c2e9..562296fce4957 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; SDAG-NEXT: {{ $}}
; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; SDAG-NEXT: SI_RETURN implicit $vgpr0
@@ -78,11 +78,11 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; GISEL-NEXT: {{ $}}
; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; GISEL-NEXT: SI_RETURN implicit $vgpr0
- call void asm sideeffect "; maybe defs mode", ""()
+ call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret float %val
}
@@ -90,3 +90,5 @@ define float @asm_changes_mode(float %x, float %y) #0 {
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..2d898a87b978d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: @APP
+; CHECK-NEXT: @NO_APP
+; CHECK-NEXT: mvn r0, #0
+; CHECK-NEXT: bx lr
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/X86/strictfp-inlineasm.ll b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..674c12a7e9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
+
+define i32 @foo() strictfp {
+; X86-LABEL: foo:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: foo:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+
+!0 = !{i64 87}