[llvm] [ARM] Consider denormal mode in `ARMSubtarget` (PR #160456)

Wed Sep 24 06:23:07 PDT 2025

https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/160456

>From 9e268b4decb4aa48361870c3bf827685da9c005a Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 24 Sep 2025 15:43:58 +0800
Subject: [PATCH 1/2] [ARM] Consider denormal mode in `ARMSubtarget`

---
 llvm/lib/Target/ARM/ARMSubtarget.cpp     | 12 +++++-------
 llvm/lib/Target/ARM/ARMSubtarget.h       |  8 +++++++-
 llvm/lib/Target/ARM/ARMTargetMachine.cpp |  6 +++++-
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 9f600e0c685ab..ed7a1ef21ee84 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -88,18 +88,16 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
                            const ARMBaseTargetMachine &TM, bool IsLittle,
-                           bool MinSize)
+                           bool MinSize, DenormalMode DM)
     : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
       UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize),
-      IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
+      IsLittle(IsLittle), DM(DM), TargetTriple(TT), Options(TM.Options), TM(TM),
       FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
-      InstrInfo(isThumb1Only()
-                    ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
-                    : !isThumb()
-                          ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
-                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+      InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+                : !isThumb()   ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+                             : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
       TLInfo(TM, *this) {
 
   CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 637eb4560e0f1..b2d368e0ca175 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -186,6 +186,12 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// IsLittle - The target is Little Endian
   bool IsLittle;
 
+  /// DM - Denormal mode
+  /// NEON and VFP RunFast mode are not IEEE 754 compliant,
+  /// use this field to determine whether to generate NEON/VFP
+  /// instructions in related function.
+  DenormalMode DM;
+
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -206,7 +212,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
                const ARMBaseTargetMachine &TM, bool IsLittle,
-               bool MinSize = false);
+               bool MinSize = false, DenormalMode DM = DenormalMode::getIEEE());
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 131b9332e9ade..86740a92b32c5 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -229,6 +229,10 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
   if (F.hasMinSize())
     Key += "+minsize";
 
+  DenormalMode DM = F.getDenormalModeRaw();
+  if (DM != DenormalMode::getIEEE())
+    Key += "denormal-fp-math=" + DM.str();
+
   auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
@@ -236,7 +240,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = std::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
-                                        F.hasMinSize());
+                                       F.hasMinSize(), DM);
 
     if (!I->isThumb() && !I->hasARMOps())
       F.getContext().emitError("Function '" + F.getName() + "' uses ARM "

>From e857ba68cda34e6984a52d46a8e2a14bec3072eb Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 24 Sep 2025 21:22:11 +0800
Subject: [PATCH 2/2] address comment

---
 llvm/lib/Target/ARM/ARMSubtarget.cpp |  3 ++-
 llvm/test/CodeGen/ARM/fadds.ll       |  2 +-
 llvm/test/CodeGen/ARM/fmuls.ll       |  2 +-
 llvm/test/CodeGen/ARM/fp_convert.ll  |  2 +-
 llvm/test/CodeGen/ARM/fsubs.ll       |  2 +-
 llvm/test/CodeGen/ARM/neon-spfp.ll   | 10 +++++-----
 6 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index ed7a1ef21ee84..f0592945b2051 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -222,7 +222,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
   const FeatureBitset &Bits = getFeatureBits();
   if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
-      (Options.UnsafeFPMath || isTargetDarwin()))
+      (Options.UnsafeFPMath || isTargetDarwin() ||
+       DM == DenormalMode::getPreserveSign()))
     HasNEONForFP = true;
 
   if (isRWPI())
diff --git a/llvm/test/CodeGen/ARM/fadds.ll b/llvm/test/CodeGen/ARM/fadds.ll
index b5d3bdae1f9d3..191d5b3c13d26 100644
--- a/llvm/test/CodeGen/ARM/fadds.ll
+++ b/llvm/test/CodeGen/ARM/fadds.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=CORTEXA8
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
diff --git a/llvm/test/CodeGen/ARM/fmuls.ll b/llvm/test/CodeGen/ARM/fmuls.ll
index b24d867a7e866..a390a242e5918 100644
--- a/llvm/test/CodeGen/ARM/fmuls.ll
+++ b/llvm/test/CodeGen/ARM/fmuls.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=CORTEXA8
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
diff --git a/llvm/test/CodeGen/ARM/fp_convert.ll b/llvm/test/CodeGen/ARM/fp_convert.ll
index 6f4707573fb50..0b749bf1c7ad4 100644
--- a/llvm/test/CodeGen/ARM/fp_convert.ll
+++ b/llvm/test/CodeGen/ARM/fp_convert.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN: | FileCheck %s -check-prefix=VFP2
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=NEON
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
diff --git a/llvm/test/CodeGen/ARM/fsubs.ll b/llvm/test/CodeGen/ARM/fsubs.ll
index baff34ab31fcf..7170f04ea0dd3 100644
--- a/llvm/test/CodeGen/ARM/fsubs.ll
+++ b/llvm/test/CodeGen/ARM/fsubs.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=NFP1
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=NFP1U
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
diff --git a/llvm/test/CodeGen/ARM/neon-spfp.ll b/llvm/test/CodeGen/ARM/neon-spfp.ll
index cbf25965a2fac..70a809583ff65 100644
--- a/llvm/test/CodeGen/ARM/neon-spfp.ll
+++ b/llvm/test/CodeGen/ARM/neon-spfp.ll
@@ -4,11 +4,11 @@
 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15
 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT
 
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --denormal-fp-math=preserve-sign| FileCheck %s -check-prefix=CHECK-UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --denormal-fp-math=preserve-sign | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT
 
 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5
 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8