[clang] [llvm] [IR] Allow fast math flags on calls with homogeneous FP struct types (PR #110506)
Benjamin Maxwell via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 1 08:34:14 PDT 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/110506
>From 328357f2300ebe55b8385c01f9c655f703933736 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 30 Sep 2024 11:07:45 +0000
Subject: [PATCH 1/9] [IR] Allow fast math flags on calls with homogeneous FP
struct types
This extends FPMathOperator to allow calls that return literal structs
of homogeneous floating-point or vector-of-floating-point types.
The intended use case for this is to support FP intrinsics that return
multiple values (such as `llvm.sincos`).
---
llvm/docs/LangRef.rst | 19 ++++++------
llvm/include/llvm/IR/DerivedTypes.h | 4 +++
llvm/include/llvm/IR/Operator.h | 14 +++++++--
llvm/lib/IR/Type.cpp | 13 +++++----
llvm/test/Bitcode/compatibility.ll | 20 +++++++++++++
llvm/unittests/IR/InstructionsTest.cpp | 40 ++++++++++++++++++++++----
6 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 3f39d58b322a4f..1eb2982385fda0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12472,9 +12472,8 @@ instruction's return value on the same edge).
The optional ``fast-math-flags`` marker indicates that the phi has one
or more :ref:`fast-math-flags <fastmath>`. These are optimization hints
to enable otherwise unsafe floating-point optimizations. Fast-math-flags
-are only valid for phis that return a floating-point scalar or vector
-type, or an array (nested to any depth) of floating-point scalar or vector
-types.
+are only valid for phis that return a floating-point scalar or vector type,
+possibly within an array (nested to any depth), or a homogeneous struct literal.
Semantics:
""""""""""
@@ -12523,8 +12522,8 @@ class <t_firstclass>` type.
#. The optional ``fast-math flags`` marker indicates that the select has one or more
:ref:`fast-math flags <fastmath>`. These are optimization hints to enable
otherwise unsafe floating-point optimizations. Fast-math flags are only valid
- for selects that return a floating-point scalar or vector type, or an array
- (nested to any depth) of floating-point scalar or vector types.
+ for selects that return a floating-point scalar or vector type, possibly
+ within an array (nested to any depth), or a homogeneous struct literal.
Semantics:
""""""""""
@@ -12762,8 +12761,8 @@ This instruction requires several arguments:
#. The optional ``fast-math flags`` marker indicates that the call has one or more
:ref:`fast-math flags <fastmath>`, which are optimization hints to enable
otherwise unsafe floating-point optimizations. Fast-math flags are only valid
- for calls that return a floating-point scalar or vector type, or an array
- (nested to any depth) of floating-point scalar or vector types.
+ for calls that return a floating-point scalar or vector type, possibly within
+ an array (nested to any depth), or a homogeneous struct literal.
#. The optional "cconv" marker indicates which :ref:`calling
convention <callingconv>` the call should use. If none is
@@ -20528,7 +20527,8 @@ the explicit vector length.
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
enable otherwise unsafe floating-point optimizations. Fast-math flags are
only valid for selects that return a floating-point scalar or vector type,
- or an array (nested to any depth) of floating-point scalar or vector types.
+ possibly within an array (nested to any depth), or a homogeneous struct
+ literal.
Semantics:
""""""""""
@@ -20586,7 +20586,8 @@ is the pivot.
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
enable otherwise unsafe floating-point optimizations. Fast-math flags are
only valid for merges that return a floating-point scalar or vector type,
- or an array (nested to any depth) of floating-point scalar or vector types.
+ possibly within an array (nested to any depth), or a homogeneous struct
+ literal.
Semantics:
""""""""""
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 975c142f1a4572..a24801d8bdf834 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -301,6 +301,10 @@ class StructType : public Type {
/// {<vscale x 2 x i32>, <vscale x 4 x i64>}}
bool containsHomogeneousScalableVectorTypes() const;
+ /// Return true if this struct is non-empty and all element types are the
+ /// same.
+ bool containsHomogeneousTypes() const;
+
/// Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != nullptr; }
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 88b9bfc0be4b15..22ffcc730e7b68 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -15,6 +15,7 @@
#define LLVM_IR_OPERATOR_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/GEPNoWrapFlags.h"
@@ -351,8 +352,17 @@ class FPMathOperator : public Operator {
case Instruction::Select:
case Instruction::Call: {
Type *Ty = V->getType();
- while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
- Ty = ArrTy->getElementType();
+ TypeSwitch<Type *>(Ty)
+ .Case([&](StructType *StructTy) {
+ if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
+ return;
+ Ty = StructTy->elements().front();
+ })
+ .Case([&](ArrayType *ArrTy) {
+ do {
+ Ty = ArrTy->getElementType();
+ } while ((ArrTy = dyn_cast<ArrayType>(Ty)));
+ });
return Ty->isFPOrFPVectorTy();
}
default:
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 3784ad28d7219d..f618263f79c313 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -430,13 +430,14 @@ bool StructType::containsScalableVectorType(
}
bool StructType::containsHomogeneousScalableVectorTypes() const {
- Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
- if (!FirstTy || !isa<ScalableVectorType>(FirstTy))
+ if (getNumElements() <= 0 || !isa<ScalableVectorType>(elements().front()))
return false;
- for (Type *Ty : elements())
- if (Ty != FirstTy)
- return false;
- return true;
+ return containsHomogeneousTypes();
+}
+
+bool StructType::containsHomogeneousTypes() const {
+ ArrayRef<Type *> ElementTys = elements();
+ return !ElementTys.empty() && all_equal(ElementTys);
}
void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index ea29ff634a43bb..4fe9d9b11f8831 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1122,6 +1122,26 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, [2 x double] %d1, [2 x <
ret void
}
+declare { float, float } @fmf_struct_f32()
+declare { double, double } @fmf_struct_f64()
+declare { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+
+; CHECK-LABEL: fastMathFlagsForStructCalls(
+define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double } %d1, { <4 x double>, <4 x double> } %d2) {
+ %call.fast = call fast { float, float } @fmf_struct_f32()
+ ; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32()
+
+ ; Throw in some other attributes to make sure those stay in the right places.
+
+ %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
+ ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
+
+ %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+ ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+
+ ret void
+}
+
;; Type System
%opaquety = type opaque
define void @typesystem() {
diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp
index 481fe96607e48e..9d8056a768af2f 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -1559,12 +1559,40 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
CallInst::Create(AVFFnTy, AVFCallee, {}, ""));
EXPECT_TRUE(isa<FPMathOperator>(AVFCall));
- Type *AAVFTy = ArrayType::get(AVFTy, 2);
- FunctionType *AAVFFnTy = FunctionType::get(AAVFTy, {});
- Value *AAVFCallee = Constant::getNullValue(PtrTy);
- std::unique_ptr<CallInst> AAVFCall(
- CallInst::Create(AAVFFnTy, AAVFCallee, {}, ""));
- EXPECT_TRUE(isa<FPMathOperator>(AAVFCall));
+ Type *StructITy = StructType::get(ITy, ITy);
+ FunctionType *StructIFnTy = FunctionType::get(StructITy, {});
+ Value *StructICallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> StructICall(
+ CallInst::Create(StructIFnTy, StructICallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(StructICall));
+
+ Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct");
+ FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {});
+ Value *NamedStructFCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> NamedStructFCall(
+ CallInst::Create(NamedStructFFnTy, NamedStructFCallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(NamedStructFCall));
+
+ Type *MixedStructTy = StructType::get(FTy, ITy);
+ FunctionType *MixedStructFnTy = FunctionType::get(MixedStructTy, {});
+ Value *MixedStructCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> MixedStructCall(
+ CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall));
+
+ Type *StructFTy = StructType::get(FTy, FTy);
+ FunctionType *StructFFnTy = FunctionType::get(StructFTy, {});
+ Value *StructFCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> StructFCall(
+ CallInst::Create(StructFFnTy, StructFCallee, {}, ""));
+ EXPECT_TRUE(isa<FPMathOperator>(StructFCall));
+
+ Type *StructVFTy = StructType::get(VFTy, VFTy);
+ FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {});
+ Value *StructVFCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> StructVFCall(
+ CallInst::Create(StructVFFnTy, StructVFCallee, {}, ""));
+ EXPECT_TRUE(isa<FPMathOperator>(StructVFCall));
}
TEST(InstructionsTest, FNegInstruction) {
>From 688154050d02b3d118031d2fdd532a7f5d2500a8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 30 Sep 2024 15:06:11 +0000
Subject: [PATCH 2/9] Remove TypeSwitch
---
llvm/include/llvm/IR/Operator.h | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 22ffcc730e7b68..2cacc632f3a8cb 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -15,7 +15,6 @@
#define LLVM_IR_OPERATOR_H
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/GEPNoWrapFlags.h"
@@ -352,17 +351,15 @@ class FPMathOperator : public Operator {
case Instruction::Select:
case Instruction::Call: {
Type *Ty = V->getType();
- TypeSwitch<Type *>(Ty)
- .Case([&](StructType *StructTy) {
- if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
- return;
- Ty = StructTy->elements().front();
- })
- .Case([&](ArrayType *ArrTy) {
- do {
- Ty = ArrTy->getElementType();
- } while ((ArrTy = dyn_cast<ArrayType>(Ty)));
- });
+ if (StructType *StructTy = dyn_cast<StructType>(Ty)) {
+ if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
+ return false;
+ Ty = StructTy->elements().front();
+ } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+ do {
+ Ty = ArrayTy->getElementType();
+ } while ((ArrayTy = dyn_cast<ArrayType>(Ty)));
+ }
return Ty->isFPOrFPVectorTy();
}
default:
>From 5cc741f73aced83fc8826438e0a018776586a5e2 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 30 Sep 2024 15:31:09 +0000
Subject: [PATCH 3/9] Update clang tests
---
clang/test/CodeGen/X86/cx-complex-range.c | 2 +-
clang/test/CodeGen/cx-complex-range.c | 10 +++++-----
clang/test/CodeGen/nofpclass.c | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/test/CodeGen/X86/cx-complex-range.c b/clang/test/CodeGen/X86/cx-complex-range.c
index 14887637d516ef..da580d54c9f618 100644
--- a/clang/test/CodeGen/X86/cx-complex-range.c
+++ b/clang/test/CodeGen/X86/cx-complex-range.c
@@ -1220,7 +1220,7 @@ _Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) {
// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
// FULL_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80
// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half
diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c
index d4fb62a7dfec35..b780d4d1767c12 100644
--- a/clang/test/CodeGen/cx-complex-range.c
+++ b/clang/test/CodeGen/cx-complex-range.c
@@ -1444,7 +1444,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// FULL_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1
// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RETVAL]], i32 0, i32 0
@@ -2003,7 +2003,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno double [[MUL_I]], [[MUL_I]]
// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// FULL_FAST: complex_mul_libcall:
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1
// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -2535,7 +2535,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1
// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0
@@ -3028,7 +3028,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno x86_fp80 [[MUL_I]], [[MUL_I]]
// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// FULL_FAST: complex_mul_libcall:
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -3753,7 +3753,7 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
// FULL_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80
// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index 16a3823a5c02de..ca86e67a5d27c7 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -548,7 +548,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// CFINITEONLY-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double [[MUL_I]], [[MUL_I]]
// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// CFINITEONLY: complex_mul_libcall:
-// CFINITEONLY-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// CFINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// CFINITEONLY-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -605,7 +605,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// NONANS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], [[MUL_I]]
// NONANS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// NONANS: complex_mul_libcall:
-// NONANS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NONANS-NEXT: [[CALL:%.*]] = call nnan { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// NONANS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// NONANS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -649,7 +649,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// NOINFS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], [[MUL_I]]
// NOINFS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// NOINFS: complex_mul_libcall:
-// NOINFS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NOINFS-NEXT: [[CALL:%.*]] = call ninf { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// NOINFS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// NOINFS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]]
>From 8d3353c6c290c60c25abaea4b309f7ec4b525abf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 30 Sep 2024 16:15:01 +0000
Subject: [PATCH 4/9] Fixups
---
llvm/docs/LangRef.rst | 39 +++++++++++++++-----------
llvm/test/Bitcode/compatibility.ll | 14 ++++-----
llvm/unittests/IR/InstructionsTest.cpp | 25 +++++++++++++++--
3 files changed, 53 insertions(+), 25 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1eb2982385fda0..0462b5d7328737 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3685,9 +3685,9 @@ Fast-Math Flags
LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
-:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), :ref:`phi <i_phi>`,
-:ref:`select <i_select>` and :ref:`call <i_call>`
-may use the following flags to enable otherwise unsafe
+:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), and :ref:`phi <i_phi>`,
+:ref:`select <i_select>`, or :ref:`call <i_call>` instructions that return
+floating-point types may use the following flags to enable otherwise unsafe
floating-point transformations.
``fast``
@@ -3709,6 +3709,16 @@ floating-point transformations.
argument or zero result as insignificant. This does not imply that -0.0
is poison and/or guaranteed to not exist in the operation.
+.. _fastmath_return_types:
+
+Note: For :ref:`phi <i_phi>`, :ref:`select <i_select>`, and :ref:`call <i_call>`
+instructions, the following return types are considered to be floating-point
+types:
+
+- Floating-point scalar or vector types
+- Array types (nested to any depth) of floating-point scalar or vector types
+- Homogeneous literal struct types of floating-point scalar or vector types
+
Rewrite-based flags
^^^^^^^^^^^^^^^^^^^
@@ -4343,7 +4353,7 @@ recursive, can be opaqued, and are never uniqued.
:Examples:
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values |
+| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values (this is a "homogeneous" struct as all element types are the same) |
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``{ float, ptr }`` | A pair, where the first element is a ``float`` and the second element is a :ref:`pointer <t_pointer>`. |
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -12472,8 +12482,8 @@ instruction's return value on the same edge).
The optional ``fast-math-flags`` marker indicates that the phi has one
or more :ref:`fast-math-flags <fastmath>`. These are optimization hints
to enable otherwise unsafe floating-point optimizations. Fast-math-flags
-are only valid for phis that return a floating-point scalar or vector type,
-possibly within an array (nested to any depth), or a homogeneous struct literal.
+are only valid for phis that return :ref:`supported floating-point types
+<fastmath_return_types>`.
Semantics:
""""""""""
@@ -12522,8 +12532,8 @@ class <t_firstclass>` type.
#. The optional ``fast-math flags`` marker indicates that the select has one or more
:ref:`fast-math flags <fastmath>`. These are optimization hints to enable
otherwise unsafe floating-point optimizations. Fast-math flags are only valid
- for selects that return a floating-point scalar or vector type, possibly
- within an array (nested to any depth), or a homogeneous struct literal.
+ for selects that return :ref:`supported floating-point types
+ <fastmath_return_types>`.
Semantics:
""""""""""
@@ -12761,8 +12771,7 @@ This instruction requires several arguments:
#. The optional ``fast-math flags`` marker indicates that the call has one or more
:ref:`fast-math flags <fastmath>`, which are optimization hints to enable
otherwise unsafe floating-point optimizations. Fast-math flags are only valid
- for calls that return a floating-point scalar or vector type, possibly within
- an array (nested to any depth), or a homogeneous struct literal.
+ for calls that return :ref:`supported floating-point types <fastmath_return_types>`.
#. The optional "cconv" marker indicates which :ref:`calling
convention <callingconv>` the call should use. If none is
@@ -20526,9 +20535,8 @@ the explicit vector length.
#. The optional ``fast-math flags`` marker indicates that the select has one or
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
enable otherwise unsafe floating-point optimizations. Fast-math flags are
- only valid for selects that return a floating-point scalar or vector type,
- possibly within an array (nested to any depth), or a homogeneous struct
- literal.
+ only valid for selects that return :ref:`supported floating-point types
+ <fastmath_return_types>`.
Semantics:
""""""""""
@@ -20585,9 +20593,8 @@ is the pivot.
#. The optional ``fast-math flags`` marker indicates that the merge has one or
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
enable otherwise unsafe floating-point optimizations. Fast-math flags are
- only valid for merges that return a floating-point scalar or vector type,
- possibly within an array (nested to any depth), or a homogeneous struct
- literal.
+ only valid for merges that return :ref:`supported floating-point types
+ <fastmath_return_types>`.
Semantics:
""""""""""
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 4fe9d9b11f8831..b6050944e637a3 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1123,21 +1123,21 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, [2 x double] %d1, [2 x <
}
declare { float, float } @fmf_struct_f32()
-declare { double, double } @fmf_struct_f64()
-declare { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+declare { double, double, double } @fmf_struct_f64()
+declare { <4 x double> } @fmf_struct_v4f64()
; CHECK-LABEL: fastMathFlagsForStructCalls(
-define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double } %d1, { <4 x double>, <4 x double> } %d2) {
+define void @fastMathFlagsForStructCalls() {
%call.fast = call fast { float, float } @fmf_struct_f32()
; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32()
; Throw in some other attributes to make sure those stay in the right places.
- %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
- ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
+ %call.nsz.arcp = notail call nsz arcp { double, double, double } @fmf_struct_f64()
+ ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double, double } @fmf_struct_f64()
- %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64()
- ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+ %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } @fmf_struct_v4f64()
+ ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } @fmf_struct_v4f64()
ret void
}
diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp
index 9d8056a768af2f..0af812564c0267 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -1566,6 +1566,13 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
CallInst::Create(StructIFnTy, StructICallee, {}, ""));
EXPECT_FALSE(isa<FPMathOperator>(StructICall));
+ Type *EmptyStructTy = StructType::get(C);
+ FunctionType *EmptyStructFnTy = FunctionType::get(EmptyStructTy, {});
+ Value *EmptyStructCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> EmptyStructCall(
+ CallInst::Create(EmptyStructFnTy, EmptyStructCallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(EmptyStructCall));
+
Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct");
FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {});
Value *NamedStructFCallee = Constant::getNullValue(PtrTy);
@@ -1580,19 +1587,33 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, ""));
EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall));
- Type *StructFTy = StructType::get(FTy, FTy);
+ Type *StructFTy = StructType::get(FTy, FTy, FTy);
FunctionType *StructFFnTy = FunctionType::get(StructFTy, {});
Value *StructFCallee = Constant::getNullValue(PtrTy);
std::unique_ptr<CallInst> StructFCall(
CallInst::Create(StructFFnTy, StructFCallee, {}, ""));
EXPECT_TRUE(isa<FPMathOperator>(StructFCall));
- Type *StructVFTy = StructType::get(VFTy, VFTy);
+ Type *StructVFTy = StructType::get(VFTy, VFTy, VFTy, VFTy);
FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {});
Value *StructVFCallee = Constant::getNullValue(PtrTy);
std::unique_ptr<CallInst> StructVFCall(
CallInst::Create(StructVFFnTy, StructVFCallee, {}, ""));
EXPECT_TRUE(isa<FPMathOperator>(StructVFCall));
+
+ Type *NestedStructFTy = StructType::get(StructFTy, StructFTy, StructFTy);
+ FunctionType *NestedStructFFnTy = FunctionType::get(NestedStructFTy, {});
+ Value *NestedStructFCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> NestedStructFCall(
+ CallInst::Create(NestedStructFFnTy, NestedStructFCallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(NestedStructFCall));
+
+ Type *AStructFTy = ArrayType::get(StructFTy, 5);
+ FunctionType *AStructFFnTy = FunctionType::get(AStructFTy, {});
+ Value *AStructFCallee = Constant::getNullValue(PtrTy);
+ std::unique_ptr<CallInst> AStructFCall(
+ CallInst::Create(AStructFFnTy, AStructFCallee, {}, ""));
+ EXPECT_FALSE(isa<FPMathOperator>(AStructFCall));
}
TEST(InstructionsTest, FNegInstruction) {
>From 04b7d82c6b995f394e6573f522ef1cda45216b48 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 30 Sep 2024 18:47:34 +0000
Subject: [PATCH 5/9] Update nofpclass
---
llvm/docs/LangRef.rst | 15 ++++++++-------
llvm/include/llvm/IR/Operator.h | 27 ++++++++++++++++-----------
llvm/lib/IR/Attributes.cpp | 5 ++---
llvm/test/Bitcode/compatibility.ll | 11 ++++++++++-
4 files changed, 36 insertions(+), 22 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 0462b5d7328737..22dd7ad10baa2c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1499,11 +1499,12 @@ Currently, only the following parameter attributes are defined:
``nofpclass(<test mask>)``
This attribute applies to parameters and return values with
floating-point and vector of floating-point types, as well as
- arrays of such types. The test mask has the same format as the
- second argument to the :ref:`llvm.is.fpclass <llvm.is.fpclass>`,
- and indicates which classes of floating-point values are not
- permitted for the value. For example a bitmask of 3 indicates
- the parameter may not be a NaN.
+ :ref:`supported aggregates <fastmath_return_types>` of such types
+ (matching the supported types for :ref:`fast-math flags <fastmath>`).
+ The test mask has the same format as the second argument to the
+ :ref:`llvm.is.fpclass <llvm.is.fpclass>`, and indicates which classes
+ of floating-point values are not permitted for the value. For example
+ a bitmask of 3 indicates the parameter may not be a NaN.
If the value is a floating-point class indicated by the
``nofpclass`` test mask, a :ref:`poison value <poisonvalues>` is
@@ -3709,12 +3710,12 @@ floating-point transformations.
argument or zero result as insignificant. This does not imply that -0.0
is poison and/or guaranteed to not exist in the operation.
-.. _fastmath_return_types:
-
Note: For :ref:`phi <i_phi>`, :ref:`select <i_select>`, and :ref:`call <i_call>`
instructions, the following return types are considered to be floating-point
types:
+.. _fastmath_return_types:
+
- Floating-point scalar or vector types
- Array types (nested to any depth) of floating-point scalar or vector types
- Homogeneous literal struct types of floating-point scalar or vector types
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 2cacc632f3a8cb..1228df0fb9f225 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -326,6 +326,21 @@ class FPMathOperator : public Operator {
/// precision.
float getFPAccuracy() const;
+ /// Returns true if `Ty` is a supported floating-point type for phi, select,
+ /// or call FPMathOperators.
+ static bool isSupportedFloatingPointType(Type *Ty) {
+ if (StructType *StructTy = dyn_cast<StructType>(Ty)) {
+ if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
+ return false;
+ Ty = StructTy->elements().front();
+ } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+ do {
+ Ty = ArrayTy->getElementType();
+ } while ((ArrayTy = dyn_cast<ArrayType>(Ty)));
+ }
+ return Ty->isFPOrFPVectorTy();
+ }
+
static bool classof(const Value *V) {
unsigned Opcode;
if (auto *I = dyn_cast<Instruction>(V))
@@ -350,17 +365,7 @@ class FPMathOperator : public Operator {
case Instruction::PHI:
case Instruction::Select:
case Instruction::Call: {
- Type *Ty = V->getType();
- if (StructType *StructTy = dyn_cast<StructType>(Ty)) {
- if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
- return false;
- Ty = StructTy->elements().front();
- } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
- do {
- Ty = ArrayTy->getElementType();
- } while ((ArrayTy = dyn_cast<ArrayType>(Ty)));
- }
- return Ty->isFPOrFPVectorTy();
+ return isSupportedFloatingPointType(V->getType());
}
default:
return false;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index eb615833c00bf3..ddfbcdb902cc3d 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -2094,9 +2095,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
///
/// TODO: Consider relaxing to any FP type struct fields.
bool AttributeFuncs::isNoFPClassCompatibleType(Type *Ty) {
- while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
- Ty = ArrTy->getElementType();
- return Ty->isFPOrFPVectorTy();
+ return FPMathOperator::isSupportedFloatingPointType(Ty);
}
/// Which attributes cannot be applied to a type.
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index b6050944e637a3..a1b2370a87b821 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -2097,9 +2097,14 @@ declare nofpclass(sub zero) float @nofpclass_sub_zero(float nofpclass(sub zero))
; CHECK: declare nofpclass(inf sub) float @nofpclass_sub_inf(float nofpclass(inf sub))
declare nofpclass(sub inf) float @nofpclass_sub_inf(float nofpclass(sub inf))
+; CHECK: declare nofpclass(nan) { float, float } @nofpclass_struct({ double } nofpclass(nan))
+declare nofpclass(nan) { float, float } @nofpclass_struct({ double } nofpclass(nan))
+
declare float @unknown_fpclass_func(float)
-define float @nofpclass_callsites(float %arg) {
+declare { <4 x double>, <4 x double>, <4 x double> } @unknown_fpclass_struct_func({ float })
+
+define float @nofpclass_callsites(float %arg, { float } %arg1) {
; CHECK: %call0 = call nofpclass(nan) float @unknown_fpclass_func(float nofpclass(ninf) %arg)
%call0 = call nofpclass(nan) float @unknown_fpclass_func(float nofpclass(ninf) %arg)
@@ -2108,6 +2113,10 @@ define float @nofpclass_callsites(float %arg) {
; CHECK: %call2 = call nofpclass(zero) float @unknown_fpclass_func(float nofpclass(norm) %arg)
%call2 = call nofpclass(zero) float @unknown_fpclass_func(float nofpclass(norm) %arg)
+
+ ; CHECK: %call3 = call nofpclass(pinf) { <4 x double>, <4 x double>, <4 x double> } @unknown_fpclass_struct_func({ float } nofpclass(all) %arg1)
+ %call3 = call nofpclass(pinf) { <4 x double>, <4 x double>, <4 x double> } @unknown_fpclass_struct_func({ float } nofpclass(all) %arg1)
+
%add0 = fadd float %call0, %call1
%add1 = fadd float %add0, %call2
ret float %add1
>From 77479c69e635c100361dec663b0cb9a6ef3904f4 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 1 Oct 2024 08:45:19 +0000
Subject: [PATCH 6/9] Update nofpclass clang tests
---
clang/test/CodeGen/X86/cx-complex-range.c | 2 +-
clang/test/CodeGen/cx-complex-range.c | 42 +++++++++++------------
clang/test/CodeGen/nofpclass.c | 14 ++++----
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/clang/test/CodeGen/X86/cx-complex-range.c b/clang/test/CodeGen/X86/cx-complex-range.c
index da580d54c9f618..a0e6dc219b36f7 100644
--- a/clang/test/CodeGen/X86/cx-complex-range.c
+++ b/clang/test/CodeGen/X86/cx-complex-range.c
@@ -1220,7 +1220,7 @@ _Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) {
// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
// FULL_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80
// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half
diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c
index b780d4d1767c12..d83d4d02ac1991 100644
--- a/clang/test/CodeGen/cx-complex-range.c
+++ b/clang/test/CodeGen/cx-complex-range.c
@@ -1382,7 +1382,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1
// AVRFP64-NEXT: ret void
//
-// BASIC_FAST-LABEL: define dso_local { double, double } @divd(
+// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @divd(
// BASIC_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] {
// BASIC_FAST-NEXT: entry:
// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -1422,7 +1422,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// BASIC_FAST-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// BASIC_FAST-NEXT: ret { double, double } [[TMP15]]
//
-// FULL_FAST-LABEL: define dso_local { double, double } @divd(
+// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @divd(
// FULL_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] {
// FULL_FAST-NEXT: entry:
// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -1444,7 +1444,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// FULL_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1
// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1
// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RETVAL]], i32 0, i32 0
@@ -1454,7 +1454,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// FULL_FAST-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// FULL_FAST-NEXT: ret { double, double } [[TMP6]]
//
-// IMPRVD_FAST-LABEL: define dso_local { double, double } @divd(
+// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @divd(
// IMPRVD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR2:[0-9]+]] {
// IMPRVD_FAST-NEXT: entry:
// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -1512,7 +1512,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
// IMPRVD_FAST-NEXT: [[TMP26:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// IMPRVD_FAST-NEXT: ret { double, double } [[TMP26]]
//
-// PRMTD_FAST-LABEL: define dso_local { double, double } @divd(
+// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @divd(
// PRMTD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1:[0-9]+]] {
// PRMTD_FAST-NEXT: entry:
// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -1934,7 +1934,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1
// AVRFP64-NEXT: ret void
//
-// BASIC_FAST-LABEL: define dso_local { double, double } @muld(
+// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @muld(
// BASIC_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] {
// BASIC_FAST-NEXT: entry:
// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -1969,7 +1969,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// BASIC_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// BASIC_FAST-NEXT: ret { double, double } [[TMP4]]
//
-// FULL_FAST-LABEL: define dso_local { double, double } @muld(
+// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @muld(
// FULL_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] {
// FULL_FAST-NEXT: entry:
// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -2003,7 +2003,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno double [[MUL_I]], [[MUL_I]]
// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// FULL_FAST: complex_mul_libcall:
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1
// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -2017,7 +2017,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[TMP6:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// FULL_FAST-NEXT: ret { double, double } [[TMP6]]
//
-// IMPRVD_FAST-LABEL: define dso_local { double, double } @muld(
+// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @muld(
// IMPRVD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR2]] {
// IMPRVD_FAST-NEXT: entry:
// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -2052,7 +2052,7 @@ _Complex double divd(_Complex double a, _Complex double b) {
// IMPRVD_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8
// IMPRVD_FAST-NEXT: ret { double, double } [[TMP4]]
//
-// PRMTD_FAST-LABEL: define dso_local { double, double } @muld(
+// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) { double, double } @muld(
// PRMTD_FAST-SAME: double noundef nofpclass(nan inf) [[A_COERCE0:%.*]], double noundef nofpclass(nan inf) [[A_COERCE1:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]]) #[[ATTR1]] {
// PRMTD_FAST-NEXT: entry:
// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -2493,7 +2493,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1
// AVRFP64-NEXT: ret void
//
-// BASIC_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld(
+// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @divld(
// BASIC_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// BASIC_FAST-NEXT: entry:
// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -2523,7 +2523,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// BASIC_FAST-NEXT: [[TMP11:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// BASIC_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP11]]
//
-// FULL_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld(
+// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @divld(
// FULL_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// FULL_FAST-NEXT: entry:
// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -2535,7 +2535,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
// FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1
// FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0
@@ -2545,7 +2545,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// FULL_FAST-NEXT: [[TMP2:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// FULL_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]]
//
-// IMPRVD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld(
+// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @divld(
// IMPRVD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] {
// IMPRVD_FAST-NEXT: entry:
// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -2593,7 +2593,7 @@ _Complex double muld(_Complex double a, _Complex double b) {
// IMPRVD_FAST-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// IMPRVD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]]
//
-// PRMTD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld(
+// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @divld(
// PRMTD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// PRMTD_FAST-NEXT: entry:
// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -2979,7 +2979,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// AVRFP64-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 1
// AVRFP64-NEXT: ret void
//
-// BASIC_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld(
+// BASIC_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @mulld(
// BASIC_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// BASIC_FAST-NEXT: entry:
// BASIC_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -3004,7 +3004,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// BASIC_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// BASIC_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]]
//
-// FULL_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld(
+// FULL_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @mulld(
// FULL_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// FULL_FAST-NEXT: entry:
// FULL_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -3028,7 +3028,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno x86_fp80 [[MUL_I]], [[MUL_I]]
// FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// FULL_FAST: complex_mul_libcall:
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -3042,7 +3042,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// FULL_FAST-NEXT: [[TMP2:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// FULL_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP2]]
//
-// IMPRVD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld(
+// IMPRVD_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @mulld(
// IMPRVD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] {
// IMPRVD_FAST-NEXT: entry:
// IMPRVD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -3067,7 +3067,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) {
// IMPRVD_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16
// IMPRVD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]]
//
-// PRMTD_FAST-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld(
+// PRMTD_FAST-LABEL: define dso_local nofpclass(nan inf) { x86_fp80, x86_fp80 } @mulld(
// PRMTD_FAST-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR1]] {
// PRMTD_FAST-NEXT: entry:
// PRMTD_FAST-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16
@@ -3753,7 +3753,7 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) {
// FULL_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
// FULL_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80
// FULL_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
+// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn nofpclass(nan inf) { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
// FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0
// FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1
// FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index ca86e67a5d27c7..da7cdff0b1a46b 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -519,7 +519,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
}
// CFINITEONLY: Function Attrs: noinline nounwind optnone
-// CFINITEONLY-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret
+// CFINITEONLY-LABEL: define dso_local nofpclass(nan inf) { double, double } @defined_complex_func_f64_ret
// CFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[C_COERCE0:%.*]], double noundef nofpclass(nan inf) [[C_COERCE1:%.*]]) #[[ATTR0]] {
// CFINITEONLY-NEXT: entry:
// CFINITEONLY-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -548,7 +548,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// CFINITEONLY-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double [[MUL_I]], [[MUL_I]]
// CFINITEONLY-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// CFINITEONLY: complex_mul_libcall:
-// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf nofpclass(nan inf) { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// CFINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// CFINITEONLY-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -563,7 +563,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// CFINITEONLY-NEXT: ret { double, double } [[TMP4]]
//
// CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-// CLFINITEONLY-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret
+// CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) { double, double } @defined_complex_func_f64_ret
// CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[C_COERCE0:%.*]], double noundef nofpclass(nan inf) [[C_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CLFINITEONLY-NEXT: entry:
// CLFINITEONLY-NEXT: [[MUL_AD:%.*]] = fmul nnan ninf double [[C_COERCE0]], [[C_COERCE1]]
@@ -576,7 +576,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// CLFINITEONLY-NEXT: ret { double, double } [[DOTFCA_1_INSERT]]
//
// NONANS: Function Attrs: noinline nounwind optnone
-// NONANS-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret
+// NONANS-LABEL: define dso_local nofpclass(nan) { double, double } @defined_complex_func_f64_ret
// NONANS-SAME: (double noundef nofpclass(nan) [[C_COERCE0:%.*]], double noundef nofpclass(nan) [[C_COERCE1:%.*]]) #[[ATTR0]] {
// NONANS-NEXT: entry:
// NONANS-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -605,7 +605,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// NONANS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], [[MUL_I]]
// NONANS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// NONANS: complex_mul_libcall:
-// NONANS-NEXT: [[CALL:%.*]] = call nnan { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NONANS-NEXT: [[CALL:%.*]] = call nnan nofpclass(nan) { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// NONANS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// NONANS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// NONANS-NEXT: br label [[COMPLEX_MUL_CONT]]
@@ -620,7 +620,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// NONANS-NEXT: ret { double, double } [[TMP4]]
//
// NOINFS: Function Attrs: noinline nounwind optnone
-// NOINFS-LABEL: define dso_local { double, double } @defined_complex_func_f64_ret
+// NOINFS-LABEL: define dso_local nofpclass(inf) { double, double } @defined_complex_func_f64_ret
// NOINFS-SAME: (double noundef nofpclass(inf) [[C_COERCE0:%.*]], double noundef nofpclass(inf) [[C_COERCE1:%.*]]) #[[ATTR0]] {
// NOINFS-NEXT: entry:
// NOINFS-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8
@@ -649,7 +649,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple
// NOINFS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], [[MUL_I]]
// NOINFS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
// NOINFS: complex_mul_libcall:
-// NOINFS-NEXT: [[CALL:%.*]] = call ninf { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NOINFS-NEXT: [[CALL:%.*]] = call ninf nofpclass(inf) { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
// NOINFS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
// NOINFS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
// NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]]
>From 1869ca2d8bb082f6c1aec4dc0e8c2bed40cff768 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 1 Oct 2024 08:46:37 +0000
Subject: [PATCH 7/9] Use auto
---
llvm/include/llvm/IR/Operator.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 1228df0fb9f225..81877ea7b8a2ad 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -329,11 +329,11 @@ class FPMathOperator : public Operator {
/// Returns true if `Ty` is a supported floating-point type for phi, select,
/// or call FPMathOperators.
static bool isSupportedFloatingPointType(Type *Ty) {
- if (StructType *StructTy = dyn_cast<StructType>(Ty)) {
+ if (auto *StructTy = dyn_cast<StructType>(Ty)) {
if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
return false;
Ty = StructTy->elements().front();
- } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+ } else if (auto *ArrayTy = dyn_cast<ArrayType>(Ty)) {
do {
Ty = ArrayTy->getElementType();
} while ((ArrayTy = dyn_cast<ArrayType>(Ty)));
>From 6bfe0bc85ea624e5af03c0ecb5404dd0bcd7cbb3 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 1 Oct 2024 09:16:44 +0000
Subject: [PATCH 8/9] Remove TODO
---
llvm/lib/IR/Attributes.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index ddfbcdb902cc3d..fb71443d681a68 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -2092,8 +2092,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
/// Returns true if this is a type legal for the 'nofpclass' attribute. This
/// follows the same type rules as FPMathOperator.
-///
-/// TODO: Consider relaxing to any FP type struct fields.
bool AttributeFuncs::isNoFPClassCompatibleType(Type *Ty) {
return FPMathOperator::isSupportedFloatingPointType(Ty);
}
>From 4687a18fa3bc4acbce3aa743a9646dc4f67365b1 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 1 Oct 2024 15:29:55 +0000
Subject: [PATCH 9/9] Short circuit check
---
llvm/include/llvm/IR/Operator.h | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 81877ea7b8a2ad..43b13317d5def8 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -326,9 +326,9 @@ class FPMathOperator : public Operator {
/// precision.
float getFPAccuracy() const;
- /// Returns true if `Ty` is a supported floating-point type for phi, select,
- /// or call FPMathOperators.
- static bool isSupportedFloatingPointType(Type *Ty) {
+ /// Returns true if `Ty` is a supported aggregate floating-point type for phi,
+ /// select, or call FPMathOperators.
+ static bool isSupportedFloatingPointAggregateType(Type *Ty) {
if (auto *StructTy = dyn_cast<StructType>(Ty)) {
if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
return false;
@@ -341,6 +341,12 @@ class FPMathOperator : public Operator {
return Ty->isFPOrFPVectorTy();
}
+ /// Returns true if `Ty` is a supported floating-point type for phi, select,
+ /// or call FPMathOperators.
+ static bool isSupportedFloatingPointType(Type *Ty) {
+ return Ty->isFPOrFPVectorTy() || isSupportedFloatingPointAggregateType(Ty);
+ }
+
static bool classof(const Value *V) {
unsigned Opcode;
if (auto *I = dyn_cast<Instruction>(V))
More information about the cfe-commits mailing list