[clang] c987f9d - [Matrix] Try to emit fmuladd for both vector and matrix types
Francis Visoiu Mistrih via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 31 17:13:34 PDT 2023
Author: Francis Visoiu Mistrih
Date: 2023-08-31T17:13:19-07:00
New Revision: c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be
URL: https://github.com/llvm/llvm-project/commit/c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be
DIFF: https://github.com/llvm/llvm-project/commit/c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be.diff
LOG: [Matrix] Try to emit fmuladd for both vector and matrix types
For `vector * scalar + vector`, clang already emits `fmuladd` directly.
This change enables the same for `matrix * scalar + matrix`.
rdar://113967122
Differential Revision: https://reviews.llvm.org/D158883
Added:
Modified:
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/ffp-model.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 6d5a61b24133e2..a71b7057bb523a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3874,6 +3874,14 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
}
}
+ // For vector and matrix adds, try to fold into a fmuladd.
+ if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
+ // Try to form an fmuladd.
+ if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
+ return FMulAdd;
+ }
+
if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -3887,10 +3895,6 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
- // Try to form an fmuladd.
- if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
- return FMulAdd;
-
return Builder.CreateFAdd(op.LHS, op.RHS, "add");
}
@@ -4024,6 +4028,14 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
}
}
+ // For vector and matrix subs, try to fold into a fmuladd.
+ if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
+ // Try to form an fmuladd.
+ if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
+ return FMulAdd;
+ }
+
if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -4037,9 +4049,6 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
- // Try to form an fmuladd.
- if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
- return FMulAdd;
return Builder.CreateFSub(op.LHS, op.RHS, "sub");
}
diff --git a/clang/test/CodeGen/ffp-model.c b/clang/test/CodeGen/ffp-model.c
index 57fa0ef2782051..b3d297a2f85f46 100644
--- a/clang/test/CodeGen/ffp-model.c
+++ b/clang/test/CodeGen/ffp-model.c
@@ -1,18 +1,18 @@
// REQUIRES: x86-registered-target
-// RUN: %clang -S -emit-llvm -ffp-model=fast -emit-llvm %s -o - \
+// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
-// RUN: %clang -S -emit-llvm -ffp-model=precise %s -o - \
+// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise %s -o - \
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE
-// RUN: %clang -S -emit-llvm -ffp-model=strict %s -o - \
+// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict %s -o - \
// RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
-// RUN: %clang -S -emit-llvm -ffp-model=strict -ffast-math \
+// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict -ffast-math \
// RUN: -target x86_64 %s -o - | FileCheck %s \
// RUN: --check-prefixes CHECK,CHECK-STRICT-FAST
-// RUN: %clang -S -emit-llvm -ffp-model=precise -ffast-math \
+// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise -ffast-math \
// RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1
float mymuladd(float x, float y, float z) {
@@ -46,3 +46,105 @@ float mymuladd(float x, float y, float z) {
// CHECK-FAST1: load float, ptr {{.*}}
// CHECK-FAST1: fadd fast float {{.*}}, {{.*}}
}
+
+typedef float __attribute__((ext_vector_type(2))) v2f;
+
+v2f my_vec_muladd(v2f x, float y, v2f z) {
+ // CHECK: define{{.*}} @my_vec_muladd
+ return x * y + z;
+
+ // CHECK-FAST: fmul fast <2 x float>
+ // CHECK-FAST: load <2 x float>, ptr
+ // CHECK-FAST: fadd fast <2 x float>
+
+ // CHECK-PRECISE: load <2 x float>, ptr
+ // CHECK-PRECISE: load float, ptr
+ // CHECK-PRECISE: load <2 x float>, ptr
+ // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})
+
+ // CHECK-STRICT: load <2 x float>, ptr
+ // CHECK-STRICT: load float, ptr
+ // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
+ // CHECK-STRICT: load <2 x float>, ptr
+ // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
+
+ // CHECK-STRICT-FAST: load <2 x float>, ptr
+ // CHECK-STRICT-FAST: load float, ptr
+ // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
+ // CHECK-STRICT-FAST: load <2 x float>, ptr
+ // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}
+
+ // CHECK-FAST1: load <2 x float>, ptr
+ // CHECK-FAST1: load float, ptr
+ // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
+ // CHECK-FAST1: load <2 x float>, ptr {{.*}}
+ // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
+}
+
+typedef float __attribute__((matrix_type(2, 1))) m21f;
+
+m21f my_m21_muladd(m21f x, float y, m21f z) {
+ // CHECK: define{{.*}} <2 x float> @my_m21_muladd
+ return x * y + z;
+
+ // CHECK-FAST: fmul fast <2 x float>
+ // CHECK-FAST: load <2 x float>, ptr
+ // CHECK-FAST: fadd fast <2 x float>
+
+ // CHECK-PRECISE: load <2 x float>, ptr
+ // CHECK-PRECISE: load float, ptr
+ // CHECK-PRECISE: load <2 x float>, ptr
+ // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})
+
+ // CHECK-STRICT: load <2 x float>, ptr
+ // CHECK-STRICT: load float, ptr
+ // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
+ // CHECK-STRICT: load <2 x float>, ptr
+ // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
+
+ // CHECK-STRICT-FAST: load <2 x float>, ptr
+ // CHECK-STRICT-FAST: load float, ptr
+ // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
+ // CHECK-STRICT-FAST: load <2 x float>, ptr
+ // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}
+
+ // CHECK-FAST1: load <2 x float>, ptr
+ // CHECK-FAST1: load float, ptr
+ // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
+ // CHECK-FAST1: load <2 x float>, ptr {{.*}}
+ // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
+}
+
+typedef float __attribute__((matrix_type(2, 2))) m22f;
+
+m22f my_m22_muladd(m22f x, float y, m22f z) {
+ // CHECK: define{{.*}} <4 x float> @my_m22_muladd
+ return x * y + z;
+
+ // CHECK-FAST: fmul fast <4 x float>
+ // CHECK-FAST: load <4 x float>, ptr
+ // CHECK-FAST: fadd fast <4 x float>
+
+ // CHECK-PRECISE: load <4 x float>, ptr
+ // CHECK-PRECISE: load float, ptr
+ // CHECK-PRECISE: load <4 x float>, ptr
+ // CHECK-PRECISE: call <4 x float> @llvm.fmuladd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}})
+
+ // CHECK-STRICT: load <4 x float>, ptr
+ // CHECK-STRICT: load float, ptr
+ // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})
+ // CHECK-STRICT: load <4 x float>, ptr
+ // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})
+
+ // CHECK-STRICT-FAST: load <4 x float>, ptr
+ // CHECK-STRICT-FAST: load float, ptr
+ // CHECK-STRICT-FAST: fmul fast <4 x float> {{.*}}, {{.*}}
+ // CHECK-STRICT-FAST: load <4 x float>, ptr
+ // CHECK-STRICT-FAST: fadd fast <4 x float> {{.*}}, {{.*}}
+
+ // CHECK-FAST1: load <4 x float>, ptr
+ // CHECK-FAST1: load float, ptr
+ // CHECK-FAST1: fmul fast <4 x float> {{.*}}, {{.*}}
+ // CHECK-FAST1: load <4 x float>, ptr {{.*}}
+ // CHECK-FAST1: fadd fast <4 x float> {{.*}}, {{.*}}
+}
More information about the cfe-commits
mailing list