[PATCH] D140467: [X86][Reduce] Preserve fast math flags when changing them. NFCI

Thu Dec 22 07:42:25 PST 2022

pengfei updated this revision to Diff 484838.
pengfei marked 2 inline comments as done.
pengfei added a comment.

Address review comments. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140467/new/

https://reviews.llvm.org/D140467

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-x86-reduce.c


Index: clang/test/CodeGen/builtins-x86-reduce.c
===================================================================

--- /dev/null
+++ clang/test/CodeGen/builtins-x86-reduce.c
@@ -0,0 +1,37 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm -o - | FileCheck %s
+
+typedef double double8 __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: @foo(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double foo(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
+
+#pragma clang fp reassociate(on)
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double bar(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14737,6 +14737,7 @@
   case X86::BI__builtin_ia32_reduce_fadd_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14747,6 +14748,7 @@
   case X86::BI__builtin_ia32_reduce_fmul_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14757,6 +14759,7 @@
   case X86::BI__builtin_ia32_reduce_fmax_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }
@@ -14767,6 +14770,7 @@
   case X86::BI__builtin_ia32_reduce_fmin_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D140467.484838.patch
Type: text/x-patch
Size: 3563 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20221222/26d9b593/attachment-0001.bin>