[clang] [clang][codegen] Don't mark "int" TBAA on FP libcalls with indirect args (PR #108853)

Mon Sep 16 09:34:33 PDT 2024

https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/108853

On some targets, an FP libcall with argument types such as long double
will be lowered to pass arguments indirectly via pointers. When this is
the case we should not mark the libcall with "int" TBAA as it may lead
to incorrect optimizations.

Currently, this can be seen for long doubles on x86_64-w64-mingw32. The
`load x86_fp80` after the call is (incorrectly) marked with "int" TBAA
(overwriting the previous metadata for "long double").

Nothing seems to break due to this currently as the metadata is being
incorrectly placed on the load and not the call. But if the metadata
is moved to the call (which this patch ensures), LLVM will optimize out
the setup for the arguments.


>From 6db9f6d56f0bbd56d017156f858eae68653fbd1b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Sep 2024 16:27:23 +0000
Subject: [PATCH 1/2] Precommit math-libcalls-tbaa-indirect-args.c

---
 .../math-libcalls-tbaa-indirect-args.c        | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c

diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
new file mode 100644
index 00000000000000..dd013dcc8b3ca8
--- /dev/null
+++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
@@ -0,0 +1,38 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple=x86_64-w64-mingw32 -fmath-errno -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
+
+long double powl(long double a, long double b);
+
+// Negative test: powl is a floating-point math function that is
+// ConstWithoutErrnoAndExceptions, however, for this target long doubles are
+// passed indirectly via a pointer. Annotating the call with "int" TBAA metadata
+// will cause the setup for the BYVAL arguments to be incorrectly optimized out.
+
+// CHECK-LABEL: define dso_local void @test_powl(
+// CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(x86_fp80) align 16 [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[BYVAL_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[BYVAL_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    store x86_fp80 0xK40008000000000000000, ptr [[BYVAL_TEMP]], align 16, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
+// CHECK-NEXT:    store x86_fp80 0xK40008000000000000000, ptr [[BYVAL_TEMP1]], align 16, !tbaa [[TBAA3]]
+// CHECK-NEXT:    call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP]], ptr noundef nonnull [[BYVAL_TEMP]], ptr noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[BYVAL_TEMP]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
+// CHECK-NEXT:    store x86_fp80 [[TMP0]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA3]]
+// CHECK-NEXT:    ret void
+//
+long double test_powl() {
+   return powl(2.0L, 2.0L); // Don't emit TBAA metadata
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"long double", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+//.

>From 482639f9a8df7785d1b24c723571f477eb5febd7 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Sep 2024 16:14:01 +0000
Subject: [PATCH 2/2] [clang][codegen] Don't mark "int" TBAA on FP libcalls
 with indirect args

On some targets, an FP libcall with argument types such as long double
will be lowered to pass arguments indirectly via pointers. When this is
the case we should not mark the libcall with "int" TBAA as it may lead
to incorrect optimizations.

Currently, this can be seen for long doubles on x86_64-w64-mingw32. The
`load x86_fp80` after the call is (incorrectly) marked with "int" TBAA
(overwriting the previous metadata for "long double").

Nothing seems to break due to this currently as the metadata is being
incorrectly placed on the load and not the call. But if the metadata
is moved to the call (which this patch ensures), LLVM will optimize out
the setup for the arguments.
---
 clang/lib/CodeGen/CGBuiltin.cpp               | 24 +++++++++++++++----
 clang/lib/CodeGen/CGExpr.cpp                  |  6 ++++-
 clang/lib/CodeGen/CodeGenFunction.h           |  3 ++-
 .../math-libcalls-tbaa-indirect-args.c        |  4 +---
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 27abeba92999b3..5730e7867a648f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -690,23 +690,37 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                               const CallExpr *E, llvm::Constant *calleeValue) {
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
+  llvm::CallBase *callOrInvoke = nullptr;
+  CGFunctionInfo const *FnInfo = nullptr;
   RValue Call =
-      CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
+      CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
+                   /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
 
   if (unsigned BuiltinID = FD->getBuiltinID()) {
     // Check whether a FP math builtin function, such as BI__builtin_expf
     ASTContext &Context = CGF.getContext();
     bool ConstWithoutErrnoAndExceptions =
         Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
+
+    // Before annotating this libcall with "int" TBAA metadata check all
+    // arguments are passed directly. On some targets, types such as "long
+    // double" are passed indirectly via a pointer, and annotating the call with
+    // "int" TBAA metadata will lead to set up for those arguments being
+    // incorrectly optimized out.
+    bool AllArgumentsPassedDirectly =
+        llvm::all_of(FnInfo->arguments(), [&](auto const &ArgInfo) {
+          return ArgInfo.info.isDirect() || ArgInfo.info.isExtend();
+        });
+
     // Restrict to target with errno, for example, MacOS doesn't set errno.
     // TODO: Support builtin function with complex type returned, eg: cacosh
-    if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
-        !CGF.Builder.getIsFPConstrained() && Call.isScalar()) {
+    if (AllArgumentsPassedDirectly && ConstWithoutErrnoAndExceptions &&
+        CGF.CGM.getLangOpts().MathErrno && !CGF.Builder.getIsFPConstrained() &&
+        Call.isScalar()) {
       // Emit "int" TBAA metadata on FP math libcalls.
       clang::QualType IntTy = Context.IntTy;
       TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
-      Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());
-      CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
+      CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
     }
   }
   return Call;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 35b5daaf6d4b55..9166db4c74128c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5932,7 +5932,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
                                  const CGCallee &OrigCallee, const CallExpr *E,
                                  ReturnValueSlot ReturnValue,
                                  llvm::Value *Chain,
-                                 llvm::CallBase **CallOrInvoke) {
+                                 llvm::CallBase **CallOrInvoke,
+                                 CGFunctionInfo const **ResolvedFnInfo) {
   // Get the actual function type. The callee type will always be a pointer to
   // function type or a block pointer type.
   assert(CalleeType->isFunctionPointerType() &&
@@ -6111,6 +6112,9 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
   const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
       Args, FnType, /*ChainCall=*/Chain);
 
+  if (ResolvedFnInfo)
+    *ResolvedFnInfo = &FnInfo;
+
   // C99 6.5.2.2p6:
   //   If the expression that denotes the called function has a type
   //   that does not include a prototype, [the default argument
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 9f868c98458996..1755dab3e3e4e8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4396,7 +4396,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   }
   RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E,
                   ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr,
-                  llvm::CallBase **CallOrInvoke = nullptr);
+                  llvm::CallBase **CallOrInvoke = nullptr,
+                  CGFunctionInfo const **ResolvedFnInfo = nullptr);
 
   // If a Call or Invoke instruction was emitted for this CallExpr, this method
   // writes the pointer to `CallOrInvoke` if it's not null.
diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
index dd013dcc8b3ca8..56c6b3ec00bc7e 100644
--- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
+++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
@@ -19,7 +19,7 @@ long double powl(long double a, long double b);
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
 // CHECK-NEXT:    store x86_fp80 0xK40008000000000000000, ptr [[BYVAL_TEMP1]], align 16, !tbaa [[TBAA3]]
 // CHECK-NEXT:    call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP]], ptr noundef nonnull [[BYVAL_TEMP]], ptr noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[BYVAL_TEMP]]) #[[ATTR3]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[BYVAL_TEMP1]]) #[[ATTR3]]
 // CHECK-NEXT:    store x86_fp80 [[TMP0]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA3]]
@@ -33,6 +33,4 @@ long double test_powl() {
 // CHECK: [[META4]] = !{!"long double", [[META5:![0-9]+]], i64 0}
 // CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
 // CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
-// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
 //.