[clang] 7501e53 - [Clang] Give warning for an underaligned 128-bit __sync library call.

Jonas Paulsson via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 15 04:46:33 PDT 2023


Author: Jonas Paulsson
Date: 2023-03-15T12:46:06+01:00
New Revision: 7501e53b8d6d7563b047a34d0141630d5afc86c5

URL: https://github.com/llvm/llvm-project/commit/7501e53b8d6d7563b047a34d0141630d5afc86c5
DIFF: https://github.com/llvm/llvm-project/commit/7501e53b8d6d7563b047a34d0141630d5afc86c5.diff

LOG: [Clang] Give warning for an underaligned 128-bit __sync library call.

On SystemZ, __int128 values are generally only 8-byte aligned per the ABI, while
the 128-bit atomic ISA instructions require a full 16-byte alignment.

__sync builtins are emitted as atomicrmw instructions, which always require the
natural alignment (16 bytes in this case) and are always given it in the emitted
IR, regardless of the actual alignment of the value being addressed.

This patch improves the situation by emitting a warning if the alignment is not
known to be sufficient. The check is done in CodeGen rather than Sema, since
CodeGen is currently the only place where the alignment can be computed. It
could (and should) be moved into Sema if the alignment computation can
eventually be done there.
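
For illustration, a minimal sketch (not part of this commit) of code that would
be expected to trigger the new warning on SystemZ, where a plain __int128 global
is only 8-byte aligned, together with the __atomic alternative the diagnostic
suggests (the global and function names are hypothetical):

  __int128 Counter;  /* ABI alignment is 8 bytes; atomicrmw wants 16 */

  __int128 add_sync(__int128 v) {
    /* expected: warning: __sync builtin operation MUST have natural alignment
                 (consider using __atomic). [-Wsync-alignment] */
    return __sync_fetch_and_add(&Counter, v);
  }

  __int128 add_atomic(__int128 v) {
    /* __atomic builtins respect the actual alignment and may fall back to a
       runtime library call when the object is not suitably aligned. */
    return __atomic_fetch_add(&Counter, v, __ATOMIC_SEQ_CST);
  }

Since the warning is in its own diagnostic group, it can be disabled with
-Wno-sync-alignment if needed.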

Reviewed By: efriedma, jyknight, uweigand

Differential Revision: https://reviews.llvm.org/D143813

Added: 
    clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
    clang/test/CodeGen/SystemZ/sync-builtins-i128-8Al.c

Modified: 
    clang/include/clang/Basic/DiagnosticFrontendKinds.td
    clang/include/clang/Basic/DiagnosticGroups.td
    clang/lib/CodeGen/CGBuiltin.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index b35c697701c1d..c1a71d51d91bc 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -309,6 +309,10 @@ def warn_atomic_op_oversized : Warning<
   "; the access size (%0 bytes) exceeds the max lock-free size (%1  bytes)">,
 InGroup<AtomicAlignment>;
 
+def warn_sync_op_misaligned : Warning<
+  "__sync builtin operation MUST have natural alignment (consider using __atomic).">,
+  InGroup<SyncAlignment>;
+
 def warn_alias_with_section : Warning<
   "%select{alias|ifunc}1 will not be in section '%0' but in the same section "
   "as the %select{aliasee|resolver}2">,

diff  --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 866dca0083810..274f58b1a5ff0 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -801,6 +801,7 @@ def AtomicAlignment : DiagGroup<"atomic-alignment">;
 def CustomAtomic : DiagGroup<"custom-atomic-properties">;
 def AtomicProperties : DiagGroup<"atomic-properties",
                                  [ImplicitAtomic, CustomAtomic]>;
+def SyncAlignment : DiagGroup<"sync-alignment">;
 def ARCUnsafeRetainedAssign : DiagGroup<"arc-unsafe-retained-assign">;
 def ARCRetainCycles : DiagGroup<"arc-retain-cycles">;
 def ARCNonPodMemAccess : DiagGroup<"arc-non-pod-memaccess">;

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fa8703b1e5202..572d742f36a98 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -223,9 +224,23 @@ static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
 }
 
+static void CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
+  ASTContext &Ctx = CGF.getContext();
+  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  unsigned Bytes = Ptr.getElementType()->isPointerTy()
+                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
+                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
+  unsigned Align = Ptr.getAlignment().getQuantity();
+  if (Align % Bytes != 0) {
+    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
+    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
+  }
+}
+
 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                                llvm::AtomicRMWInst::BinOp Kind,
                                const CallExpr *E) {
+  CheckAtomicAlignment(CGF, E);
   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
 }
 
@@ -237,6 +252,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                    const CallExpr *E,
                                    Instruction::BinaryOps Op,
                                    bool Invert = false) {
+  CheckAtomicAlignment(CGF, E);
   QualType T = E->getType();
   assert(E->getArg(0)->getType()->isPointerType());
   assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -284,6 +300,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                      bool ReturnBool) {
+  CheckAtomicAlignment(CGF, E);
   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
@@ -4016,6 +4033,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__sync_lock_release_4:
   case Builtin::BI__sync_lock_release_8:
   case Builtin::BI__sync_lock_release_16: {
+    CheckAtomicAlignment(*this, E);
     Value *Ptr = EmitScalarExpr(E->getArg(0));
     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);

diff  --git a/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
new file mode 100644
index 0000000000000..1adc17a476c25
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c
@@ -0,0 +1,206 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+//
+// Test __sync_ builtins for __int128 aligned to 16 bytes.
+
+#include <stdint.h>
+
+__int128 Ptr __attribute__((aligned(16)));
+__int128 Val __attribute__((aligned(16)));
+__int128 OldVal __attribute__((aligned(16)));
+
+// CHECK-LABEL: @f1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f1() {
+  return __sync_fetch_and_add(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f2() {
+  return __sync_fetch_and_sub(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f3() {
+  return __sync_fetch_and_or(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f4() {
+  return __sync_fetch_and_and(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f5(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f5() {
+  return __sync_fetch_and_xor(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f6(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f6() {
+  return __sync_fetch_and_nand(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f7(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f7() {
+  return __sync_add_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f8() {
+  return __sync_sub_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f9(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f9() {
+  return __sync_or_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f10(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f10() {
+  return __sync_and_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f11(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f11() {
+  return __sync_xor_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f12(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = xor i128 [[TMP2]], -1
+// CHECK-NEXT:    store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f12() {
+  return __sync_nand_and_fetch(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f13(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT:    ret i1 [[TMP3]]
+//
+_Bool f13() {
+  return __sync_bool_compare_and_swap(&Ptr, OldVal, Val);
+}
+
+// CHECK-LABEL: @f14(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @OldVal, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT:    store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f14() {
+  return __sync_val_compare_and_swap(&Ptr, OldVal, Val);
+}
+
+// CHECK-LABEL: @f15(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f15() {
+  return __sync_lock_test_and_set(&Ptr, Val);
+}
+
+// CHECK-LABEL: @f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    store atomic i128 0, ptr @Ptr release, align 16
+// CHECK-NEXT:    ret void
+//
+void f16() {
+  return __sync_lock_release(&Ptr);
+}
+
+// CHECK-LABEL: @f17(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+__int128 f17() {
+  return __sync_swap(&Ptr, Val);
+}

diff  --git a/clang/test/CodeGen/SystemZ/sync-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/sync-builtins-i128-8Al.c
new file mode 100644
index 0000000000000..76c9c0ebed2bb
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/sync-builtins-i128-8Al.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - 2>&1 | FileCheck %s
+//
+// Test that an underaligned 16 byte __sync gives a warning.
+
+#include <stdint.h>
+
+__int128 Ptr __attribute__((aligned(8)));
+
+__int128 f1() {
+// CHECK: warning: __sync builtin operation MUST have natural alignment (consider using __atomic). [-Wsync-alignment]
+  return __sync_fetch_and_add(&Ptr, 1);
+}
+
+__int128 f2() {
+// CHECK: warning: __sync builtin operation MUST have natural alignment (consider using __atomic). [-Wsync-alignment]
+  return __sync_sub_and_fetch(&Ptr, 1);
+}
+
+__int128 f3() {
+// CHECK: warning: __sync builtin operation MUST have natural alignment (consider using __atomic). [-Wsync-alignment]
+  return __sync_val_compare_and_swap(&Ptr, 0, 1);
+}
+
+void f4() {
+// CHECK: warning: __sync builtin operation MUST have natural alignment (consider using __atomic). [-Wsync-alignment]
+  __sync_lock_release(&Ptr);
+}


        


More information about the cfe-commits mailing list