[clang] [clang][bytecode] Handle __builtin_memcmp (PR #119544)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 11 06:38:10 PST 2024


https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/119544

>From b10358067fa9b4e4d05293be168092cd2ea43d12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Wed, 11 Dec 2024 12:35:29 +0100
Subject: [PATCH] [clang][bytecode] Handle __builtin_memcmp

---
 clang/lib/AST/ByteCode/BitcastBuffer.h        |  9 +++
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      | 74 ++++++++++++++++++-
 .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp |  6 +-
 clang/lib/AST/ByteCode/InterpBuiltinBitCast.h |  8 +-
 clang/test/AST/ByteCode/builtin-functions.cpp | 25 +++++++
 clang/test/SemaCXX/constexpr-string.cpp       |  2 +-
 6 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index b1b6b9e5173a7c..d1d6ee39ad17bc 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -18,6 +18,8 @@ namespace interp {
 
 enum class Endian { Little, Big };
 
+struct Bytes;
+
 /// A quantity in bits.
 struct Bits {
   size_t N = 0;
@@ -30,6 +32,7 @@ struct Bits {
   bool isFullByte() const { return N % 8 == 0; }
   bool nonZero() const { return N != 0; }
   bool isZero() const { return N == 0; }
+  Bytes toBytes() const;
 
   Bits operator-(Bits Other) const { return Bits(N - Other.N); }
   Bits operator+(Bits Other) const { return Bits(N + Other.N); }
@@ -56,6 +59,11 @@ struct Bytes {
   Bits toBits() const { return Bits(N * 8); }
 };
 
+inline Bytes Bits::toBytes() const {
+  assert(isFullByte());
+  return Bytes(N / 8);
+}
+
 /// A bit range. Both Start and End are inclusive.
 struct BitRange {
   Bits Start;
@@ -83,6 +91,7 @@ struct BitcastBuffer {
 
   /// Returns the buffer size in bits.
   Bits size() const { return FinalBitSize; }
+  Bytes byteSize() const { return FinalBitSize.toBytes(); }
 
   /// Returns \c true if all bits in the buffer have been initialized.
   bool allInitialized() const;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 4fe17ec01906e9..5c40f4e52c71a0 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -190,6 +190,12 @@ static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+/// Determine if T is a character type for which we guarantee that
+/// sizeof(T) == 1.
+static bool isOneByteCharacterType(QualType T) {
+  return T->isCharType() || T->isChar8Type();
+}
+
 static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const Function *Func, const CallExpr *Call) {
@@ -219,6 +225,19 @@ static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
   assert(A.getFieldDesc()->isPrimitiveArray());
   assert(B.getFieldDesc()->isPrimitiveArray());
 
+#if 0
+  if (IsRawByte &&
+      (!isOneByteCharacterType(A.getFieldDesc()->getElemQualType()) ||
+       !isOneByteCharacterType(B.getFieldDesc()->getElemQualType()))) {
+    QualType CharTy1 = A.getFieldDesc()->getElemQualType();
+    QualType CharTy2 = B.getFieldDesc()->getElemQualType();
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_memcmp_unsupported)
+        << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str()
+        << CharTy1 << CharTy2;
+    return false;
+  }
+#endif
   unsigned IndexA = A.getIndex();
   unsigned IndexB = B.getIndex();
   int32_t Result = 0;
@@ -1830,6 +1849,7 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
 
   return true;
 }
+
 static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const Function *Func, const CallExpr *Call) {
@@ -1876,7 +1896,8 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
   }
 
   // Check for overlapping memory regions.
-  if (!Move && SrcPtr.block() == DestPtr.block()) {
+  if (!Move && Pointer::pointToSameBlock(
+                   SrcPtr, DestPtr)) { // SrcPtr.block() == DestPtr.block()) {
     unsigned SrcIndex = SrcPtr.getIndex() * SrcPtr.elemSize();
     unsigned DstIndex = DestPtr.getIndex() * DestPtr.elemSize();
     unsigned N = Size.getZExtValue();
@@ -1900,6 +1921,51 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
+                                   const InterpFrame *Frame,
+                                   const Function *Func, const CallExpr *Call) {
+  assert(Call->getNumArgs() == 3);
+  unsigned ID = Func->getBuiltinID();
+  Pointer PtrA = getParam<Pointer>(Frame, 0);
+  const Pointer &PtrB = getParam<Pointer>(Frame, 1);
+  const APSInt &Size =
+      peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
+
+  if (ID == Builtin::BImemcmp)
+    diagnoseNonConstexprBuiltin(S, OpPC, ID);
+
+  if (Size.isZero()) {
+    pushInteger(S, 0, Call->getType());
+    return true;
+  }
+
+  if (PtrA.isDummy() || PtrB.isDummy())
+    return false;
+
+  // Now, read both pointers to a buffer and compare those.
+
+  BitcastBuffer BufferA(
+      Bits(S.getASTContext().getTypeSize(PtrA.getFieldDesc()->getType())));
+  readPointerToBuffer(S.getContext(), PtrA, BufferA, false);
+
+  BitcastBuffer BufferB(
+      Bits(S.getASTContext().getTypeSize(PtrB.getFieldDesc()->getType())));
+  readPointerToBuffer(S.getContext(), PtrB, BufferB, false);
+
+  size_t MinBufferSize = std::min(BufferA.byteSize().getQuantity(),
+                                  BufferB.byteSize().getQuantity());
+  size_t CmpSize = std::min(MinBufferSize, Size.getZExtValue());
+  int Result = std::memcmp(BufferA.Data.get(), BufferB.Data.get(), CmpSize);
+  if (Result == 0)
+    pushInteger(S, 0, Call->getType());
+  else if (Result < 0)
+    pushInteger(S, -1, Call->getType());
+  else
+    pushInteger(S, 1, Call->getType());
+
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
                       const CallExpr *Call, uint32_t BuiltinID) {
   const InterpFrame *Frame = S.Current;
@@ -2373,6 +2439,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
+  case Builtin::BI__builtin_memcmp:
+  case Builtin::BImemcmp:
+    if (!interp__builtin_memcmp(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
index c9141c0fad2f57..c87993b8739a77 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
@@ -259,8 +259,10 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T,
   return true;
 }
 
-static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
-                                BitcastBuffer &Buffer, bool ReturnOnUninit) {
+bool clang::interp::readPointerToBuffer(const Context &Ctx,
+                                        const Pointer &FromPtr,
+                                        BitcastBuffer &Buffer,
+                                        bool ReturnOnUninit) {
   const ASTContext &ASTCtx = Ctx.getASTContext();
   Endian TargetEndianness =
       ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.h b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.h
index 92e6ffc79fc4f0..08c207c7415dfa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.h
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CLANG_AST_INTERP_BUILITN_BIT_CAST_H
-#define LLVM_CLANG_AST_INTERP_BUILITN_BIT_CAST_H
+#ifndef LLVM_CLANG_AST_INTERP_BUILTIN_BIT_CAST_H
+#define LLVM_CLANG_AST_INTERP_BUILTIN_BIT_CAST_H
 
 #include "BitcastBuffer.h"
 #include <cstddef>
@@ -17,6 +17,7 @@ namespace interp {
 class Pointer;
 class InterpState;
 class CodePtr;
+class Context;
 
 bool DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
                std::byte *Buff, Bits BitWidth, Bits FullBitWidth,
@@ -25,7 +26,8 @@ bool DoBitCastPtr(InterpState &S, CodePtr OpPC, const Pointer &FromPtr,
                   Pointer &ToPtr);
 bool DoBitCastPtr(InterpState &S, CodePtr OpPC, const Pointer &FromPtr,
                   Pointer &ToPtr, size_t Size);
-
+bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
+                         BitcastBuffer &Buffer, bool ReturnOnUninit);
 } // namespace interp
 } // namespace clang
 
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index ef6faae030a8f2..ec92ba7952dccb 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -1223,3 +1223,28 @@ namespace BuiltinMemcpy {
   static_assert(test_memcpy(0, 1, sizeof(int) * 2) == 2334); // both-error {{not an integral constant expression}} \
                                                              // both-note {{in call}}
 }
+
+namespace Memcmp {
+  constexpr unsigned char ku00fe00[] = {0x00, 0xfe, 0x00};
+  constexpr unsigned char ku00feff[] = {0x00, 0xfe, 0xff};
+  constexpr signed char ks00fe00[] = {0, -2, 0};
+  constexpr signed char ks00feff[] = {0, -2, -1};
+  static_assert(__builtin_memcmp(ku00feff, ks00fe00, 2) == 0);
+  static_assert(__builtin_memcmp(ku00feff, ks00fe00, 99) == 1);
+  static_assert(__builtin_memcmp(ku00fe00, ks00feff, 99) == -1);
+  static_assert(__builtin_memcmp(ks00feff, ku00fe00, 2) == 0);
+  static_assert(__builtin_memcmp(ks00feff, ku00fe00, 99) == 1);
+  static_assert(__builtin_memcmp(ks00fe00, ku00feff, 99) == -1);
+  static_assert(__builtin_memcmp(ks00fe00, ks00feff, 2) == 0);
+  static_assert(__builtin_memcmp(ks00feff, ks00fe00, 99) == 1);
+  static_assert(__builtin_memcmp(ks00fe00, ks00feff, 99) == -1);
+
+  struct Bool3Tuple { bool bb[3]; };
+  constexpr Bool3Tuple kb000100 = {{false, true, false}};
+  static_assert(sizeof(bool) != 1u || __builtin_memcmp(ks00fe00, kb000100.bb, 1) == 0); // ref-error {{constant}} \
+                                                                                        // ref-note {{not supported}}
+
+  constexpr char a = 'a';
+  constexpr char b = 'a';
+  static_assert(__builtin_memcmp(&a, &b, 1) == 0);
+}
diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp
index c456740ef7551f..3537326be3461d 100644
--- a/clang/test/SemaCXX/constexpr-string.cpp
+++ b/clang/test/SemaCXX/constexpr-string.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++2a -fsyntax-only -verify -pedantic -Wno-vla-extension
+// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++2a -fsyntax-only -verify -pedantic -Wno-vla-extension -fexperimental-new-constant-interpreter
 // RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=gnu++2a -fsyntax-only -verify -pedantic -Wno-vla-extension -DGNUMODE
 // RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++2a -fsyntax-only -verify -pedantic -Wno-vla-extension -fno-signed-char
 // RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++2a -fsyntax-only -verify -pedantic -Wno-vla-extension -fno-wchar -DNO_PREDEFINED_WCHAR_T



More information about the cfe-commits mailing list