[clang] eaadbcd - [clang][Interp] Implement __builtin_strcmp

Timm Bäder via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 20 06:46:19 PDT 2023


Author: Timm Bäder
Date: 2023-07-20T15:46:04+02:00
New Revision: eaadbcd5e06893f2536e643d5d08b161ecf8c7ec

URL: https://github.com/llvm/llvm-project/commit/eaadbcd5e06893f2536e643d5d08b161ecf8c7ec
DIFF: https://github.com/llvm/llvm-project/commit/eaadbcd5e06893f2536e643d5d08b161ecf8c7ec.diff

LOG: [clang][Interp] Implement __builtin_strcmp

Make our Function class keep a list of parameter offsets so we can
simply get a parameter by index when evaluating builtin functions.

Differential Revision: https://reviews.llvm.org/D149816

Added: 
    clang/test/AST/Interp/builtin-functions.cpp

Modified: 
    clang/lib/AST/Interp/ByteCodeEmitter.cpp
    clang/lib/AST/Interp/Function.cpp
    clang/lib/AST/Interp/Function.h
    clang/lib/AST/Interp/Interp.h
    clang/lib/AST/Interp/InterpBuiltin.cpp
    clang/lib/AST/Interp/Pointer.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index 3248877b5f4704..f2072f974c4084 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -26,6 +26,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   // Set up argument indices.
   unsigned ParamOffset = 0;
   SmallVector<PrimType, 8> ParamTypes;
+  SmallVector<unsigned, 8> ParamOffsets;
   llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;
 
   // If the return is not a primitive, a pointer to the storage where the
@@ -36,6 +37,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
     HasRVO = true;
     ParamTypes.push_back(PT_Ptr);
+    ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(PT_Ptr));
   }
 
@@ -47,6 +49,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
     if (MD->isInstance()) {
       HasThisPointer = true;
       ParamTypes.push_back(PT_Ptr);
+      ParamOffsets.push_back(ParamOffset);
       ParamOffset += align(primSize(PT_Ptr));
     }
 
@@ -75,6 +78,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
     Descriptor *Desc = P.createDescriptor(PD, Ty);
     ParamDescriptors.insert({ParamOffset, {Ty, Desc}});
     Params.insert({PD, ParamOffset});
+    ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(Ty));
     ParamTypes.push_back(Ty);
   }
@@ -82,9 +86,9 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   // Create a handle over the emitted code.
   Function *Func = P.getFunction(FuncDecl);
   if (!Func)
-    Func =
-        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
-                         std::move(ParamDescriptors), HasThisPointer, HasRVO);
+    Func = P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
+                            std::move(ParamDescriptors),
+                            std::move(ParamOffsets), HasThisPointer, HasRVO);
 
   assert(Func);
   // For not-yet-defined functions, we only create a Function instance and

diff  --git a/clang/lib/AST/Interp/Function.cpp b/clang/lib/AST/Interp/Function.cpp
index 4e6d175c41b261..75312999d23d66 100644
--- a/clang/lib/AST/Interp/Function.cpp
+++ b/clang/lib/AST/Interp/Function.cpp
@@ -16,12 +16,14 @@ using namespace clang;
 using namespace clang::interp;
 
 Function::Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
-                   llvm::SmallVector<PrimType, 8> &&ParamTypes,
+                   llvm::SmallVectorImpl<PrimType> &&ParamTypes,
                    llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
+                   llvm::SmallVectorImpl<unsigned> &&ParamOffsets,
                    bool HasThisPointer, bool HasRVO)
     : P(P), Loc(F->getBeginLoc()), F(F), ArgSize(ArgSize),
       ParamTypes(std::move(ParamTypes)), Params(std::move(Params)),
-      HasThisPointer(HasThisPointer), HasRVO(HasRVO) {}
+      ParamOffsets(std::move(ParamOffsets)), HasThisPointer(HasThisPointer),
+      HasRVO(HasRVO) {}
 
 Function::ParamDescriptor Function::getParamDescriptor(unsigned Offset) const {
   auto It = Params.find(Offset);

diff  --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h
index 357e6e2bf50968..55a23ff288e846 100644
--- a/clang/lib/AST/Interp/Function.h
+++ b/clang/lib/AST/Interp/Function.h
@@ -156,12 +156,17 @@ class Function final {
 
   unsigned getNumParams() const { return ParamTypes.size(); }
 
+  unsigned getParamOffset(unsigned ParamIndex) const {
+    return ParamOffsets[ParamIndex];
+  }
+
 private:
   /// Construct a function representing an actual function.
   Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
-           llvm::SmallVector<PrimType, 8> &&ParamTypes,
+           llvm::SmallVectorImpl<PrimType> &&ParamTypes,
            llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
-           bool HasThisPointer, bool HasRVO);
+           llvm::SmallVectorImpl<unsigned> &&ParamOffsets, bool HasThisPointer,
+           bool HasRVO);
 
   /// Sets the code of a function.
   void setCode(unsigned NewFrameSize, std::vector<std::byte> &&NewCode,
@@ -201,6 +206,8 @@ class Function final {
   llvm::SmallVector<PrimType, 8> ParamTypes;
   /// Map from byte offset to parameter descriptor.
   llvm::DenseMap<unsigned, ParamDescriptor> Params;
+  /// List of parameter offsets.
+  llvm::SmallVector<unsigned, 8> ParamOffsets;
   /// Flag to indicate if the function is valid.
   bool IsValid = false;
   /// Flag to indicate if the function is done being

diff  --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 15112001536650..ff67e873a08445 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -169,7 +169,7 @@ bool CheckFloatResult(InterpState &S, CodePtr OpPC, APFloat::opStatus Status);
 bool Interpret(InterpState &S, APValue &Result);
 
 /// Interpret a builtin function.
-bool InterpretBuiltin(InterpState &S, CodePtr &PC, unsigned BuiltinID);
+bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F);
 
 enum class ArithOp { Add, Sub };
 
@@ -1701,7 +1701,7 @@ inline bool CallBI(InterpState &S, CodePtr &PC, const Function *Func) {
   InterpFrame *FrameBefore = S.Current;
   S.Current = NewFrame.get();
 
-  if (InterpretBuiltin(S, PC, Func->getBuiltinID())) {
+  if (InterpretBuiltin(S, PC, Func)) {
     NewFrame.release();
     return true;
   }

diff  --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp
index c929ad687d829a..c11f22aa94cacf 100644
--- a/clang/lib/AST/Interp/InterpBuiltin.cpp
+++ b/clang/lib/AST/Interp/InterpBuiltin.cpp
@@ -13,15 +13,64 @@
 namespace clang {
 namespace interp {
 
-bool InterpretBuiltin(InterpState &S, CodePtr &PC, unsigned BuiltinID) {
+template <typename T> T getParam(InterpFrame *Frame, unsigned Index) {
+  unsigned Offset = Frame->getFunction()->getParamOffset(Index);
+  return Frame->getParam<T>(Offset);
+}
+
+static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
+                                   InterpFrame *Frame) {
+  const Pointer &A = getParam<Pointer>(Frame, 0);
+  const Pointer &B = getParam<Pointer>(Frame, 1);
+
+  if (!CheckLive(S, OpPC, A, AK_Read) || !CheckLive(S, OpPC, B, AK_Read))
+    return false;
+
+  assert(A.getFieldDesc()->isPrimitiveArray());
+  assert(B.getFieldDesc()->isPrimitiveArray());
+
+  unsigned IndexA = A.getIndex();
+  unsigned IndexB = B.getIndex();
+  int32_t Result = 0;
+  for (;; ++IndexA, ++IndexB) {
+    const Pointer &PA = A.atIndex(IndexA);
+    const Pointer &PB = B.atIndex(IndexB);
+    if (!CheckRange(S, OpPC, PA, AK_Read) ||
+        !CheckRange(S, OpPC, PB, AK_Read)) {
+      return false;
+    }
+    uint8_t CA = PA.deref<uint8_t>();
+    uint8_t CB = PB.deref<uint8_t>();
+
+    if (CA > CB) {
+      Result = 1;
+      break;
+    } else if (CA < CB) {
+      Result = -1;
+      break;
+    }
+    if (CA == 0 || CB == 0)
+      break;
+  }
+
+  S.Stk.push<Integral<32, true>>(Integral<32, true>::from(Result));
+  return true;
+}
+
+bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F) {
+  InterpFrame *Frame = S.Current;
   APValue Dummy;
 
-  switch (BuiltinID) {
+  switch (F->getBuiltinID()) {
   case Builtin::BI__builtin_is_constant_evaluated:
     S.Stk.push<Boolean>(Boolean::from(S.inConstantContext()));
-    return Ret<PT_Bool, true>(S, PC, Dummy);
+    return Ret<PT_Bool, true>(S, OpPC, Dummy);
   case Builtin::BI__builtin_assume:
-    return RetVoid<true>(S, PC, Dummy);
+    return RetVoid<true>(S, OpPC, Dummy);
+  case Builtin::BI__builtin_strcmp:
+    if (interp__builtin_strcmp(S, OpPC, Frame))
+      return Ret<PT_Sint32, true>(S, OpPC, Dummy);
+    return false;
   default:
     return false;
   }

diff  --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 7d9e45a0a5a206..f795466f1db4c5 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -325,7 +325,8 @@ class Pointer {
 
   /// Dereferences a primitive element.
   template <typename T> T &elem(unsigned I) const {
-    return reinterpret_cast<T *>(Pointee->rawData())[I];
+    assert(I < getNumElems());
+    return reinterpret_cast<T *>(Pointee->data() + sizeof(InitMap *))[I];
   }
 
   /// Initializes a field.

diff  --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp
new file mode 100644
index 00000000000000..e5141b0049d35a
--- /dev/null
+++ b/clang/test/AST/Interp/builtin-functions.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter %s -verify
+// RUN: %clang_cc1 -verify=ref %s -Wno-constant-evaluated
+
+namespace strcmp {
+  constexpr char kFoobar[6] = {'f','o','o','b','a','r'};
+  constexpr char kFoobazfoobar[12] = {'f','o','o','b','a','z','f','o','o','b','a','r'};
+
+  static_assert(__builtin_strcmp("", "") == 0);
+  static_assert(__builtin_strcmp("abab", "abab") == 0);
+  static_assert(__builtin_strcmp("abab", "abba") == -1);
+  static_assert(__builtin_strcmp("abab", "abaa") == 1);
+  static_assert(__builtin_strcmp("ababa", "abab") == 1);
+  static_assert(__builtin_strcmp("abab", "ababa") == -1);
+  static_assert(__builtin_strcmp("a\203", "a") == 1);
+  static_assert(__builtin_strcmp("a\203", "a\003") == 1);
+  static_assert(__builtin_strcmp("abab\0banana", "abab") == 0);
+  static_assert(__builtin_strcmp("abab", "abab\0banana") == 0);
+  static_assert(__builtin_strcmp("abab\0banana", "abab\0canada") == 0);
+  static_assert(__builtin_strcmp(0, "abab") == 0); // expected-error {{not an integral constant}} \
+                                                   // expected-note {{dereferenced null}} \
+                                                   // expected-note {{in call to}} \
+                                                   // ref-error {{not an integral constant}} \
+                                                   // ref-note {{dereferenced null}}
+  static_assert(__builtin_strcmp("abab", 0) == 0); // expected-error {{not an integral constant}} \
+                                                   // expected-note {{dereferenced null}} \
+                                                   // expected-note {{in call to}} \
+                                                   // ref-error {{not an integral constant}} \
+                                                   // ref-note {{dereferenced null}}
+
+  static_assert(__builtin_strcmp(kFoobar, kFoobazfoobar) == -1);
+  static_assert(__builtin_strcmp(kFoobar, kFoobazfoobar + 6) == 0); // expected-error {{not an integral constant}} \
+                                                                    // expected-note {{dereferenced one-past-the-end}} \
+                                                                    // expected-note {{in call to}} \
+                                                                    // ref-error {{not an integral constant}} \
+                                                                    // ref-note {{dereferenced one-past-the-end}}
+}


        


More information about the cfe-commits mailing list