[clang] [clang][bytecode] Handle builtin_wmemcmp (PR #120070)

via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 16 04:04:29 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/120070.diff


2 Files Affected:

- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+49-22) 
- (modified) clang/test/AST/ByteCode/builtin-functions.cpp (+15) 


``````````diff
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c1c43672176add..d6b33c8aeeaac3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1917,7 +1917,8 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
   const APSInt &Size =
       peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
 
-  if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp)
+  if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp ||
+      ID == Builtin::BIwmemcmp)
     diagnoseNonConstexprBuiltin(S, OpPC, ID);
 
   if (Size.isZero()) {
@@ -1925,14 +1926,18 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
     return true;
   }
 
+  bool IsWide =
+      (ID == Builtin::BIwmemcmp || ID == Builtin::BI__builtin_wmemcmp);
+
+  const ASTContext &ASTCtx = S.getASTContext();
   // FIXME: This is an arbitrary limitation the current constant interpreter
   // had. We could remove this.
-  if (!isOneByteCharacterType(PtrA.getType()) ||
-      !isOneByteCharacterType(PtrB.getType())) {
+  if (!IsWide && (!isOneByteCharacterType(PtrA.getType()) ||
+                  !isOneByteCharacterType(PtrB.getType()))) {
     S.FFDiag(S.Current->getSource(OpPC),
              diag::note_constexpr_memcmp_unsupported)
-        << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str()
-        << PtrA.getType() << PtrB.getType();
+        << ("'" + ASTCtx.BuiltinInfo.getName(ID) + "'").str() << PtrA.getType()
+        << PtrB.getType();
     return false;
   }
 
@@ -1941,42 +1946,62 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
 
   // Now, read both pointers to a buffer and compare those.
   BitcastBuffer BufferA(
-      Bits(S.getASTContext().getTypeSize(PtrA.getFieldDesc()->getType())));
+      Bits(ASTCtx.getTypeSize(PtrA.getFieldDesc()->getType())));
   readPointerToBuffer(S.getContext(), PtrA, BufferA, false);
   // FIXME: The swapping here is UNDOING something we do when reading the
   // data into the buffer.
-  if (S.getASTContext().getTargetInfo().isBigEndian())
+  if (ASTCtx.getTargetInfo().isBigEndian())
     swapBytes(BufferA.Data.get(), BufferA.byteSize().getQuantity());
 
   BitcastBuffer BufferB(
-      Bits(S.getASTContext().getTypeSize(PtrB.getFieldDesc()->getType())));
+      Bits(ASTCtx.getTypeSize(PtrB.getFieldDesc()->getType())));
   readPointerToBuffer(S.getContext(), PtrB, BufferB, false);
   // FIXME: The swapping here is UNDOING something we do when reading the
   // data into the buffer.
-  if (S.getASTContext().getTargetInfo().isBigEndian())
+  if (ASTCtx.getTargetInfo().isBigEndian())
     swapBytes(BufferB.Data.get(), BufferB.byteSize().getQuantity());
 
   size_t MinBufferSize = std::min(BufferA.byteSize().getQuantity(),
                                   BufferB.byteSize().getQuantity());
-  size_t CmpSize =
-      std::min(MinBufferSize, static_cast<size_t>(Size.getZExtValue()));
-
-  for (size_t I = 0; I != CmpSize; ++I) {
-    std::byte A = BufferA.Data[I];
-    std::byte B = BufferB.Data[I];
 
-    if (A < B) {
-      pushInteger(S, -1, Call->getType());
-      return true;
-    } else if (A > B) {
-      pushInteger(S, 1, Call->getType());
-      return true;
+  unsigned ElemSize = 1;
+  if (IsWide)
+    ElemSize = ASTCtx.getTypeSizeInChars(ASTCtx.getWCharType()).getQuantity();
+  // The Size given for the wide variants is in wide-char units. Convert it
+  // to bytes.
+  size_t ByteSize = Size.getZExtValue() * ElemSize;
+  size_t CmpSize = std::min(MinBufferSize, ByteSize);
+
+  for (size_t I = 0; I != CmpSize; I += ElemSize) {
+    if (IsWide) {
+      INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), {
+        T A = *reinterpret_cast<T *>(BufferA.Data.get() + I);
+        T B = *reinterpret_cast<T *>(BufferB.Data.get() + I);
+        if (A < B) {
+          pushInteger(S, -1, Call->getType());
+          return true;
+        } else if (A > B) {
+          pushInteger(S, 1, Call->getType());
+          return true;
+        }
+      });
+    } else {
+      std::byte A = BufferA.Data[I];
+      std::byte B = BufferB.Data[I];
+
+      if (A < B) {
+        pushInteger(S, -1, Call->getType());
+        return true;
+      } else if (A > B) {
+        pushInteger(S, 1, Call->getType());
+        return true;
+      }
     }
   }
 
   // We compared CmpSize bytes above. If the limiting factor was the Size
   // passed, we're done and the result is equality (0).
-  if (Size.getZExtValue() <= CmpSize) {
+  if (ByteSize <= CmpSize) {
     pushInteger(S, 0, Call->getType());
     return true;
   }
@@ -2467,6 +2492,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
   case Builtin::BImemcmp:
   case Builtin::BI__builtin_bcmp:
   case Builtin::BIbcmp:
+  case Builtin::BI__builtin_wmemcmp:
+  case Builtin::BIwmemcmp:
     if (!interp__builtin_memcmp(S, OpPC, Frame, F, Call))
       return false;
     break;
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 4ee24646286fa8..5906cb970f06c4 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -1270,4 +1270,19 @@ namespace Memcmp {
   static_assert(__builtin_bcmp("abab\0banana", "abab\0canada", 7) != 0);
   static_assert(__builtin_bcmp("abab\0banana", "abab\0canada", 6) != 0);
   static_assert(__builtin_bcmp("abab\0banana", "abab\0canada", 5) == 0);
+
+
+  static_assert(__builtin_wmemcmp(L"abaa", L"abba", 3) == -1);
+  static_assert(__builtin_wmemcmp(L"abaa", L"abba", 2) == 0);
+  static_assert(__builtin_wmemcmp(0, 0, 0) == 0);
+#if __WCHAR_WIDTH__ == 32
+  static_assert(__builtin_wmemcmp(L"a\x83838383", L"aa", 2) ==
+                (wchar_t)-1U >> 31);
+#endif
+  static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0banana", 100) == 0); // both-error {{not an integral constant}} \
+                                                                                // both-note {{dereferenced one-past-the-end}}
+  static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 100) == -1); // FIXME: Should we reject this?
+  static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 7) == -1);
+  static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1);
+  static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0);
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/120070


More information about the cfe-commits mailing list