[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation (PR #109208)
Richard Smith via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 20 12:25:52 PDT 2024
================
@@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
return LVal.Base.dyn_cast<const ValueDecl*>();
}
-static bool IsLiteralLValue(const LValue &Value) {
- if (Value.getLValueCallIndex())
+// Information about an LValueBase that is some kind of string.
+struct LValueBaseString {
+ std::string ObjCEncodeStorage;
+ StringRef Bytes;
+ int CharWidth;
+};
+
+// Gets the lvalue base of LVal as a string.
+static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
+ LValueBaseString &AsString) {
+ const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>();
+ if (!BaseExpr)
+ return false;
+
+ // For ObjCEncodeExpr, we need to compute and store the string.
+ if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) {
+ Info.Ctx.getObjCEncodingForType(EE->getEncodedType(),
+ AsString.ObjCEncodeStorage);
+ AsString.Bytes = AsString.ObjCEncodeStorage;
+ AsString.CharWidth = 1;
+ return true;
+ }
+
+ // Otherwise, we have a StringLiteral.
+ const auto *Lit = dyn_cast<StringLiteral>(BaseExpr);
+ if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr))
+ Lit = PE->getFunctionName();
+
+ if (!Lit)
return false;
- const Expr *E = Value.Base.dyn_cast<const Expr*>();
- return E && !isa<MaterializeTemporaryExpr>(E);
+
+ AsString.Bytes = Lit->getBytes();
+ AsString.CharWidth = Lit->getCharByteWidth();
+ return true;
+}
+
+// Determine whether two string literals potentially overlap. This will be the
+// case if they agree on the values of all the bytes in the overlapping region
+// between them.
+//
+// The overlapping region is the portion of the two string literals that must
+// overlap in memory if the pointers actually point to the same address at
+// runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then
+// the overlapping region is "cdef\0", which in this case does agree, so the
+// strings are potentially overlapping. Conversely, for "foobar" + 3 versus
+// "bazbar" + 3, the overlapping region contains all of both strings, so they
+// are not potentially overlapping, even though they agree from the given
+// addresses onwards.
+//
+// See open core issue CWG2765 which is discussing the desired rule here.
+static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
+ const LValue &LHS,
+ const LValue &RHS) {
+ LValueBaseString LHSString, RHSString;
+ if (!GetLValueBaseAsString(Info, LHS, LHSString) ||
+ !GetLValueBaseAsString(Info, RHS, RHSString))
+ return false;
+
+ // This is the byte offset to the location of the first character of LHS
+ // within RHS. We don't need to look at the characters of one string that
+ // would appear before the start of the other string if they were merged.
+ CharUnits Offset = RHS.Offset - LHS.Offset;
+ if (Offset.isNegative())
+ LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
+ else
+ RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
+
+ bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
+ StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
+ StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes;
+ int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth;
+
+ // The null terminator isn't included in the string data, so check for it
+ // manually. If the longer string doesn't have a null terminator where the
+ // shorter string ends, they aren't potentially overlapping.
+ for (int nullByte : llvm::seq(ShorterCharWidth)) {
+ if (Shorter.size() + nullByte >= Longer.size())
+ break;
+ if (Longer[Shorter.size() + nullByte])
----------------
zygoloid wrote:
```suggestion
if (Shorter.size() + NullByte >= Longer.size())
break;
if (Longer[Shorter.size() + NullByte])
```
https://github.com/llvm/llvm-project/pull/109208
More information about the cfe-commits mailing list