[clang] [compiler-rt] [clang][UBSan] Add implicit conversion check for bitfields (PR #75481)

Sun Mar 10 14:22:41 PDT 2024

https://github.com/Zonotora updated https://github.com/llvm/llvm-project/pull/75481

>From cd64144a4cf18adba1494871bfd9da1b0bf0a489 Mon Sep 17 00:00:00 2001
From: Zonotora <Zonotora at hotmail.com>
Date: Sat, 16 Dec 2023 19:33:21 +0100
Subject: [PATCH 1/3] [clang] Extract negativity check lambda to function

---
 clang/lib/CodeGen/CGExprScalar.cpp | 41 ++++++++++++++----------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 8536570087ad0f..74d94259572dc2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1103,6 +1103,21 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType,
                 {Src, Dst});
 }
 
+static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType,
+                                             const char *Name,
+                                             CGBuilderTy &Builder) {
+  bool VSigned = VType->isSignedIntegerOrEnumerationType();
+  llvm::Type *VTy = V->getType();
+  if (!VSigned) {
+    // If the value is unsigned, then it is never negative.
+    return llvm::ConstantInt::getFalse(VTy->getContext());
+  }
+  llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0);
+  return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
+                            llvm::Twine(Name) + "." + V->getName() +
+                                ".negativitycheck");
+};
+
 // Should be called within CodeGenFunction::SanitizerScope RAII scope.
 // Returns 'i1 false' when the conversion Src -> Dst changed the sign.
 static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
@@ -1127,30 +1142,12 @@ EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst,
   assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
          "either the widths should be different, or the signednesses.");
 
-  // NOTE: zero value is considered to be non-negative.
-  auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType,
-                                       const char *Name) -> Value * {
-    // Is this value a signed type?
-    bool VSigned = VType->isSignedIntegerOrEnumerationType();
-    llvm::Type *VTy = V->getType();
-    if (!VSigned) {
-      // If the value is unsigned, then it is never negative.
-      // FIXME: can we encounter non-scalar VTy here?
-      return llvm::ConstantInt::getFalse(VTy->getContext());
-    }
-    // Get the zero of the same type with which we will be comparing.
-    llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0);
-    // %V.isnegative = icmp slt %V, 0
-    // I.e is %V *strictly* less than zero, does it have negative value?
-    return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
-                              llvm::Twine(Name) + "." + V->getName() +
-                                  ".negativitycheck");
-  };
-
   // 1. Was the old Value negative?
-  llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src");
+  llvm::Value *SrcIsNegative =
+      EmitIsNegativeTestHelper(Src, SrcType, "src", Builder);
   // 2. Is the new Value negative?
-  llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst");
+  llvm::Value *DstIsNegative =
+      EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder);
   // 3. Now, was the 'negativity status' preserved during the conversion?
   //    NOTE: conversion from negative to zero is considered to change the sign.
   //    (We want to get 'false' when the conversion changed the sign)

>From e5914b0519efa0d70c429f2b08b326f863df4fbe Mon Sep 17 00:00:00 2001
From: Zonotora <Zonotora at hotmail.com>
Date: Sat, 16 Dec 2023 19:36:04 +0100
Subject: [PATCH 2/3] [clang][UBSan] Add implicit conversion check for
 bitfields

This patch implements the implicit truncation and implicit
sign change checks for bitfields.
---
 clang/docs/ReleaseNotes.rst                   |   7 +
 clang/docs/UndefinedBehaviorSanitizer.rst     |  19 +-
 clang/include/clang/Basic/Sanitizers.def      |  20 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  45 +++-
 clang/lib/CodeGen/CGExprScalar.cpp            | 224 +++++++++++++++++-
 clang/lib/CodeGen/CodeGenFunction.h           |  12 +
 .../test/CodeGen/ubsan-bitfield-conversion.c  |  61 +++++
 compiler-rt/lib/ubsan/ubsan_handlers.cpp      |  27 ++-
 compiler-rt/lib/ubsan/ubsan_handlers.h        |   1 +
 9 files changed, 378 insertions(+), 38 deletions(-)
 create mode 100644 clang/test/CodeGen/ubsan-bitfield-conversion.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3b89d5a8720785..7a8135077a2bc2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -173,6 +173,10 @@ Non-comprehensive list of changes in this release
 
 New Compiler Flags
 ------------------
+- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
+  sign change.
+- ``-fsanitize=implicit-integer-conversion`` a group that replaces the previous
+  group ``-fsanitize=implicit-conversion``.
 
 - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``.
   This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave
@@ -183,6 +187,9 @@ Deprecated Compiler Flags
 
 Modified Compiler Flags
 -----------------------
+- ``-fsanitize=implicit-conversion`` is now a group for both
+  ``-fsanitize=implicit-integer-conversion`` and
+  ``-fsanitize=implicit-bitfield-conversion``.
 
 Removed Compiler Flags
 -------------------------
diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 8f58c92bd2a163..531d56e313826c 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -148,6 +148,11 @@ Available checks are:
      Issues caught by this sanitizer are not undefined behavior,
      but are often unintentional.
   -  ``-fsanitize=integer-divide-by-zero``: Integer division by zero.
+  -  ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from
+     integer of larger bit width to smaller bitfield, if that results in data
+     loss. This includes unsigned/signed truncations and sign changes, similarly
+     to how the ``-fsanitize=implicit-integer-conversion`` group works, but
+     explicitly for bitfields.
   -  ``-fsanitize=nonnull-attribute``: Passing null pointer as a function
      parameter which is declared to never be null.
   -  ``-fsanitize=null``: Use of a null pointer or creation of a null
@@ -193,8 +198,8 @@ Available checks are:
      signed division overflow (``INT_MIN/-1``). Note that checks are still
      added even when ``-fwrapv`` is enabled. This sanitizer does not check for
      lossy implicit conversions performed before the computation (see
-     ``-fsanitize=implicit-conversion``). Both of these two issues are handled
-     by ``-fsanitize=implicit-conversion`` group of checks.
+     ``-fsanitize=implicit-integer-conversion``). Both of these two issues are handled
+     by ``-fsanitize=implicit-integer-conversion`` group of checks.
   -  ``-fsanitize=unreachable``: If control flow reaches an unreachable
      program point.
   -  ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where
@@ -202,7 +207,7 @@ Available checks are:
      type. Unlike signed integer overflow, this is not undefined behavior, but
      it is often unintentional. This sanitizer does not check for lossy implicit
      conversions performed before such a computation
-     (see ``-fsanitize=implicit-conversion``).
+     (see ``-fsanitize=implicit-integer-conversion``).
   -  ``-fsanitize=vla-bound``: A variable-length array whose bound
      does not evaluate to a positive value.
   -  ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of
@@ -224,11 +229,15 @@ You can also use the following check groups:
   -  ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit
      conversions that change the arithmetic value of the integer. Enables
      ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``.
-  -  ``-fsanitize=implicit-conversion``: Checks for suspicious
-     behavior of implicit conversions. Enables
+  -  ``-fsanitize=implicit-integer-conversion``: Checks for suspicious
+     behavior of implicit integer conversions. Enables
      ``implicit-unsigned-integer-truncation``,
      ``implicit-signed-integer-truncation``, and
      ``implicit-integer-sign-change``.
+  -  ``-fsanitize=implicit-conversion``: Checks for suspicious
+     behavior of implicit conversions. Enables
+     ``implicit-integer-conversion``, and
+     ``implicit-bitfield-conversion``.
   -  ``-fsanitize=integer``: Checks for undefined or suspicious integer
      behavior (e.g. unsigned integer overflow).
      Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``,
diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def
index c2137e3f61f645..b228ffd07ee745 100644
--- a/clang/include/clang/Basic/Sanitizers.def
+++ b/clang/include/clang/Basic/Sanitizers.def
@@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change",
                 ImplicitIntegerArithmeticValueChange,
                 ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation)
 
-SANITIZER("objc-cast", ObjCCast)
+SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion,
+                ImplicitIntegerArithmeticValueChange |
+                    ImplicitUnsignedIntegerTruncation)
 
-// FIXME:
-//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion,
-//                ImplicitIntegerArithmeticValueChange |
-//                    ImplicitUnsignedIntegerTruncation)
-//SANITIZER_GROUP("implicit-conversion", ImplicitConversion,
-//                ImplicitIntegerConversion)
+// Implicit bitfield sanitizers
+SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion)
 
 SANITIZER_GROUP("implicit-conversion", ImplicitConversion,
-                ImplicitIntegerArithmeticValueChange |
-                    ImplicitUnsignedIntegerTruncation)
+                ImplicitIntegerConversion |
+                    ImplicitBitfieldConversion)
 
 SANITIZER_GROUP("integer", Integer,
-                ImplicitConversion | IntegerDivideByZero | Shift |
+                ImplicitIntegerConversion | IntegerDivideByZero | Shift |
                     SignedIntegerOverflow | UnsignedIntegerOverflow |
                     UnsignedShiftBase)
 
+SANITIZER("objc-cast", ObjCCast)
+
 SANITIZER("local-bounds", LocalBounds)
 SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds)
 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 59a7fe8925001c..f04849f31b377c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5571,11 +5571,52 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
       break;
     }
 
-    RValue RV = EmitAnyExpr(E->getRHS());
+    llvm::Value *Previous = nullptr;
+    RValue RV;
+    QualType SrcType = E->getRHS()->getType();
+    // Check if LHS is a bitfield, if RHS contains an implicit cast expression
+    // we want to extract that value and potentially (if the bitfield sanitizer
+    // is enabled) use it to check for an implicit conversion.
+    if (E->getLHS()->refersToBitField()) {
+      // Get the RHS before scalar conversion.
+      if (auto *ICE = GetOriginalRHSForBitfieldSanitizer(E)) {
+        SrcType = ICE->getSubExpr()->getType();
+        Previous = EmitScalarExpr(ICE->getSubExpr());
+        // Pass default ScalarConversionOpts to avoid emitting
+        // integer sanitizer checks as E refers to bitfield.
+        llvm::Value *RHS = EmitScalarConversion(
+            Previous, SrcType, ICE->getType(), ICE->getExprLoc());
+        RV = RValue::get(RHS);
+      }
+    }
+
+    // Otherwise, visit RHS as usual.
+    if (!Previous)
+      RV = EmitAnyExpr(E->getRHS());
+
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
+
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
-    EmitStoreThroughLValue(RV, LV);
+
+    if (LV.isBitField() && RV.isScalar()) {
+      llvm::Value *Result;
+      // If bitfield sanitizers are enabled we want to use the result
+      // to check whether a truncation or sign change has occurred.
+      if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion))
+        EmitStoreThroughBitfieldLValue(RV, LV, &Result);
+      else
+        EmitStoreThroughBitfieldLValue(RV, LV);
+
+      // If the expression contained an implicit conversion, make sure
+      // to use the value before the scalar conversion.
+      llvm::Value *Src = Previous ? Previous : RV.getScalarVal();
+      QualType DstType = E->getLHS()->getType();
+      EmitBitfieldConversionCheck(Src, SrcType, Result, DstType,
+                                  LV.getBitFieldInfo(), E->getExprLoc());
+    } else
+      EmitStoreThroughLValue(RV, LV);
+
     if (getLangOpts().OpenMP)
       CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
                                                                 E->getLHS());
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 74d94259572dc2..aed591cb1d44ed 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -15,6 +15,7 @@
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
 #include "CGOpenMPRuntime.h"
+#include "CGRecordLayout.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
@@ -308,6 +309,7 @@ class ScalarExprEmitter
                                 llvm::Type *DstTy, SourceLocation Loc);
 
   /// Known implicit conversion check kinds.
+  /// This is used for bitfield conversion checks as well.
   /// Keep in sync with the enum of the same name in ubsan_handlers.h
   enum ImplicitConversionCheckKind : unsigned char {
     ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7.
@@ -1116,7 +1118,7 @@ static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType,
   return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
                             llvm::Twine(Name) + "." + V->getName() +
                                 ".negativitycheck");
-};
+}
 
 // Should be called within CodeGenFunction::SanitizerScope RAII scope.
 // Returns 'i1 false' when the conversion Src -> Dst changed the sign.
@@ -1242,6 +1244,136 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType,
                 {Src, Dst});
 }
 
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the truncation Src -> Dst was lossy.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+                 std::pair<llvm::Value *, SanitizerMask>>
+EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+                                  QualType DstType, CGBuilderTy &Builder) {
+  bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+  bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+  ScalarExprEmitter::ImplicitConversionCheckKind Kind;
+  if (!SrcSigned && !DstSigned)
+    Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation;
+  else
+    Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation;
+
+  llvm::Value *Check = nullptr;
+  // 1. Extend the truncated value back to the same width as the Src.
+  Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext");
+  // 2. Equality-compare with the original source value
+  Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck");
+  // If the comparison result is 'i1 false', then the truncation was lossy.
+
+  return std::make_pair(
+      Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion));
+}
+
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the conversion Src -> Dst changed the sign.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+                 std::pair<llvm::Value *, SanitizerMask>>
+EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+                                  QualType DstType, CGBuilderTy &Builder) {
+  // 1. Was the old Value negative?
+  llvm::Value *SrcIsNegative =
+      EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder);
+  // 2. Is the new Value negative?
+  llvm::Value *DstIsNegative =
+      EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder);
+  // 3. Now, was the 'negativity status' preserved during the conversion?
+  //    NOTE: conversion from negative to zero is considered to change the sign.
+  //    (We want to get 'false' when the conversion changed the sign)
+  //    So we should just equality-compare the negativity statuses.
+  llvm::Value *Check = nullptr;
+  Check =
+      Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck");
+  // If the comparison result is 'false', then the conversion changed the sign.
+  return std::make_pair(
+      ScalarExprEmitter::ICCK_IntegerSignChange,
+      std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion));
+}
+
+void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType,
+                                                  Value *Dst, QualType DstType,
+                                                  const CGBitFieldInfo &Info,
+                                                  SourceLocation Loc) {
+
+  if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion))
+    return;
+
+  // We only care about int->int conversions here.
+  // We ignore conversions to/from pointer and/or bool.
+  if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType,
+                                                                       DstType))
+    return;
+
+  if (DstType->isBooleanType() || SrcType->isBooleanType())
+    return;
+
+  // This should be truncation of integral types.
+  assert(isa<llvm::IntegerType>(Src->getType()) &&
+         isa<llvm::IntegerType>(Dst->getType()) && "non-integer llvm type");
+
+  // TODO: Calculate src width to avoid emitting code
+  // for unecessary cases.
+  unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits();
+  unsigned DstBits = Info.Size;
+
+  bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+  bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+  CodeGenFunction::SanitizerScope SanScope(this);
+
+  std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+            std::pair<llvm::Value *, SanitizerMask>>
+      Check;
+
+  // Truncation
+  bool EmitTruncation = DstBits < SrcBits;
+  // If Dst is signed and Src unsigned, we want to be more specific
+  // about the CheckKind we emit, in this case we want to emit
+  // ICCK_SignedIntegerTruncationOrSignChange.
+  bool EmitTruncationFromUnsignedToSigned =
+      EmitTruncation && DstSigned && !SrcSigned;
+  // Sign change
+  bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits;
+  bool BothUnsigned = !SrcSigned && !DstSigned;
+  bool LargerSigned = (DstBits > SrcBits) && DstSigned;
+  // We can avoid emitting sign change checks in some obvious cases
+  //   1. If Src and Dst have the same signedness and size
+  //   2. If both are unsigned sign check is unecessary!
+  //   3. If Dst is signed and bigger than Src, either
+  //      sign-extension or zero-extension will make sure
+  //      the sign remains.
+  bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned;
+
+  if (EmitTruncation)
+    Check =
+        EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+  else if (EmitSignChange) {
+    assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
+           "either the widths should be different, or the signednesses.");
+    Check =
+        EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder);
+  } else
+    return;
+
+  ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first;
+  if (EmitTruncationFromUnsignedToSigned)
+    CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange;
+
+  llvm::Constant *StaticArgs[] = {
+      EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType),
+      EmitCheckTypeDescriptor(DstType),
+      llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind),
+      llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)};
+
+  EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs,
+            {Src, Dst});
+}
+
 Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType,
                                          QualType DstType, llvm::Type *SrcTy,
                                          llvm::Type *DstTy,
@@ -2614,6 +2746,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   llvm::PHINode *atomicPHI = nullptr;
   llvm::Value *value;
   llvm::Value *input;
+  llvm::Value *Previous = nullptr;
+  QualType SrcType = E->getType();
 
   int amount = (isInc ? 1 : -1);
   bool isSubtraction = !isInc;
@@ -2702,7 +2836,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
              "base or promoted) will be signed, or the bitwidths will match.");
     }
     if (CGF.SanOpts.hasOneOf(
-            SanitizerKind::ImplicitIntegerArithmeticValueChange) &&
+            SanitizerKind::ImplicitIntegerArithmeticValueChange |
+            SanitizerKind::ImplicitBitfieldConversion) &&
         canPerformLossyDemotionCheck) {
       // While `x += 1` (for `x` with width less than int) is modeled as
       // promotion+arithmetics+demotion, and we can catch lossy demotion with
@@ -2713,13 +2848,26 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
       // the increment/decrement in the wider type, and finally
       // perform the demotion. This will catch lossy demotions.
 
+      // We have a special case for bitfields defined using all the bits of the
+      // type. In this case we need to do the same trick as for the integer
+      // sanitizer checks, i.e., promotion -> increment/decrement -> demotion.
+
       value = EmitScalarConversion(value, type, promotedType, E->getExprLoc());
       Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
       value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec");
       // Do pass non-default ScalarConversionOpts so that sanitizer check is
-      // emitted.
+      // emitted if LV is not a bitfield, otherwise the bitfield sanitizer
+      // checks will take care of the conversion.
+      ScalarConversionOpts Opts;
+      if (!LV.isBitField())
+        Opts = ScalarConversionOpts(CGF.SanOpts);
+      else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) {
+        Previous = value;
+        SrcType = promotedType;
+      }
+
       value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(),
-                                   ScalarConversionOpts(CGF.SanOpts));
+                                   Opts);
 
       // Note that signed integer inc/dec with width less than int can't
       // overflow because of promotion rules; we're just eliding a few steps
@@ -2904,9 +3052,12 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   }
 
   // Store the updated result through the lvalue.
-  if (LV.isBitField())
+  if (LV.isBitField()) {
+    Value *Src = Previous ? Previous : value;
     CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value);
-  else
+    CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(),
+                                    LV.getBitFieldInfo(), E->getExprLoc());
+  } else
     CGF.EmitStoreThroughLValue(RValue::get(value), LV);
 
   // If this is a postinc, return the value read from memory, otherwise use the
@@ -3411,8 +3562,15 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
 
   // Convert the result back to the LHS type,
   // potentially with Implicit Conversion sanitizer check.
-  Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc,
-                                ScalarConversionOpts(CGF.SanOpts));
+  // If LHSLV is a bitfield, use default ScalarConversionOpts
+  // to avoid emit any implicit integer checks.
+  Value *Previous = nullptr;
+  if (LHSLV.isBitField()) {
+    Previous = Result;
+    Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc);
+  } else
+    Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc,
+                                  ScalarConversionOpts(CGF.SanOpts));
 
   if (atomicPHI) {
     llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -3431,9 +3589,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
   // specially because the result is altered by the store, i.e., [C99 6.5.16p1]
   // 'An assignment expression has the value of the left operand after the
   // assignment...'.
-  if (LHSLV.isBitField())
+  if (LHSLV.isBitField()) {
+    Value *Src = Previous ? Previous : Result;
+    QualType SrcType = E->getRHS()->getType();
+    QualType DstType = E->getLHS()->getType();
     CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result);
-  else
+    CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType,
+                                    LHSLV.getBitFieldInfo(), E->getExprLoc());
+  } else
     CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV);
 
   if (CGF.getLangOpts().OpenMP)
@@ -4545,6 +4708,19 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
                               E->getExprLoc());
 }
 
+ImplicitCastExpr *
+CodeGenFunction::GetOriginalRHSForBitfieldSanitizer(const BinaryOperator *E) {
+  // In case we have the integer or bitfield sanitizer checks enabled
+  // we want to get the expression before scalar conversion.
+  if (auto *ICE = dyn_cast<ImplicitCastExpr>(E->getRHS())) {
+    CastKind Kind = ICE->getCastKind();
+    if (Kind == CK_IntegralCast) {
+      return ICE;
+    }
+  }
+  return nullptr;
+}
+
 Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
   bool Ignore = TestAndClearIgnoreResultAssign();
 
@@ -4573,7 +4749,27 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
   case Qualifiers::OCL_None:
     // __block variables need to have the rhs evaluated first, plus
     // this should improve codegen just a little.
-    RHS = Visit(E->getRHS());
+    Value *Previous = nullptr;
+    QualType SrcType = E->getRHS()->getType();
+    // Check if LHS is a bitfield, if RHS contains an implicit cast expression
+    // we want to extract that value and potentially (if the bitfield sanitizer
+    // is enabled) use it to check for an implicit conversion.
+    if (E->getLHS()->refersToBitField()) {
+      // Get the RHS before scalar conversion
+      if (auto *ICE = CGF.GetOriginalRHSForBitfieldSanitizer(E)) {
+        SrcType = ICE->getSubExpr()->getType();
+        Previous = Visit(ICE->getSubExpr());
+        // Pass default ScalarConversionOpts to avoid emitting
+        // integer sanitizer checks as E refers to bitfield.
+        RHS = EmitScalarConversion(Previous, SrcType, ICE->getType(),
+                                   ICE->getExprLoc());
+      }
+    }
+
+    // Otherwise, visit RHS as usual.
+    if (!Previous)
+      RHS = Visit(E->getRHS());
+
     LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
 
     // Store the value into the LHS.  Bit-fields are handled specially
@@ -4581,7 +4777,13 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
     // 'An assignment expression has the value of the left operand after
     // the assignment...'.
     if (LHS.isBitField()) {
+      // If the expression contained an implicit conversion, make sure
+      // to use the value before the scalar conversion.
+      Value *Src = Previous ? Previous : RHS;
+      QualType DstType = E->getLHS()->getType();
       CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS);
+      CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType,
+                                      LHS.getBitFieldInfo(), E->getExprLoc());
     } else {
       CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc());
       CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06327a18471778..00172a8eaefdf8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2697,6 +2697,18 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// expression and compare the result against zero, returning an Int1Ty value.
   llvm::Value *EvaluateExprAsBool(const Expr *E);
 
+  /// Retrieve the implicit cast expression of the rhs in a binary operator
+  /// expression This is used for implicit bitfield conversion checks, which
+  /// must compare with the value before truncation.
+  ImplicitCastExpr *GetOriginalRHSForBitfieldSanitizer(const BinaryOperator *E);
+
+  /// Emit a check that an [implicit] conversion of a bitfield. It is not UB,
+  /// so we use the value after conversion.
+  void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType,
+                                   llvm::Value *Dst, QualType DstType,
+                                   const CGBitFieldInfo &Info,
+                                   SourceLocation Loc);
+
   /// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
   void EmitIgnoredExpr(const Expr *E);
 
diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c
new file mode 100644
index 00000000000000..5f9623ca8e295f
--- /dev/null
+++ b/clang/test/CodeGen/ubsan-bitfield-conversion.c
@@ -0,0 +1,61 @@
+// UN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION
+// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+// UN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION
+
+typedef struct _xx {
+  int x1:3;
+  char x2:2;
+} xx, *pxx;
+
+xx vxx;
+
+// CHECK-LABEL: define{{.*}} void @foo1
+void foo1(int x) {
+  vxx.x1 = x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @foo2
+void foo2(int x) {
+  vxx.x2 = x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @foo3
+void foo3() {
+  vxx.x1++;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @foo4
+void foo4(int x) {
+  vxx.x1 += x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
\ No newline at end of file
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
index 0f16507d5d88f1..27d01653f088da 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -555,13 +555,11 @@ static void handleImplicitConversion(ImplicitConversionData *Data,
                                      ReportOptions Opts, ValueHandle Src,
                                      ValueHandle Dst) {
   SourceLocation Loc = Data->Loc.acquire();
-  ErrorType ET = ErrorType::GenericUB;
-
   const TypeDescriptor &SrcTy = Data->FromType;
   const TypeDescriptor &DstTy = Data->ToType;
-
   bool SrcSigned = SrcTy.isSignedIntegerTy();
   bool DstSigned = DstTy.isSignedIntegerTy();
+  ErrorType ET = ErrorType::GenericUB;
 
   switch (Data->Kind) {
   case ICCK_IntegerTruncation: { // Legacy, no longer used.
@@ -594,14 +592,23 @@ static void handleImplicitConversion(ImplicitConversionData *Data,
 
   ScopedReport R(Opts, Loc, ET);
 
+  // In the case we have a bitfield, we want to explicitly say so in the
+  // error message.
   // FIXME: is it possible to dump the values as hex with fixed width?
-
-  Diag(Loc, DL_Error, ET,
-       "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to "
-       "type %4 changed the value to %5 (%6-bit, %7signed)")
-      << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth()
-      << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst)
-      << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un");
+  if (Data->BitfieldBits)
+    Diag(Loc, DL_Error, ET,
+         "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to "
+         "type %4 changed the value to %5 (%6-bit bitfield, %7signed)")
+        << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth()
+        << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst)
+        << Data->BitfieldBits << (DstSigned ? "" : "un");
+  else
+    Diag(Loc, DL_Error, ET,
+         "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to "
+         "type %4 changed the value to %5 (%6-bit, %7signed)")
+        << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth()
+        << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst)
+        << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un");
 }
 
 void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data,
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h
index 3bd5046de3d7d1..bae661a56833dd 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.h
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.h
@@ -147,6 +147,7 @@ struct ImplicitConversionData {
   const TypeDescriptor &FromType;
   const TypeDescriptor &ToType;
   /* ImplicitConversionCheckKind */ unsigned char Kind;
+  unsigned int BitfieldBits;
 };
 
 /// \brief Implict conversion that changed the value.

>From bcf44e0593ba6215bec216f1bc9018d6b3583a70 Mon Sep 17 00:00:00 2001
From: Zonotora <Zonotora at hotmail.com>
Date: Thu, 15 Feb 2024 11:44:22 +0100
Subject: [PATCH 3/3] [clang][UBSan] Update testcases

Update testcases due to new -fsanitize options regarding bitfields
---
 clang/test/Driver/fsanitize.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index 1671825042c323..571f79a6e7f70d 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -35,20 +35,20 @@
 // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope"
 // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}}
 
-// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER
-// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER
-// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER
-// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP
-// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ???
-// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
-// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER
+// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER
+// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER
+// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP
+// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ???
+// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
+// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}}
 
 // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER
 // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER