[llvm] [ConstantFolding] Support bitcasting vectors to smaller element sizes with non-integer ratios. (PR #179640)

Wed Feb 11 03:03:13 PST 2026

https://github.com/varev-dev updated https://github.com/llvm/llvm-project/pull/179640

>From 3d54070bdc4d35e1a727a4287fce4f42c909ffa1 Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Wed, 4 Feb 2026 09:37:02 +0000
Subject: [PATCH] [ConstantFolding] Support bitcasting vectors to smaller
 element sizes with non-integer ratios.

Fixes #179626
---
 llvm/lib/Analysis/ConstantFolding.cpp         | 62 ++++++++++++++-----
 .../InstSimplify/bitcast-vector-fold.ll       | 24 +++++++
 2 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index c0754d3a41264..ea6e8297bd732 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -217,13 +217,12 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
   // conversion here, which depends on whether the input or output has
   // more elements.
   bool isLittleEndian = DL.isLittleEndian();
-
+  unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
   SmallVector<Constant*, 32> Result;
   if (NumDstElt < NumSrcElt) {
     // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
     Constant *Zero = Constant::getNullValue(DstEltTy);
-    unsigned Ratio = NumSrcElt/NumDstElt;
-    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+    unsigned Ratio = NumSrcElt / NumDstElt;
     unsigned SrcElt = 0;
     for (unsigned i = 0; i != NumDstElt; ++i) {
       // Build each element of the result.
@@ -262,36 +261,65 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
   }
 
   // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  //         bitcast (<3 x i64> <i64 0, i64 1, i64 2> to <8 x i24>)
   unsigned Ratio = NumDstElt/NumSrcElt;
   unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
-
-  // Loop over each source value, expanding into multiple results.
-  for (unsigned i = 0; i != NumSrcElt; ++i) {
-    auto *Element = C->getAggregateElement(i);
-
+  APInt Rest(SrcBitSize, 0);
+  unsigned RestSize = 0;
+  for (unsigned i = 0; i != NumSrcElt || Result.size() != NumDstElt;) {
+    unsigned UnusedBits = SrcBitSize - RestSize;
+    if (RestSize >= DstBitSize) {
+      APInt Elt = isLittleEndian ? Rest.lshr(UnusedBits)
+                                 : Rest << (RestSize - DstBitSize);
+      Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
+      RestSize -= DstBitSize;
+      Rest.clearBits(UnusedBits - 1, UnusedBits - 1 + DstBitSize);
+      continue;
+    }
+    auto *Element = C->getAggregateElement(i++);
     if (!Element) // Reject constantexpr elements.
       return ConstantExpr::getBitCast(C, DestTy);
 
+    APInt NextVecElem;
     if (isa<UndefValue>(Element)) {
       // Correctly Propagate undef values.
-      Result.append(Ratio, UndefValue::get(DstEltTy));
-      continue;
+      if (SrcBitSize % DstBitSize == 0) {
+        Result.append(Ratio, UndefValue::get(DstEltTy));
+        continue;
+      }
+      // Undef bits may share a result element with defined bits; use zeros.
+      NextVecElem = APInt::getZero(SrcBitSize);
+    } else {
+      auto *Src = dyn_cast<ConstantInt>(Element);
+      if (!Src)
+        return ConstantExpr::getBitCast(C, DestTy);
+      NextVecElem = Src->getValue();
     }
-
-    auto *Src = dyn_cast<ConstantInt>(Element);
-    if (!Src)
-      return ConstantExpr::getBitCast(C, DestTy);
-
-    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+    APInt Value = Rest;
+    if (SrcBitSize % DstBitSize)
+      Rest = NextVecElem;
+    if (RestSize != 0) {
+      // Align the leftover bits and the new element so they can be OR'ed.
+      if (isLittleEndian) {
+        Value.lshrInPlace(UnusedBits == SrcBitSize ? 0 : UnusedBits);
+        NextVecElem <<= RestSize;
+      } else {
+        Value <<= UnusedBits;
+        NextVecElem.lshrInPlace(RestSize);
+      }
+    }
+    Value |= NextVecElem;
+    unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize - DstBitSize;
     for (unsigned j = 0; j != Ratio; ++j) {
       // Shift the piece of the value into the right place, depending on
       // endianness.
-      APInt Elt = Src->getValue().lshr(ShiftAmt);
+      APInt Elt = Value.lshr(ShiftAmt);
       ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
 
       // Truncate and remember this piece.
       Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
     }
+    RestSize = SrcBitSize + RestSize - Ratio * DstBitSize;
   }
 
   return ConstantVector::get(Result);
diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
index d2656e291547c..3ad83ba18d64c 100644
--- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -83,6 +83,22 @@ define <1 x i1> @test10() {
   ret <1 x i1> %ret
 }
 
+define <4 x i24> @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    ret <4 x i24> <i24 1, i24 256, i24 65536, i24 0>
+;
+  %c = bitcast <3 x i32> <i32 1, i32 1, i32 1> to <4 x i24>
+  ret <4 x i24> %c
+}
+
+define <8 x i24> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    ret <8 x i24> splat (i24 1)
+;
+  %c = bitcast <3 x i64> <i64 281474993487873, i64 72057598332895488, i64 1099511693312> to <8 x i24>
+  ret <8 x i24> %c
+}
+
 ; from MultiSource/Benchmarks/Bullet
 define <2 x float> @foo() {
 ; CHECK-LABEL: @foo(
@@ -277,6 +293,14 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
   ret <16 x i8> %cast
 }
 
+define <4 x i24> @bitcast_constexpr_4i24_3i32_u1u() {
+; CHECK-LABEL: @bitcast_constexpr_4i24_3i32_u1u(
+; CHECK-NEXT:    ret <4 x i24> <i24 0, i24 256, i24 0, i24 0>
+;
+  %cast = bitcast <3 x i32><i32 undef, i32 1, i32 undef> to <4 x i24>
+  ret <4 x i24> %cast
+}
+
 define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() {
 ; CHECK-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
 ; CHECK-NEXT:    ret <1 x i32> splat (i32 1065353216)