[llvm] [ConstantFolding] Fix bitcasting vectors with non-integer ratios (PR #179640)

Kacper Doga via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 07:11:31 PST 2026


https://github.com/varev-dev updated https://github.com/llvm/llvm-project/pull/179640

From 71348c0a994d22486741d3524feee99c30840e30 Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Wed, 4 Feb 2026 09:37:02 +0000
Subject: [PATCH] Support bitcasting vectors with non-integer ratios.

---
 llvm/lib/Analysis/ConstantFolding.cpp         | 149 ++++++++++++------
 llvm/test/Transforms/InstCombine/cast.ll      |   4 +-
 .../InstSimplify/bitcast-vector-fold.ll       |  65 +++++++-
 .../SCCP/bitcast-vector-refinement.l.ll       |   3 +-
 4 files changed, 168 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7573afe423ec9..d90c15a5825d9 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -217,81 +217,138 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
   // conversion here, which depends on whether the input or output has
   // more elements.
   bool isLittleEndian = DL.isLittleEndian();
-
+  unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
   SmallVector<Constant*, 32> Result;
+  unsigned SrcElt = 0;
+  APInt Rest(std::max(SrcBitSize, DstBitSize), 0);
+  unsigned RestBitSize = 0;
+  bool HasUndef = true;
+  bool HasPoison = false;
+
   if (NumDstElt < NumSrcElt) {
     // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
-    Constant *Zero = Constant::getNullValue(DstEltTy);
-    unsigned Ratio = NumSrcElt/NumDstElt;
-    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
-    unsigned SrcElt = 0;
-    for (unsigned i = 0; i != NumDstElt; ++i) {
-      // Build each element of the result.
-      Constant *Elt = Zero;
-      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
-      for (unsigned j = 0; j != Ratio; ++j) {
-        Constant *Src = C->getAggregateElement(SrcElt++);
-        if (isa_and_nonnull<UndefValue>(Src))
-          Src = Constant::getNullValue(
-              cast<VectorType>(C->getType())->getElementType());
-        else
-          Src = dyn_cast_or_null<ConstantInt>(Src);
-        if (!Src)  // Reject constantexpr elements.
+    //         bitcast (<4 x i24> <i24 0, i24 1, i24 2, i24 3> to <3 x i32>)
+    APInt Zero = APInt::getZero(DstBitSize);
+    while (Result.size() != NumDstElt) {
+      APInt NextVecElem;
+      if (!HasUndef)
+        NextVecElem = Zero;
+      while (RestBitSize < DstBitSize) {
+        assert(SrcElt < NumSrcElt && "Source vector overflow.");
+        auto *Element = C->getAggregateElement(SrcElt++);
+        if (!Element) // Reject constantexpr elements.
           return ConstantExpr::getBitCast(C, DestTy);
 
-        // Zero extend the element to the right size.
-        Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
-                                      DL);
-        assert(Src && "Constant folding cannot fail on plain integers");
+        RestBitSize += SrcBitSize;
+        HasUndef = isa<UndefValue>(Element);
+        if (HasUndef) {
+          HasPoison |= isa<PoisonValue>(Element);
+          continue;
+        }
 
-        // Shift it to the right place, depending on endianness.
-        Src = ConstantFoldBinaryOpOperands(
-            Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
-            DL);
-        assert(Src && "Constant folding cannot fail on plain integers");
+        auto *Src = dyn_cast<ConstantInt>(Element);
+        if (!Src)
+          return ConstantExpr::getBitCast(C, DestTy);
+        NextVecElem = Src->getValue();
+        NextVecElem = NextVecElem.zext(DstBitSize);
 
-        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+        // Shift next SrcElt to right place, depending on endianness.
+        if (isLittleEndian) {
+          Rest |= NextVecElem << RestBitSize - SrcBitSize;
+        } else {
+          if (RestBitSize <= DstBitSize)
+            Rest |= NextVecElem << (DstBitSize - RestBitSize);
+          else
+            Rest |= NextVecElem.lshr(RestBitSize - DstBitSize);
+        }
+      }
 
-        // Mix it in.
-        Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
-        assert(Elt && "Constant folding cannot fail on plain integers");
+      RestBitSize -= DstBitSize;
+      if (NextVecElem.getBitWidth() != DstBitSize) {
+        if (HasPoison)
+          Result.push_back(PoisonValue::get(DstEltTy));
+        else
+          Result.push_back(UndefValue::get(DstEltTy));
+        continue;
       }
-      Result.push_back(Elt);
+      Result.push_back(ConstantInt::get(DstEltTy, Rest));
+
+      // Shift unused bits from last SrcElt to next DstElt right place.
+      if (isLittleEndian)
+        Rest = NextVecElem.lshr(SrcBitSize - RestBitSize);
+      else
+        Rest = NextVecElem << (DstBitSize - RestBitSize);
     }
     return ConstantVector::get(Result);
   }
 
   // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  //         bitcast (<3 x i64> <i64 0, i64 1, i64 2> to <8 x i24>)
   unsigned Ratio = NumDstElt/NumSrcElt;
-  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
-
-  // Loop over each source value, expanding into multiple results.
-  for (unsigned i = 0; i != NumSrcElt; ++i) {
-    auto *Element = C->getAggregateElement(i);
-
+  while (Result.size() != NumDstElt) {
+    unsigned UnusedBits = SrcBitSize - RestBitSize;
+    if (RestBitSize >= DstBitSize) {
+      if (!HasUndef) {
+        APInt Elt =
+            Rest.lshr(isLittleEndian ? UnusedBits : (RestBitSize - DstBitSize));
+        Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
+      } else if (HasPoison) {
+        Result.push_back(PoisonValue::get(DstEltTy));
+        HasPoison = RestBitSize > DstBitSize;
+      } else {
+        Result.push_back(UndefValue::get(DstEltTy));
+        HasUndef = RestBitSize > DstBitSize;
+      }
+      RestBitSize -= DstBitSize;
+      continue;
+    }
+    auto *Element = C->getAggregateElement(SrcElt++);
     if (!Element) // Reject constantexpr elements.
       return ConstantExpr::getBitCast(C, DestTy);
 
+    APInt NextVecElem;
     if (isa<UndefValue>(Element)) {
       // Correctly Propagate undef values.
-      Result.append(Ratio, UndefValue::get(DstEltTy));
-      continue;
+      HasUndef = true;
+      HasPoison = isa<PoisonValue>(Element);
+      if (RestBitSize == 0) {
+        RestBitSize += SrcBitSize;
+        continue;
+      }
+      NextVecElem = APInt::getZero(SrcBitSize);
+    } else {
+      auto *Src = dyn_cast<ConstantInt>(Element);
+      if (!Src)
+        return ConstantExpr::getBitCast(C, DestTy);
+      NextVecElem = Src->getValue();
+      HasUndef = false;
     }
-
-    auto *Src = dyn_cast<ConstantInt>(Element);
-    if (!Src)
-      return ConstantExpr::getBitCast(C, DestTy);
-
-    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+    APInt Value = Rest;
+    if (SrcBitSize % DstBitSize)
+      Rest = NextVecElem;
+    if (RestBitSize != 0) {
+      // Shift the Rest into the right place, shift NextVecElem to fit Rest.
+      if (isLittleEndian) {
+        Value.lshrInPlace(UnusedBits);
+        NextVecElem <<= RestBitSize;
+      } else {
+        Value <<= UnusedBits;
+        NextVecElem.lshrInPlace(RestBitSize);
+      }
+    }
+    Value |= NextVecElem;
+    unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize - DstBitSize;
     for (unsigned j = 0; j != Ratio; ++j) {
       // Shift the piece of the value into the right place, depending on
       // endianness.
-      APInt Elt = Src->getValue().lshr(ShiftAmt);
+      APInt Elt = Value.lshr(ShiftAmt);
       ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
 
       // Truncate and remember this piece.
       Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
     }
+    RestBitSize += SrcBitSize - Ratio * DstBitSize;
   }
 
   return ConstantVector::get(Result);
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 46deb294b9d45..130fdec6fdb05 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -1438,10 +1438,10 @@ define i32 @test89() {
 
 define <2 x i32> @test90() {
 ; BE-LABEL: @test90(
-; BE-NEXT:    ret <2 x i32> <i32 0, i32 15360>
+; BE-NEXT:    ret <2 x i32> <i32 poison, i32 15360>
 ;
 ; LE-LABEL: @test90(
-; LE-NEXT:    ret <2 x i32> <i32 0, i32 1006632960>
+; LE-NEXT:    ret <2 x i32> <i32 poison, i32 1006632960>
 ;
   %t6 = bitcast <4 x half> <half poison, half poison, half poison, half 0xH3C00> to <2 x i32>
   ret <2 x i32> %t6
diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
index d2656e291547c..ede6db4dbc3e0 100644
--- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
-; RUN: opt < %s -passes=instsimplify -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-f64:32:64-v64:64:64-v128:128:128"
+; RUN: opt < %s -passes=instsimplify -S -data-layout="e-p:32:32:32-i1:8:8-i8:8:8-f64:32:64-v64:64:64-v128:128:128" | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=instsimplify -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat -S -data-layout="e-p:32:32:32-i1:8:8-i8:8:8-f64:32:64-v64:64:64-v128:128:128" | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=instsimplify -S -data-layout="E-p:32:32:32-i1:8:8-i8:8:8-f64:32:64-v64:64:64-v128:128:128" | FileCheck %s --check-prefixes=BE
 
 define <2 x i64> @test1() {
 ; CHECK-LABEL: @test1(
@@ -83,6 +83,49 @@ define <1 x i1> @test10() {
   ret <1 x i1> %ret
 }
 
+define <4 x i24> @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    ret <4 x i24> <i24 1, i24 256, i24 65536, i24 0>
+;
+; BE-LABEL: @test11(
+; BE-NEXT:    ret <4 x i24> <i24 0, i24 65536, i24 256, i24 1>
+;
+  %c = bitcast <3 x i32> <i32 1, i32 1, i32 1> to <4 x i24>
+  ret <4 x i24> %c
+}
+
+define <8 x i24> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    ret <8 x i24> splat (i24 1)
+;
+; BE-LABEL: @test12(
+; BE-NEXT:    ret <8 x i24> <i24 256, i24 256, i24 257, i24 1, i24 1, i24 0, i24 65536, i24 65536>
+  %c = bitcast <3 x i64> <i64 281474993487873, i64 72057598332895488, i64 1099511693312> to <8 x i24>
+  ret <8 x i24> %c
+}
+
+define <3 x i32> @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    ret <3 x i32> splat (i32 1)
+;
+; BE-LABEL: @test13(
+; BE-NEXT:    ret <3 x i32> <i32 256, i32 16777472, i32 0>
+;
+  %c = bitcast <4 x i24> <i24 1, i24 256, i24 65536, i24 0> to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @test14() {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    ret <3 x i64> <i64 281474993487873, i64 72057598332895488, i64 1099511693312>
+;
+; BE-LABEL: @test14(
+; BE-NEXT:    ret <3 x i64> <i64 1099511693312, i64 72057598332895488, i64 281474993487873>
+;
+  %c = bitcast <8 x i24> splat (i24 1) to <3 x i64>
+  ret <3 x i64> %c
+}
+
 ; from MultiSource/Benchmarks/Bullet
 define <2 x float> @foo() {
 ; CHECK-LABEL: @foo(
@@ -277,6 +320,22 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
   ret <16 x i8> %cast
 }
 
+define <4 x i24> @bitcast_constexpr_4i24_3i32_u1u() {
+; CHECK-LABEL: @bitcast_constexpr_4i24_3i32_u1u(
+; CHECK-NEXT:    ret <4 x i24> <i24 undef, i24 256, i24 0, i24 undef>
+;
+  %cast = bitcast <3 x i32><i32 undef, i32 1, i32 undef> to <4 x i24>
+  ret <4 x i24> %cast
+}
+
+define <3 x i32> @bitcast_constexpr_3i32_4i24_n1255uu() {
+; CHECK-LABEL: @bitcast_constexpr_3i32_4i24_n1255uu(
+; CHECK-NEXT:    ret <3 x i32> <i32 -1, i32 0, i32 undef>
+;
+  %cast = bitcast <4 x i24><i24 -1, i24 255, i24 undef, i24 undef> to <3 x i32>
+  ret <3 x i32> %cast
+}
+
 define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() {
 ; CHECK-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
 ; CHECK-NEXT:    ret <1 x i32> splat (i32 1065353216)
diff --git a/llvm/test/Transforms/SCCP/bitcast-vector-refinement.l.ll b/llvm/test/Transforms/SCCP/bitcast-vector-refinement.l.ll
index 94df835e317b7..754f749128869 100644
--- a/llvm/test/Transforms/SCCP/bitcast-vector-refinement.l.ll
+++ b/llvm/test/Transforms/SCCP/bitcast-vector-refinement.l.ll
@@ -10,8 +10,7 @@ define <32 x i8> @test(i1 %cond) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i64> [ zeroinitializer, %[[ENTRY]] ], [ splat (i64 1), %[[FOR_COND2]] ]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[FOR_COND2]], label %[[IF_THEN:.*]]
 ; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> zeroinitializer to <32 x i8>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <32 x i8> zeroinitializer
 ;
 entry:
   br label %for.cond2



More information about the llvm-commits mailing list