[llvm] [ConstantFolding] Support bitcasting vectors to smaller element sizes with non-integer ratios. (PR #179640)
Kacper Doga via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 06:23:17 PST 2026
https://github.com/varev-dev updated https://github.com/llvm/llvm-project/pull/179640
>From 2468011d13bed5a17bdd8728b2ee6829e497b003 Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Wed, 4 Feb 2026 09:37:02 +0000
Subject: [PATCH 1/4] [ConstantFolding] Support bitcasting vectors to smaller
element sizes with non-integer ratios.
Fixes #179626
---
llvm/lib/Analysis/ConstantFolding.cpp | 62 ++++++++++++++-----
.../InstSimplify/bitcast-vector-fold.ll | 24 +++++++
2 files changed, 69 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7573afe423ec9..06d7a2613855a 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -217,13 +217,12 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// conversion here, which depends on whether the input or output has
// more elements.
bool isLittleEndian = DL.isLittleEndian();
-
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
// Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
Constant *Zero = Constant::getNullValue(DstEltTy);
- unsigned Ratio = NumSrcElt/NumDstElt;
- unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned Ratio = NumSrcElt / NumDstElt;
unsigned SrcElt = 0;
for (unsigned i = 0; i != NumDstElt; ++i) {
// Build each element of the result.
@@ -262,36 +261,65 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
}
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // bitcast (<3 x i64> <i64 0, i64 1, i64 2> to <8 x i24>)
unsigned Ratio = NumDstElt/NumSrcElt;
unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
-
- // Loop over each source value, expanding into multiple results.
- for (unsigned i = 0; i != NumSrcElt; ++i) {
- auto *Element = C->getAggregateElement(i);
-
+ APInt Rest(SrcBitSize, 0);
+ unsigned RestSize = 0;
+ for (unsigned i = 0; i != NumSrcElt || Result.size() != NumDstElt;) {
+ unsigned UnusedBits = SrcBitSize - RestSize;
+ if (RestSize >= DstBitSize) {
+ APInt Elt = isLittleEndian ? Rest.lshr(UnusedBits)
+ : Rest << (RestSize - DstBitSize);
+ Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
+ RestSize -= DstBitSize;
+ Rest.clearBits(UnusedBits - 1, UnusedBits - 1 + DstBitSize);
+ continue;
+ }
+ auto *Element = C->getAggregateElement(i++);
if (!Element) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
+ APInt NextVecElem;
if (isa<UndefValue>(Element)) {
// Correctly Propagate undef values.
- Result.append(Ratio, UndefValue::get(DstEltTy));
- continue;
+ if (SrcBitSize % DstBitSize == 0) {
+ Result.append(Ratio, UndefValue::get(DstEltTy));
+ continue;
+ }
+ // Undef would straddle result elements here, so fold it to zero.
+ NextVecElem = APInt::getZero(SrcBitSize);
+ } else {
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
+ return ConstantExpr::getBitCast(C, DestTy);
+ NextVecElem = Src->getValue();
}
-
- auto *Src = dyn_cast<ConstantInt>(Element);
- if (!Src)
- return ConstantExpr::getBitCast(C, DestTy);
-
- unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ APInt Value = Rest;
+ if (SrcBitSize % DstBitSize)
+ Rest = NextVecElem;
+ if (RestSize != 0) {
+ // Align the leftover bits and shift NextVecElem clear of them.
+ if (isLittleEndian) {
+ Value.lshrInPlace(UnusedBits == SrcBitSize ? 0 : UnusedBits);
+ NextVecElem <<= RestSize;
+ } else {
+ Value <<= UnusedBits;
+ NextVecElem.lshrInPlace(RestSize);
+ }
+ }
+ Value |= NextVecElem;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize - DstBitSize;
for (unsigned j = 0; j != Ratio; ++j) {
// Shift the piece of the value into the right place, depending on
// endianness.
- APInt Elt = Src->getValue().lshr(ShiftAmt);
+ APInt Elt = Value.lshr(ShiftAmt);
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
// Truncate and remember this piece.
Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
}
+ RestSize = SrcBitSize + RestSize - Ratio * DstBitSize;
}
return ConstantVector::get(Result);
diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
index d2656e291547c..3ad83ba18d64c 100644
--- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -83,6 +83,22 @@ define <1 x i1> @test10() {
ret <1 x i1> %ret
}
+define <4 x i24> @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret <4 x i24> <i24 1, i24 256, i24 65536, i24 0>
+;
+ %c = bitcast <3 x i32> <i32 1, i32 1, i32 1> to <4 x i24>
+ ret <4 x i24> %c
+}
+
+define <8 x i24> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret <8 x i24> splat (i24 1)
+;
+ %c = bitcast <3 x i64> <i64 281474993487873, i64 72057598332895488, i64 1099511693312> to <8 x i24>
+ ret <8 x i24> %c
+}
+
; from MultiSource/Benchmarks/Bullet
define <2 x float> @foo() {
; CHECK-LABEL: @foo(
@@ -277,6 +293,14 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
ret <16 x i8> %cast
}
+define <4 x i24> @bitcast_constexpr_4i24_3i32_u1u() {
+; CHECK-LABEL: @bitcast_constexpr_4i24_3i32_u1u(
+; CHECK-NEXT: ret <4 x i24> <i24 0, i24 256, i24 0, i24 0>
+;
+ %cast = bitcast <3 x i32><i32 undef, i32 1, i32 undef> to <4 x i24>
+ ret <4 x i24> %cast
+}
+
define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() {
; CHECK-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
; CHECK-NEXT: ret <1 x i32> splat (i32 1065353216)
>From d5797bc6dfd6dbc5d7b95681184cff70eff63962 Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Wed, 11 Feb 2026 11:20:07 +0000
Subject: [PATCH 2/4] remove redundant instructions
---
llvm/lib/Analysis/ConstantFolding.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 06d7a2613855a..a48bbd7c0d23e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -266,14 +266,13 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
APInt Rest(SrcBitSize, 0);
unsigned RestSize = 0;
- for (unsigned i = 0; i != NumSrcElt || Result.size() != NumDstElt;) {
+ for (unsigned i = 0; Result.size() != NumDstElt;) {
unsigned UnusedBits = SrcBitSize - RestSize;
if (RestSize >= DstBitSize) {
APInt Elt = isLittleEndian ? Rest.lshr(UnusedBits)
: Rest << (RestSize - DstBitSize);
Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
RestSize -= DstBitSize;
- Rest.clearBits(UnusedBits - 1, UnusedBits - 1 + DstBitSize);
continue;
}
auto *Element = C->getAggregateElement(i++);
>From b9e1e7a420c2977556128e89ab667a4a8252e4ee Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Tue, 3 Mar 2026 14:15:19 +0000
Subject: [PATCH 3/4] create regression tests for miscompiled bitcast
---
.../InstSimplify/bitcast-vector-fold.ll | 22 +++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
index 3ad83ba18d64c..6734bc5a6b0ea 100644
--- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -99,6 +99,20 @@ define <8 x i24> @test12() {
ret <8 x i24> %c
}
+define <3 x i32> @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret <3 x i32> splat (i32 1)
+ %c = bitcast <4 x i24> <i24 1, i24 256, i24 65536, i24 0> to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @test14() {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret <3 x i64> <i64 281474993487873, i64 72057598332895488, i64 1099511693312>
+ %c = bitcast <8 x i24> splat (i24 1) to <3 x i64>
+ ret <3 x i64> %c
+}
+
; from MultiSource/Benchmarks/Bullet
define <2 x float> @foo() {
; CHECK-LABEL: @foo(
@@ -301,6 +315,14 @@ define <4 x i24> @bitcast_constexpr_4i24_3i32_u1u() {
ret <4 x i24> %cast
}
+define <3 x i32> @bitcast_constexpr_3i32_4i24_uu11() {
+; CHECK-LABEL: @bitcast_constexpr_3i32_4i24_uu11(
+; CHECK-NEXT: ret <3 x i32> <i32 undef, i32 65536, i32 256>
+;
+ %cast = bitcast <4 x i24><i24 undef, i24 undef, i24 1, i24 1> to <3 x i32>
+ ret <3 x i32> %cast
+}
+
define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() {
; CHECK-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
; CHECK-NEXT: ret <1 x i32> splat (i32 1065353216)
>From a7fe5f7067ccf6f72faa1a789770ab154eed8505 Mon Sep 17 00:00:00 2001
From: "Doga, Kacper" <kacper.doga at intel.com>
Date: Tue, 3 Mar 2026 14:16:22 +0000
Subject: [PATCH 4/4] modify numDstElt<numSrcElt path to handle non-integer
ratio bitcasting
---
llvm/lib/Analysis/ConstantFolding.cpp | 77 ++++++++++++++++-----------
1 file changed, 45 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index a48bbd7c0d23e..3c858167849be 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -218,44 +218,58 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// more elements.
bool isLittleEndian = DL.isLittleEndian();
unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
// Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
- Constant *Zero = Constant::getNullValue(DstEltTy);
- unsigned Ratio = NumSrcElt / NumDstElt;
+ // bitcast (<4 x i24> <i24 0, i24 1, i24 2, i24 3> to <3 x i32>)
+ APInt Rest(DstBitSize, 0);
+ unsigned RestBitSize = 0;
+ bool RestConsistsUndef = false;
unsigned SrcElt = 0;
- for (unsigned i = 0; i != NumDstElt; ++i) {
- // Build each element of the result.
- Constant *Elt = Zero;
- unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
- for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = C->getAggregateElement(SrcElt++);
- if (isa_and_nonnull<UndefValue>(Src))
- Src = Constant::getNullValue(
- cast<VectorType>(C->getType())->getElementType());
- else
- Src = dyn_cast_or_null<ConstantInt>(Src);
- if (!Src) // Reject constantexpr elements.
+ while (Result.size() != NumDstElt) {
+ APInt Elt = Rest;
+ APInt NextVecElem;
+ while (RestBitSize < DstBitSize) {
+ assert (SrcElt < NumSrcElt && "Source vector overflow.");
+ auto *Element = C->getAggregateElement(SrcElt++);
+ if (!Element) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
- // Zero extend the element to the right size.
- Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
- DL);
- assert(Src && "Constant folding cannot fail on plain integers");
-
- // Shift it to the right place, depending on endianness.
- Src = ConstantFoldBinaryOpOperands(
- Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
- DL);
- assert(Src && "Constant folding cannot fail on plain integers");
-
- ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
-
- // Mix it in.
- Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
- assert(Elt && "Constant folding cannot fail on plain integers");
+ if (isa_and_nonnull<UndefValue>(Element)) {
+ RestBitSize += SrcBitSize;
+ if (RestBitSize == SrcBitSize || RestConsistsUndef) {
+ RestConsistsUndef = true;
+ continue;
+ }
+ NextVecElem = APInt::getZero(DstBitSize);
+ } else {
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
+ return ConstantExpr::getBitCast(C, DestTy);
+ NextVecElem = Src->getValue();
+ NextVecElem = NextVecElem.zext(DstBitSize);
+ }
+
+ if (isLittleEndian) {
+ Elt |= NextVecElem << RestBitSize;
+ } else {
+ Elt <<= SrcBitSize;
+ Elt |= NextVecElem.lshr(RestBitSize);
+ }
+ RestBitSize += SrcBitSize;
+ RestConsistsUndef = false;
}
- Result.push_back(Elt);
+ if (RestConsistsUndef)
+ Result.push_back(UndefValue::get(DstEltTy));
+ else
+ Result.push_back(ConstantInt::get(DstEltTy, Elt));
+
+ if (isLittleEndian)
+ Rest = NextVecElem.lshr(DstBitSize - RestBitSize);
+ else
+ Rest = NextVecElem << (DstBitSize - RestBitSize);
+ RestBitSize -= DstBitSize;
}
return ConstantVector::get(Result);
}
@@ -263,7 +277,6 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
// bitcast (<3 x i64> <i64 0, i64 1, i64 2> to <8 x i24>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
APInt Rest(SrcBitSize, 0);
unsigned RestSize = 0;
for (unsigned i = 0; Result.size() != NumDstElt;) {
More information about the llvm-commits
mailing list