[llvm] r281343 - [ConstantFold] Improve the bitcast folding logic for constant vectors.
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 13 07:50:47 PDT 2016
Author: adibiagio
Date: Tue Sep 13 09:50:47 2016
New Revision: 281343
URL: http://llvm.org/viewvc/llvm-project?rev=281343&view=rev
Log:
[ConstantFold] Improve the bitcast folding logic for constant vectors.
The constant folder did not always know how to fold bitcasts of constant integer
vectors. In particular, it was unable to handle the case where a constant vector
had some undef elements and the resulting (i.e. bitcasted) vector type had more
elements than the original vector type.
Example:
%cast = bitcast <2 x i64><i64 undef, i64 2> to <4 x i32>
On a little-endian target, %cast could have been folded to:
<4 x i32><i32 undef, i32 undef, i32 2, i32 0>
This patch improves the folding logic by teaching it how to correctly propagate
undef elements into the folded vector.
Differential Revision: https://reviews.llvm.org/D24301
Modified:
llvm/trunk/lib/Analysis/ConstantFolding.cpp
llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
llvm/trunk/test/Transforms/InstSimplify/bitcast-vector-fold.ll
Modified: llvm/trunk/lib/Analysis/ConstantFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ConstantFolding.cpp?rev=281343&r1=281342&r2=281343&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ConstantFolding.cpp (original)
+++ llvm/trunk/lib/Analysis/ConstantFolding.cpp Tue Sep 13 09:50:47 2016
@@ -224,8 +224,19 @@ Constant *FoldBitCast(Constant *C, Type
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
- auto *Src = dyn_cast_or_null<ConstantInt>(C->getAggregateElement(i));
- if (!Src) // Reject constantexpr elements.
+ auto *Element = C->getAggregateElement(i);
+
+ if (!Element) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ if (isa<UndefValue>(Element)) {
+ // Correctly Propagate undef values.
+ Result.append(Ratio, UndefValue::get(DstEltTy));
+ continue;
+ }
+
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
return ConstantExpr::getBitCast(C, DestTy);
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
Modified: llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll?rev=281343&r1=281342&r2=281343&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll Tue Sep 13 09:50:47 2016
@@ -57,8 +57,7 @@ define <2 x i64> @test_extrq_constant_un
define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
; CHECK-LABEL: @test_extrq_call_constexpr(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> %x
;
%1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
ret <2 x i64> %1
@@ -133,7 +132,7 @@ define <2 x i64> @test_extrqi_constant_u
define <2 x i64> @test_extrqi_call_constexpr() {
; CHECK-LABEL: @test_extrqi_call_constexpr(
-; CHECK-NEXT: ret <2 x i64> bitcast (<16 x i8> <i8 extractelement (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>), i32 2), i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> to <2 x i64>)
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
ret <2 x i64> %1
@@ -179,7 +178,7 @@ define <2 x i64> @test_insertq_constant_
define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
; CHECK-LABEL: @test_insertq_call_constexpr(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
@@ -224,7 +223,7 @@ define <2 x i64> @test_insertqi_constant
define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
; CHECK-LABEL: @test_insertqi_call_constexpr(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
Modified: llvm/trunk/test/Transforms/InstSimplify/bitcast-vector-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/bitcast-vector-fold.ll?rev=281343&r1=281342&r2=281343&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/bitcast-vector-fold.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/bitcast-vector-fold.ll Tue Sep 13 09:50:47 2016
@@ -126,7 +126,7 @@ define <2 x double> @foo6() {
define <4 x i32> @bitcast_constexpr_4i32_2i64_u2() {
; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_u2(
-; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> <i64 undef, i64 2> to <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> <i32 undef, i32 undef, i32 2, i32 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 2> to <4 x i32>
ret <4 x i32> %cast
@@ -134,7 +134,7 @@ define <4 x i32> @bitcast_constexpr_4i32
define <4 x i32> @bitcast_constexpr_4i32_2i64_1u() {
; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_1u(
-; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> <i64 1, i64 undef> to <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
;
%cast = bitcast <2 x i64><i64 1, i64 undef> to <4 x i32>
ret <4 x i32> %cast
@@ -142,7 +142,7 @@ define <4 x i32> @bitcast_constexpr_4i32
define <4 x i32> @bitcast_constexpr_4i32_2i64() {
; CHECK-LABEL: @bitcast_constexpr_4i32_2i64(
-; CHECK-NEXT: ret <4 x i32> bitcast (<2 x i64> <i64 undef, i64 2> to <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> <i32 undef, i32 undef, i32 2, i32 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 2> to <4 x i32>
ret <4 x i32> %cast
@@ -150,7 +150,7 @@ define <4 x i32> @bitcast_constexpr_4i32
define <8 x i16> @bitcast_constexpr_8i16_2i64_u2() {
; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u2(
-; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> <i64 undef, i64 2> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 2, i16 0, i16 0, i16 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 2> to <8 x i16>
ret <8 x i16> %cast
@@ -158,7 +158,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <8 x i16> @bitcast_constexpr_8i16_2i64_1u() {
; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_1u(
-; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> <i64 1, i64 undef> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
;
%cast = bitcast <2 x i64><i64 1, i64 undef> to <8 x i16>
ret <8 x i16> %cast
@@ -166,7 +166,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <8 x i16> @bitcast_constexpr_8i16_2i64_u65536() {
; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u65536(
-; CHECK-NEXT: ret <8 x i16> bitcast (<2 x i64> <i64 undef, i64 65536> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 1, i16 0, i16 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 65536> to <8 x i16>
ret <8 x i16> %cast
@@ -174,7 +174,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <16 x i8> @bitcast_constexpr_16i8_2i64_u2() {
; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u2(
-; CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> <i64 undef, i64 2> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 2> to <16 x i8>
ret <16 x i8> %cast
@@ -182,7 +182,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_2i64_256u() {
; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_256u(
-; CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> <i64 256, i64 undef> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
;
%cast = bitcast <2 x i64><i64 256, i64 undef> to <16 x i8>
ret <16 x i8> %cast
@@ -190,7 +190,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_2i64_u256() {
; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u256(
-; CHECK-NEXT: ret <16 x i8> bitcast (<2 x i64> <i64 undef, i64 256> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
%cast = bitcast <2 x i64><i64 undef, i64 256> to <16 x i8>
ret <16 x i8> %cast
@@ -198,7 +198,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <8 x i16> @bitcast_constexpr_8i16_4i32_uu22() {
; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_uu22(
-; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> <i32 undef, i32 undef, i32 2, i32 2> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 2, i16 0, i16 2, i16 0>
;
%cast = bitcast <4 x i32><i32 undef, i32 undef, i32 2, i32 2> to <8 x i16>
ret <8 x i16> %cast
@@ -206,7 +206,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <8 x i16> @bitcast_constexpr_8i16_4i32_10uu() {
; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_10uu(
-; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> <i32 1, i32 0, i32 undef, i32 undef> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
;
%cast = bitcast <4 x i32><i32 1, i32 0, i32 undef, i32 undef> to <8 x i16>
ret <8 x i16> %cast
@@ -214,7 +214,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <8 x i16> @bitcast_constexpr_8i16_4i32_u257u256() {
; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_u257u256(
-; CHECK-NEXT: ret <8 x i16> bitcast (<4 x i32> <i32 undef, i32 65536, i32 undef, i32 65536> to <8 x i16>)
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 0, i16 1, i16 undef, i16 undef, i16 0, i16 1>
;
%cast = bitcast <4 x i32><i32 undef, i32 65536, i32 undef, i32 65536> to <8 x i16>
ret <8 x i16> %cast
@@ -222,7 +222,7 @@ define <8 x i16> @bitcast_constexpr_8i16
define <16 x i8> @bitcast_constexpr_16i8_4i32_u2u2() {
; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u2u2(
-; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> <i32 undef, i32 2, i32 undef, i32 2> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0>
;
%cast = bitcast <4 x i32><i32 undef, i32 2, i32 undef, i32 2> to <16 x i8>
ret <16 x i8> %cast
@@ -230,7 +230,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_4i32_1u1u() {
; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_1u1u(
-; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> <i32 1, i32 undef, i32 1, i32 undef> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 1, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>
;
%cast = bitcast <4 x i32><i32 1, i32 undef, i32 1, i32 undef> to <16 x i8>
ret <16 x i8> %cast
@@ -238,7 +238,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_4i32_u256uu() {
; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u256uu(
-; CHECK-NEXT: ret <16 x i8> bitcast (<4 x i32> <i32 undef, i32 256, i32 undef, i32 undef> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
;
%cast = bitcast <4 x i32><i32 undef, i32 256, i32 undef, i32 undef> to <16 x i8>
ret <16 x i8> %cast
@@ -246,7 +246,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_8i16_u2u2u2u2() {
; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u2u2u2u2(
-; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> <i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0>
;
%cast = bitcast <8 x i16><i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2> to <16 x i8>
ret <16 x i8> %cast
@@ -254,7 +254,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_8i16_1u1u1u1u() {
; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_1u1u1u1u(
-; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> <i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef>
;
%cast = bitcast <8 x i16><i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef> to <16 x i8>
ret <16 x i8> %cast
@@ -262,7 +262,7 @@ define <16 x i8> @bitcast_constexpr_16i8
define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u256uuu256uu(
-; CHECK-NEXT: ret <16 x i8> bitcast (<8 x i16> <i16 undef, i16 256, i16 undef, i16 undef, i16 undef, i16 256, i16 undef, i16 undef> to <16 x i8>)
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 undef, i8 undef>
;
%cast = bitcast <8 x i16><i16 undef, i16 256, i16 undef, i16 undef, i16 undef, i16 256, i16 undef, i16 undef> to <16 x i8>
ret <16 x i8> %cast
More information about the llvm-commits
mailing list