[llvm] [VectorCombine] foldBitcastShuffle - peek through any residual bitcasts before creating a new bitcast on top (PR #86119)

Fri Mar 22 07:39:21 PDT 2024

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/86119

>From df919cbdd5cf2b34513ad804df8d71cf8fd7ea01 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 21 Mar 2024 13:41:56 +0000
Subject: [PATCH] [VectorCombine] foldBitcastShuffle - peek through any
 residual bitcasts before creating a new bitcast on top

Encountered while working on #67803, this helps prevents cases where the bitcast chains aren't cleared out and we can't perform further combines until after InstCombine/InstSimplify has run.

I'm assuming we can't safely put this inside IRBuilderBase.CreateBitCast?
---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp      | 12 ++++++++++--
 .../VectorCombine/X86/shuffle-inseltpoison.ll        |  6 ++----
 llvm/test/Transforms/VectorCombine/X86/shuffle.ll    |  6 ++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7e86137f23f3c8..af5e7c9bc385ca 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -135,6 +135,14 @@ class VectorCombine {
 };
 } // namespace
 
+/// Return the source operand of a potentially bitcasted value. If there is no
+/// bitcast, return the input value itself.
+static Value *peekThroughBitcasts(Value *V) {
+  while (auto *BitCast = dyn_cast<BitCastInst>(V))
+    V = BitCast->getOperand(0);
+  return V;
+}
+
 static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
   // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan.
   // The widened load may load data from dirty regions or create data races
@@ -751,8 +759,8 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
 
   // bitcast (shuf V0, V1, MaskC) --> shuf (bitcast V0), (bitcast V1), MaskC'
   ++NumShufOfBitcast;
-  Value *CastV0 = Builder.CreateBitCast(V0, NewShuffleTy);
-  Value *CastV1 = Builder.CreateBitCast(V1, NewShuffleTy);
+  Value *CastV0 = Builder.CreateBitCast(peekThroughBitcasts(V0), NewShuffleTy);
+  Value *CastV1 = Builder.CreateBitCast(peekThroughBitcasts(V1), NewShuffleTy);
   Value *Shuf = Builder.CreateShuffleVector(CastV0, CastV1, NewMask);
   replaceValue(I, *Shuf);
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
index 8c5c6656ca1795..74a58c8d313611 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
@@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; SSE-NEXT:    ret <2 x i64> [[BC3]]
 ;
 ; AVX-LABEL: @PR35454_1(
-; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8>
+; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
@@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; SSE-NEXT:    ret <2 x i64> [[BC3]]
 ;
 ; AVX-LABEL: @PR35454_2(
-; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16>
+; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
 ; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
index 60cfc4d4b07079..d1484fd5ab3399 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
@@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; SSE-NEXT:    ret <2 x i64> [[BC3]]
 ;
 ; AVX-LABEL: @PR35454_1(
-; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8>
+; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
 ; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
@@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; SSE-NEXT:    ret <2 x i64> [[BC3]]
 ;
 ; AVX-LABEL: @PR35454_2(
-; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16>
+; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
 ; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>