[PATCH] D97397: [InstCombine] Add a combine for a shuffle of identical bitcasts

Wed Feb 24 09:22:38 PST 2021

sanwou01 created this revision.
Herald added subscribers: hiraditya, kristof.beyls.
sanwou01 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Some intrinsics wrapper code has the habit of ignoring the type of the
elements in vectors, thinking of vector registers as a "bag of bits". As
a consequence, some operations are shared between vectors of different
types. For example, functions that rearrange elements in a
vector can be shared between vectors of int32 and float.

This can result in bitcasts in awkward places that prevent the backend
from recognizing some instructions. For AArch64 in particular, it
inhibits the selection of dup from a general purpose register (GPR), and
mov from GPR to a vector lane.

This patch adds a pattern in InstCombine to move the bitcasts past the
shufflevector when both operands are bitcasts from the same vector type
and that type has as many elements as the shuffle result. Sometimes this
even allows InstCombine to remove the bitcast entirely, as in the
included tests.

Alternatively this could be done with a few extra patterns in the
AArch64 backend, but InstCombine seems like a better place for this.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97397

Files:
  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
  llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
  llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll


Index: llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
===================================================================

--- llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
+++ llvm/test/Transforms/InstCombine/vtrn2_bitcast.ll
@@ -4,13 +4,10 @@
 define void @vtrn2([8 x i8]* nocapture %result, i32 %i, i32 %j, i32 %k) {
 ; CHECK-LABEL: @vtrn2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[J:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <2 x float>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[K:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[J:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
Index: llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
===================================================================
--- llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
+++ llvm/test/Transforms/InstCombine/vtrn1_bitcast.ll
@@ -5,11 +5,9 @@
 ; CHECK-LABEL: @vtrn1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[I:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [8 x i8]* [[RESULT:%.*]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 1
 ; CHECK-NEXT:    ret void
 ;
 {
Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2289,6 +2289,19 @@
 
   unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
   unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
+
+  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
+  //
+  //
+  Value *X, *Y;
+  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
+      X->getType()->isVectorTy() &&
+      cast<FixedVectorType>(X->getType())->getNumElements() == VWidth &&
+      X->getType() == Y->getType()) {
+    Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask());
+    return new BitCastInst(V, SVI.getType());
+  }
+
   ArrayRef<int> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
 
@@ -2298,7 +2311,6 @@
   // TODO: This could be extended to allow length-changing shuffles.
   //       The transform might also be obsoleted if we allowed canonicalization
   //       of bitcasted shuffles.
-  Value *X;
   if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
       X->getType()->isVectorTy() && VWidth == LHSWidth) {
     // Try to create a scaled mask constant.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97397.326116.patch
Type: text/x-patch
Size: 3839 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210224/bdcc25f4/attachment.bin>