[llvm] 856cc60 - [InstCombine] canonicalize bitcast after insertelement into undef

Sun May 10 08:38:35 PDT 2020

Author: Sanjay Patel
Date: 2020-05-10T11:37:47-04:00
New Revision: 856cc60bc1ad07b5cba1ab81160c1c3ef8ff4c23

URL: https://github.com/llvm/llvm-project/commit/856cc60bc1ad07b5cba1ab81160c1c3ef8ff4c23
DIFF: https://github.com/llvm/llvm-project/commit/856cc60bc1ad07b5cba1ab81160c1c3ef8ff4c23.diff

LOG: [InstCombine] canonicalize bitcast after insertelement into undef

We have a transform in the opposite direction only for the x86 MMX type.
Other types are not handled either way before this patch.

The motivating case from PR45748:
https://bugs.llvm.org/show_bug.cgi?id=45748
...is the last test diff. In that example, we are triggering an existing
bitcast transform, so we reduce the number of casts, and that should give
us the ideal x86 codegen.

Differential Revision: https://reviews.llvm.org/D79171

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index b2dc7259e139..d7c6db2051f4 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1050,9 +1050,26 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
     return replaceInstUsesWith(IE, V);
 
+  // If the scalar is bitcast and inserted into undef, do the insert in the
+  // source type followed by bitcast.
+  // TODO: Generalize for insert into any constant, not just undef?
+  Value *ScalarSrc;
+  if (match(VecOp, m_Undef()) &&
+      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
+      (ScalarSrc->getType()->isIntegerTy() ||
+       ScalarSrc->getType()->isFloatingPointTy())) {
+    // inselt undef, (bitcast ScalarSrc), IdxOp -->
+    //   bitcast (inselt undef, ScalarSrc, IdxOp)
+    Type *ScalarTy = ScalarSrc->getType();
+    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
+    UndefValue *NewUndef = UndefValue::get(VecTy);
+    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
+    return new BitCastInst(NewInsElt, IE.getType());
+  }
+
   // If the vector and scalar are both bitcast from the same element type, do
   // the insert in that source type followed by bitcast.
-  Value *VecSrc, *ScalarSrc;
+  Value *VecSrc;
   if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
       match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
       (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&

diff  --git a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
index 73aa226bd586..e90f60c5f67e 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -70,10 +70,12 @@ entry:
   ret double %1
 }
 
+; FP source is ok.
+
 define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64>
 ; CHECK-NEXT:    ret <3 x i64> [[I]]
 ;
   %xb = bitcast double %x to i64
@@ -81,10 +83,12 @@ define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Integer source is ok; index is anything.
+
 define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_fp(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
 ; CHECK-NEXT:    ret <3 x float> [[I]]
 ;
   %xb = bitcast i32 %x to float
@@ -92,8 +96,21 @@ define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
   ret <3 x float> %i
 }
 
+define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) {
+; CHECK-LABEL: @bitcast_inselt_undef_vscale(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float>
+; CHECK-NEXT:    ret <vscale x 3 x float> [[I]]
+;
+  %xb = bitcast i32 %x to float
+  %i = insertelement <vscale x 3 x float> undef, float %xb, i567 %idx
+  ret <vscale x 3 x float> %i
+}
+
 declare void @use(i64)
 
+; Negative test - extra use prevents canonicalization
+
 define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_extra_use(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
@@ -107,6 +124,8 @@ define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be scalar
+
 define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_vec_src(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
@@ -118,6 +137,8 @@ define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be scalar
+
 define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64
@@ -129,12 +150,13 @@ define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Reduce number of casts
+
 define <2 x i64> @PR45748(double %x, double %y) {
 ; CHECK-LABEL: @PR45748(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0
-; CHECK-NEXT:    [[YB:%.*]] = bitcast double [[Y:%.*]] to i64
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[I1]]
 ;
   %xb = bitcast double %x to i64