[llvm] r343407 - [InstCombine] try to convert vector insert+extract to trunc

Mon Oct 1 06:22:13 PDT 2018

Thanks, investigating now - sorry about the bug.

On Mon, Oct 1, 2018 at 6:10 AM Hans Wennborg <hans at chromium.org> wrote:

> I've reverted this in r343458 because it caused assertion failures while building Chromium.
>
> The repro is available here:
> https://bugs.chromium.org/p/chromium/issues/detail?id=890723#c1
>
> On Sun, Sep 30, 2018 at 4:34 PM, Sanjay Patel via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
> > Author: spatel
> > Date: Sun Sep 30 07:34:01 2018
> > New Revision: 343407
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=343407&view=rev
> > Log:
> > [InstCombine] try to convert vector insert+extract to trunc
> >
> > This transform is requested for the backend in:
> > https://bugs.llvm.org/show_bug.cgi?id=39016
> > ...but I figured it was worth doing in IR too, and it's probably
> > easier to implement here, so that's this patch.
> >
> > In the simplest case, we are just truncating a scalar value. If the
> > extract index doesn't correspond to the LSBs of the scalar, then we
> > have to shift-right before the truncate. Endian-ness makes this tricky,
> > but hopefully the ASCII-art helps visualize the transform.
> >
> > Differential Revision: https://reviews.llvm.org/D52439
> >
> > Modified:
> >     llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> >     llvm/trunk/test/Transforms/InstCombine/extractelement.ll
> >
> > Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=343407&r1=343406&r2=343407&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
> (original)
> > +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Sun
> Sep 30 07:34:01 2018
> > @@ -167,7 +167,8 @@ Instruction *InstCombiner::scalarizePHI(
> >  }
> >
> >  static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
> > -                                      InstCombiner::BuilderTy &Builder)
> {
> > +                                      InstCombiner::BuilderTy &Builder,
> > +                                      bool IsBigEndian) {
> >    Value *X;
> >    uint64_t ExtIndexC;
> >    if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
> > @@ -186,6 +187,47 @@ static Instruction *foldBitcastExtElt(Ex
> >      if (Value *Elt = findScalarElement(X, ExtIndexC))
> >        return new BitCastInst(Elt, DestTy);
> >
> > +  // If the source elements are wider than the destination, try to
> shift and
> > +  // truncate a subset of scalar bits of an insert op.
> > +  if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy()) {
> > +    Value *Scalar;
> > +    uint64_t InsIndexC;
> > +    if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
> > +                                  m_ConstantInt(InsIndexC))))
> > +      return nullptr;
> > +
> > +    // The extract must be from the subset of vector elements that we
> inserted
> > +    // into. Example: if we inserted element 1 of a <2 x i64> and we are
> > +    // extracting an i16 (narrowing ratio = 4), then this extract must
> be from 1
> > +    // of elements 4-7 of the bitcasted vector.
> > +    unsigned NarrowingRatio = NumElts / NumSrcElts;
> > +    if (ExtIndexC / NarrowingRatio != InsIndexC)
> > +      return nullptr;
> > +
> > +    // We are extracting part of the original scalar. How that scalar is
> > +    // inserted into the vector depends on the endian-ness. Example:
> > +    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
> > +    //                                       +--+--+--+--+--+--+--+--+
> > +    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
> > +    // extelt <4 x i16> V', 3:               |                 |S2|S3|
> > +    //                                       +--+--+--+--+--+--+--+--+
> > +    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S'
> value.
> > +    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
> > +    // In this example, we must right-shift little-endian. Big-endian
> is just a
> > +    // truncate.
> > +    unsigned Chunk = ExtIndexC % NarrowingRatio;
> > +    if (IsBigEndian)
> > +      Chunk = NarrowingRatio - 1 - Chunk;
> > +    unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits();
> > +    if (ShAmt) {
> > +      // Bail out if we could end with more instructions than we
> started with.
> > +      if (!Ext.getVectorOperand()->hasOneUse())
> > +        return nullptr;
> > +      Scalar = Builder.CreateLShr(Scalar, ShAmt);
> > +    }
> > +    return new TruncInst(Scalar, DestTy);
> > +  }
> > +
> >    return nullptr;
> >  }
> >
> > @@ -224,7 +266,7 @@ Instruction *InstCombiner::visitExtractE
> >        }
> >      }
> >
> > -    if (Instruction *I = foldBitcastExtElt(EI, Builder))
> > +    if (Instruction *I = foldBitcastExtElt(EI, Builder,
> DL.isBigEndian()))
> >        return I;
> >
> >      // If there's a vector PHI feeding a scalar use through this
> extractelement
> >
> > Modified: llvm/trunk/test/Transforms/InstCombine/extractelement.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/extractelement.ll?rev=343407&r1=343406&r2=343407&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/test/Transforms/InstCombine/extractelement.ll (original)
> > +++ llvm/trunk/test/Transforms/InstCombine/extractelement.ll Sun Sep 30
> 07:34:01 2018
> > @@ -42,11 +42,14 @@ define i64 @test2(i64 %in) {
> >  }
> >
> >  define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
> > -; ANY-LABEL: @bitcasted_inselt_wide_source_zero_elt(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <2 x i64> undef, i64
> [[X:%.*]], i32 0
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[I]] to <4 x i32>
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[B]], i32 0
> > -; ANY-NEXT:    ret i32 [[R]]
> > +; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
> > +; LE-NEXT:    [[R:%.*]] = trunc i64 [[X:%.*]] to i32
> > +; LE-NEXT:    ret i32 [[R]]
> > +;
> > +; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
> > +; BE-NEXT:    [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
> > +; BE-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
> > +; BE-NEXT:    ret i32 [[R]]
> >  ;
> >    %i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
> >    %b = bitcast <2 x i64> %i to <4 x i32>
> > @@ -55,11 +58,14 @@ define i32 @bitcasted_inselt_wide_source
> >  }
> >
> >  define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
> > -; ANY-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <2 x i64> undef, i64
> [[X:%.*]], i32 1
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[I]] to <8 x i16>
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <8 x i16> [[B]], i32 4
> > -; ANY-NEXT:    ret i16 [[R]]
> > +; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
> > +; LE-NEXT:    [[R:%.*]] = trunc i64 [[X:%.*]] to i16
> > +; LE-NEXT:    ret i16 [[R]]
> > +;
> > +; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
> > +; BE-NEXT:    [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48
> > +; BE-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i16
> > +; BE-NEXT:    ret i16 [[R]]
> >  ;
> >    %i = insertelement <2 x i64> undef, i64 %x, i32 1
> >    %b = bitcast <2 x i64> %i to <8 x i16>
> > @@ -68,11 +74,14 @@ define i16 @bitcasted_inselt_wide_source
> >  }
> >
> >  define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
> > -; ANY-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <2 x i64> undef, i64
> [[X:%.*]], i32 0
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[I]] to <4 x i32>
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[B]], i32 1
> > -; ANY-NEXT:    ret i32 [[R]]
> > +; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
> > +; LE-NEXT:    [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
> > +; LE-NEXT:    [[R:%.*]] = trunc i64 [[TMP1]] to i32
> > +; LE-NEXT:    ret i32 [[R]]
> > +;
> > +; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
> > +; BE-NEXT:    [[R:%.*]] = trunc i64 [[X:%.*]] to i32
> > +; BE-NEXT:    ret i32 [[R]]
> >  ;
> >    %i = insertelement <2 x i64> undef, i64 %x, i32 0
> >    %b = bitcast <2 x i64> %i to <4 x i32>
> > @@ -81,11 +90,15 @@ define i32 @bitcasted_inselt_wide_source
> >  }
> >
> >  define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x)
> {
> > -; ANY-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <2 x i32> undef, i32
> [[X:%.*]], i32 0
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2
> > -; ANY-NEXT:    ret i8 [[R]]
> > +; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
> > +; LE-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
> > +; LE-NEXT:    [[R:%.*]] = trunc i32 [[TMP1]] to i8
> > +; LE-NEXT:    ret i8 [[R]]
> > +;
> > +; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
> > +; BE-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
> > +; BE-NEXT:    [[R:%.*]] = trunc i32 [[TMP1]] to i8
> > +; BE-NEXT:    ret i8 [[R]]
> >  ;
> >    %i = insertelement <2 x i32> undef, i32 %x, i32 0
> >    %b = bitcast <2 x i32> %i to <8 x i8>
> > @@ -94,11 +107,15 @@ define i8 @bitcasted_inselt_wide_source_
> >  }
> >
> >  define i3
> @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) {
> > -; ANY-LABEL:
> @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <3 x i15> undef, i15
> [[X:%.*]], i32 0
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <3 x i15> [[I]] to <15 x i3>
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <15 x i3> [[B]], i32 1
> > -; ANY-NEXT:    ret i3 [[R]]
> > +; LE-LABEL:
> @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
> > +; LE-NEXT:    [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3
> > +; LE-NEXT:    [[R:%.*]] = trunc i15 [[TMP1]] to i3
> > +; LE-NEXT:    ret i3 [[R]]
> > +;
> > +; BE-LABEL:
> @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
> > +; BE-NEXT:    [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9
> > +; BE-NEXT:    [[R:%.*]] = trunc i15 [[TMP1]] to i3
> > +; BE-NEXT:    ret i3 [[R]]
> >  ;
> >    %i = insertelement <3 x i15> undef, i15 %x, i32 0
> >    %b = bitcast <3 x i15> %i to <15 x i3>
> > @@ -125,12 +142,19 @@ define i8 @bitcasted_inselt_wide_source_
> >  declare void @use(<8 x i8>)
> >
> >  define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
> > -; ANY-LABEL: @bitcasted_inselt_wide_source_uses(
> > -; ANY-NEXT:    [[I:%.*]] = insertelement <2 x i32> undef, i32
> [[X:%.*]], i32 0
> > -; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
> > -; ANY-NEXT:    call void @use(<8 x i8> [[B]])
> > -; ANY-NEXT:    [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
> > -; ANY-NEXT:    ret i8 [[R]]
> > +; LE-LABEL: @bitcasted_inselt_wide_source_uses(
> > +; LE-NEXT:    [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]],
> i32 0
> > +; LE-NEXT:    [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
> > +; LE-NEXT:    call void @use(<8 x i8> [[B]])
> > +; LE-NEXT:    [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
> > +; LE-NEXT:    ret i8 [[R]]
> > +;
> > +; BE-LABEL: @bitcasted_inselt_wide_source_uses(
> > +; BE-NEXT:    [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]],
> i32 0
> > +; BE-NEXT:    [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
> > +; BE-NEXT:    call void @use(<8 x i8> [[B]])
> > +; BE-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i8
> > +; BE-NEXT:    ret i8 [[R]]
> >  ;
> >    %i = insertelement <2 x i32> undef, i32 %x, i32 0
> >    %b = bitcast <2 x i32> %i to <8 x i8>
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181001/b8a68519/attachment.html>