[llvm] r338817 - [InstSimplify] fold extracting from std::pair (2/2)

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 6 23:21:05 PDT 2018


Merged to 7.0 in r339097.

On Fri, Aug 3, 2018 at 7:39 AM, Hiroshi Inoue via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: inouehrs
> Date: Thu Aug  2 22:39:48 2018
> New Revision: 338817
>
> URL: http://llvm.org/viewvc/llvm-project?rev=338817&view=rev
> Log:
> [InstSimplify] fold extracting from std::pair (2/2)
>
> This is the second patch in a series that intends to enable jump threading for an inlined method whose return type is std::pair<int, bool> or std::pair<bool, int>.
> The first patch is https://reviews.llvm.org/rL338485.
>
> This patch handles code sequences that merge two values using `shl` and `or` and then extract one value using `and`.
>
> Differential Revision: https://reviews.llvm.org/D49981
>
>
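For readers following along, the motivating shape the commit message describes: when a function returning std::pair<bool, int> is inlined, the two members end up packed into a single 64-bit integer and are extracted with shifts and masks. A minimal C++ sketch of that shape (the function names here are illustrative, not from the patch, and the exact packing is ABI-dependent):

    #include <cstdint>
    #include <utility>

    // Roughly how a std::pair<bool, int> return value looks once the two
    // members have been merged into a single 64-bit register.
    uint64_t pack(std::pair<bool, int> p) {
      uint64_t x = static_cast<uint32_t>(p.second); // zext i32 -> i64
      uint64_t y = p.first ? 1u : 0u;               // zext i1  -> i64
      return (x << 32) | y;                         // shl nuw ...; or ...
    }

    // Extracting the bool member is the masking 'and' that this patch
    // teaches InstSimplify to fold away, yielding y directly.
    bool extractFlag(uint64_t packed) {
      return (packed & 1) != 0; // ((x << 32) | y) & 1  ==>  y
    }
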
> Modified:
>     llvm/trunk/lib/Analysis/InstructionSimplify.cpp
>     llvm/trunk/test/Transforms/InstSimplify/AndOrXor.ll
>     llvm/trunk/test/Transforms/NewGVN/pair_jumpthread.ll
>
> Modified: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InstructionSimplify.cpp?rev=338817&r1=338816&r2=338817&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Analysis/InstructionSimplify.cpp (original)
> +++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp Thu Aug  2 22:39:48 2018
> @@ -1863,6 +1863,40 @@ static Value *SimplifyAndInst(Value *Op0
>                                        MaxRecurse))
>        return V;
>
> +  // Assuming the effective width of Y is not larger than A, i.e., the bits
> +  // of X and Y are disjoint in (X << A) | Y,
> +  // if the mask of this AND op covers all bits of X or Y while covering
> +  // no bits from the other, we can bypass this AND op. E.g.,
> +  // ((X << A) | Y) & Mask -> Y,
> +  //     if Mask = ((1 << effective_width_of(Y)) - 1)
> +  // ((X << A) | Y) & Mask -> X << A,
> +  //     if Mask = ((1 << effective_width_of(X)) - 1) << A
> +  // SimplifyDemandedBits in InstCombine can optimize the general case.
> +  // This pattern aims to help other passes for a common case.
> +  Value *Y, *XShifted;
> +  if (match(Op1, m_APInt(Mask)) &&
> +      match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
> +                                     m_Value(XShifted)),
> +                        m_Value(Y)))) {
> +    const unsigned ShftCnt = ShAmt->getZExtValue();
> +    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
> +    const unsigned Width = Op0->getType()->getScalarSizeInBits();
> +    const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
> +    if (EffWidthY <= ShftCnt) {
> +      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
> +                                                Q.DT);
> +      const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
> +      const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
> +      const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
> +      // If the mask is extracting all bits from X or Y as is, we can skip
> +      // this AND op.
> +      if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
> +        return Y;
> +      if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
> +        return XShifted;
> +    }
> +  }
> +
>    return nullptr;
>  }
>
>
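To make the new check concrete, here is the shl_or_and1 test case from below traced through the code above, using plain uint64_t in place of APInt (a standalone sketch, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    // Mirrors the EffBitsX/EffBitsY reasoning for a fixed 64-bit width,
    // with X = zext i32, Y = zext i1, ShftCnt = 32.
    int main() {
      const unsigned ShftCnt = 32;
      const unsigned EffWidthY = 1;  // Y comes from zext i1: one low bit
      const unsigned EffWidthX = 32; // X comes from zext i32: 32 low bits

      // EffWidthY <= ShftCnt, so X's and Y's bits are disjoint in (X<<32)|Y.
      assert(EffWidthY <= ShftCnt);

      const uint64_t EffBitsY = (1ULL << EffWidthY) - 1;              // 0x1
      const uint64_t EffBitsX = ((1ULL << EffWidthX) - 1) << ShftCnt; // high 32 bits

      const uint64_t Mask = 1; // the 'and ..., 1' in shl_or_and1

      // Mask covers all bits Y can have and none that X can have, so
      // ((X << 32) | Y) & 1 simplifies to Y.
      assert((EffBitsY & ~Mask) == 0); // EffBitsY.isSubsetOf(Mask)
      assert((EffBitsX & Mask) == 0);  // !EffBitsX.intersects(Mask)
      return 0;
    }
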
> Modified: llvm/trunk/test/Transforms/InstSimplify/AndOrXor.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/AndOrXor.ll?rev=338817&r1=338816&r2=338817&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/InstSimplify/AndOrXor.ll (original)
> +++ llvm/trunk/test/Transforms/InstSimplify/AndOrXor.ll Thu Aug  2 22:39:48 2018
> @@ -967,12 +967,8 @@ define i32 @reversed_not(i32 %a) {
>
>  define i64 @shl_or_and1(i32 %a, i1 %b) {
>  ; CHECK-LABEL: @shl_or_and1(
> -; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
>  ; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64
> -; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
> -; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 1
> -; CHECK-NEXT:    ret i64 [[TMP5]]
> +; CHECK-NEXT:    ret i64 [[TMP2]]
>  ;
>    %tmp1 = zext i32 %a to i64
>    %tmp2 = zext i1 %b to i64
> @@ -985,11 +981,8 @@ define i64 @shl_or_and1(i32 %a, i1 %b) {
>  define i64 @shl_or_and2(i32 %a, i1 %b) {
>  ; CHECK-LABEL: @shl_or_and2(
>  ; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
> -; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64
>  ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
> -; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296
> -; CHECK-NEXT:    ret i64 [[TMP5]]
> +; CHECK-NEXT:    ret i64 [[TMP3]]
>  ;
>    %tmp1 = zext i1 %b to i64
>    %tmp2 = zext i32 %a to i64
> @@ -999,15 +992,11 @@ define i64 @shl_or_and2(i32 %a, i1 %b) {
>    ret i64 %tmp5
>  }
>
> -; concatinate two 32-bit integers and extract lower 32-bit
> +; concatenate two 32-bit integers and extract lower 32-bit
>  define i64 @shl_or_and3(i32 %a, i32 %b) {
>  ; CHECK-LABEL: @shl_or_and3(
> -; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
>  ; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
> -; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
> -; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
> -; CHECK-NEXT:    ret i64 [[TMP5]]
> +; CHECK-NEXT:    ret i64 [[TMP2]]
>  ;
>    %tmp1 = zext i32 %a to i64
>    %tmp2 = zext i32 %b to i64
> @@ -1017,15 +1006,12 @@ define i64 @shl_or_and3(i32 %a, i32 %b)
>    ret i64 %tmp5
>  }
>
> -; concatinate two 16-bit integers and extract higher 16-bit
> +; concatenate two 16-bit integers and extract higher 16-bit
>  define i32 @shl_or_and4(i16 %a, i16 %b) {
>  ; CHECK-LABEL: @shl_or_and4(
>  ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
> -; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
>  ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
> -; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], -65536
> -; CHECK-NEXT:    ret i32 [[TMP5]]
> +; CHECK-NEXT:    ret i32 [[TMP3]]
>  ;
>    %tmp1 = zext i16 %a to i32
>    %tmp2 = zext i16 %b to i32
> @@ -1037,12 +1023,8 @@ define i32 @shl_or_and4(i16 %a, i16 %b)
>
>  define i128 @shl_or_and5(i64 %a, i1 %b) {
>  ; CHECK-LABEL: @shl_or_and5(
> -; CHECK-NEXT:    [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
>  ; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
> -; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
> -; CHECK-NEXT:    [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and i128 [[TMP4]], 1
> -; CHECK-NEXT:    ret i128 [[TMP5]]
> +; CHECK-NEXT:    ret i128 [[TMP2]]
>  ;
>    %tmp1 = zext i64 %a to i128
>    %tmp2 = zext i1 %b to i128
> @@ -1108,12 +1090,8 @@ define i32 @shl_or_and8(i16 %a, i16 %b)
>
>  define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
>  ; CHECK-LABEL: @shl_or_and1v(
> -; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
>  ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
> -; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
> -; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1>
> -; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
> +; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
>  ;
>    %tmp1 = zext <2 x i32> %a to <2 x i64>
>    %tmp2 = zext <2 x i1> %b to <2 x i64>
> @@ -1126,11 +1104,8 @@ define <2 x i64> @shl_or_and1v(<2 x i32>
>  define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
>  ; CHECK-LABEL: @shl_or_and2v(
>  ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
> -; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
>  ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
> -; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
> -; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296>
> -; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
> +; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
>  ;
>    %tmp1 = zext <2 x i1> %b to <2 x i64>
>    %tmp2 = zext <2 x i32> %a to <2 x i64>
>
> Modified: llvm/trunk/test/Transforms/NewGVN/pair_jumpthread.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/pair_jumpthread.ll?rev=338817&r1=338816&r2=338817&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/NewGVN/pair_jumpthread.ll (original)
> +++ llvm/trunk/test/Transforms/NewGVN/pair_jumpthread.ll Thu Aug  2 22:39:48 2018
> @@ -1,8 +1,6 @@
>  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
>  ; RUN: opt < %s -newgvn -S | FileCheck %s
>  ; RUN: opt < %s -newgvn -jump-threading -S | FileCheck --check-prefix=CHECK-JT %s
> -; This test is expected to fail until the transformation is committed.
> -; XFAIL: *
>
>  define signext i32 @testBI(i32 signext %v) {
>  ; Test with std::pair<bool, int>
>
>
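As context for the now un-XFAILed test: once the masking 'and' is simplified to the original i1 value, the branch in a caller of such a pair-returning function becomes a direct use of that i1, which is what -jump-threading needs. A hypothetical C++ caller illustrating the end-to-end goal (all names invented for illustration):

    #include <utility>

    // Hypothetical callee: the kind of pair-returning function the series
    // targets. After inlining, its result is packed as sketched earlier.
    static std::pair<bool, int> parse(int v) {
      return {v > 0, v * 2};
    }

    // With the fold in place, the branch below no longer goes through a
    // pack/mask round trip, so jump threading can thread it.
    int use(int v) {
      std::pair<bool, int> r = parse(v);
      if (r.first)
        return r.second;
      return -1;
    }
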
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

