[llvm] 656001e - [ValueTracking] look through bitcast of vector in computeKnownBits

Wed Jun 23 08:46:57 PDT 2021

Author: Sanjay Patel
Date: 2021-06-23T11:46:46-04:00
New Revision: 656001e7b2b939d9bce4fb58831d314dc67ddf7a

URL: https://github.com/llvm/llvm-project/commit/656001e7b2b939d9bce4fb58831d314dc67ddf7a
DIFF: https://github.com/llvm/llvm-project/commit/656001e7b2b939d9bce4fb58831d314dc67ddf7a.diff

LOG: [ValueTracking] look through bitcast of vector in computeKnownBits

This borrows as much as possible from the SDAG version of the code
(originally added with D27129 and since updated with big endian support).

In IR, we can test more easily for correctness than we did in the
original patch. I'm using the simplest cases that I could find for
InstSimplify: we computeKnownBits on variable shift amounts to see if
they are zero or in range. So shuffle constant elements into a vector,
cast it, and shift it.

The motivating x86 example from https://llvm.org/PR50123 is also here.
We computeKnownBits in the caller code, but we only check if the shift
amount is in range. That could be enhanced to catch the 2nd x86 test -
if the shift amount is known too big, the result is 0.

Alive2 understands the datalayout and agrees that the tests here are
correct - example:
https://alive2.llvm.org/ce/z/KZJFMZ

Differential Revision: https://reviews.llvm.org/D104472

Added: 
    

Modified: 
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
    llvm/test/Transforms/InstSimplify/shift-knownbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3621deef60da0..276a5a4c09c99 100644

--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1182,6 +1182,47 @@ static void computeKnownBitsFromOperator(const Operator *I,
       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
       break;
     }
+
+    // Handle cast from vector integer type to scalar or vector integer.
+    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
+    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
+        !I->getType()->isIntOrIntVectorTy())
+      break;
+
+    // Look through a cast from narrow vector elements to wider type.
+    // Examples: v4i32 -> v2i64, v3i8 -> i24
+    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
+    if (BitWidth % SubBitWidth == 0) {
+      // Known bits are automatically intersected across demanded elements of a
+      // vector. So for example, if a bit is computed as known zero, it must be
+      // zero across all demanded elements of the vector.
+      //
+      // For this bitcast, each demanded element of the output is sub-divided
+      // across a set of smaller vector elements in the source vector. To get
+      // the known bits for an entire element of the output, compute the known
+      // bits for each sub-element sequentially. This is done by shifting the
+      // one-set-bit demanded elements parameter across the sub-elements for
+      // consecutive calls to computeKnownBits. We are using the demanded
+      // elements parameter as a mask operator.
+      //
+      // The known bits of each sub-element are then inserted into place
+      // (dependent on endian) to form the full result of known bits.
+      unsigned NumElts = DemandedElts.getBitWidth();
+      unsigned SubScale = BitWidth / SubBitWidth;
+      APInt SubDemandedElts = APInt::getNullValue(NumElts * SubScale);
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (DemandedElts[i])
+          SubDemandedElts.setBit(i * SubScale);
+      }
+
+      KnownBits KnownSrc(SubBitWidth);
+      for (unsigned i = 0; i != SubScale; ++i) {
+        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
+                         Depth + 1, Q);
+        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
+        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
+      }
+    }
     break;
   }
   case Instruction::SExt: {

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
index 73fe52f8c3284..fc82ac1740c6c 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -2762,14 +2762,18 @@ define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
   ret <2 x i64> %2
 }
 
+; The shift amount is in range (masked with 31 and the high 32 bits are zero),
+; so convert to standard IR - https://llvm.org/PR50123
+
 define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast(
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i32 1
 ; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
-; CHECK-NEXT:    [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]])
-; CHECK-NEXT:    ret <2 x i64> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
   %b = bitcast <2 x i64> %a to <4 x i32>
   %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
@@ -2779,6 +2783,8 @@ define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
   ret <2 x i64> %r
 }
 
+; TODO: This could be recognized as an over-shift.
+
 define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) {
 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift(
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>

diff  --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
index c8c5a6e99fe53..348f47a0f19cc 100644
--- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
+++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s
+; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN
+; RUN: opt < %s -instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN
 
 ; If any bits of the shift amount are known to make it exceed or equal
 ; the number of bits in the type, the shift causes undefined behavior.
@@ -223,12 +224,11 @@ define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
   ret i8 %ex
 }
 
+; The shift amount is 0 on either of high/low bytes. The middle byte doesn't matter.
+
 define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
 ; CHECK-LABEL: @bitcast_noshift_scalar(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 3>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; CHECK-NEXT:    ret i24 [[V2:%.*]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 3>
@@ -237,12 +237,17 @@ define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
   ret i24 %r
 }
 
+; The shift amount is 0 on low byte of big-endian and unknown on little-endian.
+
 define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
-; CHECK-LABEL: @bitcast_noshift_scalar_bigend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
+; BIGENDIAN-NEXT:    ret i24 [[V2:%.*]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
+; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
+; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
+; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
+; LITTLEENDIAN-NEXT:    ret i24 [[R]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 0, i32 1, i32 3>
@@ -251,12 +256,17 @@ define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
   ret i24 %r
 }
 
+; The shift amount is 0 on low byte of little-endian and unknown on big-endian.
+
 define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
-; CHECK-LABEL: @bitcast_noshift_scalar_littleend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
-; CHECK-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret i24 [[R]]
+; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
+; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
+; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
+; BIGENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
+; BIGENDIAN-NEXT:    ret i24 [[R]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
+; LITTLEENDIAN-NEXT:    ret i24 [[V2:%.*]]
 ;
   %c = insertelement <3 x i8> poison, i8 0, i64 0
   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 2>
@@ -265,12 +275,12 @@ define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
   ret i24 %r
 }
 
+; The shift amount is known 24 on little-endian and known 24<<16 on big-endian
+; across all vector elements, so it's an overshift either way.
+
 define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 ; CHECK-LABEL: @bitcast_overshift_vector(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; CHECK-NEXT:    ret <3 x i24> poison
 ;
   %c = insertelement <9 x i8> poison, i8 24, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
@@ -279,12 +289,18 @@ define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
   ret <3 x i24> %r
 }
 
+; The shift amount is known 23 on little-endian and known 23<<16 on big-endian
+; across all vector elements, so it's an overshift for big-endian.
+
 define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
-; CHECK-LABEL: @bitcast_overshift_vector_bigend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend(
+; BIGENDIAN-NEXT:    ret <3 x i24> poison
+;
+; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend(
+; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
+; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
+; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
+; LITTLEENDIAN-NEXT:    ret <3 x i24> [[R]]
 ;
   %c = insertelement <9 x i8> poison, i8 23, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
@@ -293,12 +309,18 @@ define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
   ret <3 x i24> %r
 }
 
+; The shift amount is known 23 on big-endian and known 23<<16 on little-endian
+; across all vector elements, so it's an overshift for little-endian.
+
 define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) {
-; CHECK-LABEL: @bitcast_overshift_vector_littleend(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
-; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
-; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
-; CHECK-NEXT:    ret <3 x i24> [[R]]
+; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend(
+; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
+; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
+; BIGENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
+; BIGENDIAN-NEXT:    ret <3 x i24> [[R]]
+;
+; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend(
+; LITTLEENDIAN-NEXT:    ret <3 x i24> poison
 ;
   %c = insertelement <9 x i8> poison, i8 23, i64 0
   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
@@ -307,6 +329,8 @@ define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2
   ret <3 x i24> %r
 }
 
+; Negative test - the shift amount is known 24 or 24<<16 on only 2 out of 3 elements.
+
 define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 ; CHECK-LABEL: @bitcast_partial_overshift_vector(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -321,6 +345,8 @@ define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2)
   ret <3 x i24> %r
 }
 
+; Negative test - don't know how to look through a cast with non-integer type (but we could handle this...).
+
 define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: @bitcast_noshift_vector_wrong_type(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>