[llvm] Revert "[ValueTracking] Improve `Bitcast` handling to match SDAG" (PR #145191)

Sat Jun 21 15:19:48 PDT 2025

https://github.com/aeubanks created https://github.com/llvm/llvm-project/pull/145191

Reverts llvm/llvm-project#125935

Causes miscompiles, see comments in #125935

>From cd646d16794e787b58c4e0a632483876f0952198 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <ayueubanks at gmail.com>
Date: Sat, 21 Jun 2025 15:19:21 -0700
Subject: [PATCH] Revert "[ValueTracking] Improve `Bitcast` handling to match
 SDAG (#125935)"

This reverts commit 0b8179b2adbc821324c425d7cafd269f84e72d5e.
---
 llvm/lib/Analysis/ValueTracking.cpp           | 27 ++-----------------
 .../InstCombine/X86/x86-vector-shifts.ll      |  4 +--
 .../InstCombine/bitcast-known-bits.ll         | 21 ++++++++++-----
 3 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 73320b556f825..a17417cb5189c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1346,8 +1346,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
         isa<ScalableVectorType>(I->getType()))
       break;
 
-    unsigned NumElts = DemandedElts.getBitWidth();
-    bool IsLE = Q.DL.isLittleEndian();
     // Look through a cast from narrow vector elements to wider type.
     // Examples: v4i32 -> v2i64, v3i8 -> v24
     unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1366,6 +1364,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
       //
       // The known bits of each sub-element are then inserted into place
       // (dependent on endian) to form the full result of known bits.
+      unsigned NumElts = DemandedElts.getBitWidth();
       unsigned SubScale = BitWidth / SubBitWidth;
       APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
       for (unsigned i = 0; i != NumElts; ++i) {
@@ -1377,32 +1376,10 @@ static void computeKnownBitsFromOperator(const Operator *I,
       for (unsigned i = 0; i != SubScale; ++i) {
         computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
                          Depth + 1);
-        unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
+        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
         Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
       }
     }
-    // Look through a cast from wider vector elements to narrow type.
-    // Examples: v2i64 -> v4i32
-    if (SubBitWidth % BitWidth == 0) {
-      unsigned SubScale = SubBitWidth / BitWidth;
-      KnownBits KnownSrc(SubBitWidth);
-      APInt SubDemandedElts =
-          APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
-      computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
-                       Depth + 1);
-
-      Known.Zero.setAllBits();
-      Known.One.setAllBits();
-      for (unsigned i = 0; i != SubScale; ++i) {
-        if (DemandedElts[i]) {
-          unsigned Shifts = IsLE ? i : NumElts - 1 - i;
-          unsigned Offset = (Shifts % SubScale) * BitWidth;
-          Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
-          if (Known.isUnknown())
-            break;
-        }
-      }
-    }
     break;
   }
   case Instruction::SExt: {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
index cc252ae53803b..db56080a3ea2b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -3732,6 +3732,7 @@ define <4 x i64> @test_avx2_psrl_0() {
   ret <4 x i64> %16
 }
 
+; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
 define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-LABEL: @PR125228(
 ; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3740,8 +3741,7 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
 ; CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
+; CHECK-NEXT:    [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
 ; CHECK-NEXT:    [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    ret <2 x i64> [[SHUFP_UNCASTED]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
index 65b43df752f76..3e47e775e3a28 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
@@ -12,7 +12,8 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
 ; CHECK-NEXT:    [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
 ; CHECK-NEXT:    [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST6]]
+; CHECK-NEXT:    [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
+; CHECK-NEXT:    ret <16 x i8> [[AND7]]
 ;
   %and = and <16 x i8> %arg1, splat (i8 3)
   %and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -32,7 +33,8 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
+; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
+; CHECK-NEXT:    ret <16 x i8> [[AND3]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -51,7 +53,8 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
+; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
+; CHECK-NEXT:    ret <16 x i8> [[AND3]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -67,7 +70,8 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg)  {
 ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST1]]
+; CHECK-NEXT:    [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
+; CHECK-NEXT:    ret <16 x i8> [[AND]]
 ;
   %lshr = lshr <8 x i16> %arg, splat (i16 15)
   %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -84,8 +88,7 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
 ; CHECK-NEXT:    [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
 ; CHECK-NEXT:    [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
 ; CHECK-NEXT:    [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
-; CHECK-NEXT:    [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
 ; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -107,7 +110,11 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
 ; CHECK-NEXT:    [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
 ; CHECK-NEXT:    [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
 ; CHECK-NEXT:    [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
-; CHECK-NEXT:    ret <16 x i8> [[ADD14]]
+; CHECK-NEXT:    [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
+; CHECK-NEXT:    [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
+; CHECK-NEXT:    [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
+; CHECK-NEXT:    [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
+; CHECK-NEXT:    ret <16 x i8> [[ADD13]]
 ;
   %shl6 = shl <8 x i16> %arg2, splat (i16 8)
   %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>