[llvm] [VectorCombine] Support pattern `bitop(cast(x), C) -> cast(bitop(x, InvC))` (PR #155216)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 27 03:17:07 PDT 2025


https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/155216

>From ebdf90c0e829cb10b7095ad1c5bd6d0ea18e44b2 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 25 Aug 2025 01:19:07 +0800
Subject: [PATCH 1/5] [VectorCombine] NFC. Add tests for 154797

---
 .../VectorCombine/X86/bitop-of-castops.ll     | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index 220556c8c38c3..b7c9e35ecc6f1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -260,3 +260,163 @@ define <4 x i32> @or_zext_nneg(<4 x i16> %a, <4 x i16> %b) {
   %or = or <4 x i32> %z1, %z2
   ret <4 x i32> %or
 }
+
+; Test bitwise operations with integer-to-integer bitcast with one constant
+define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) {
+; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[BC1]], <i32 1000000, i32 2000000>
+; CHECK-NEXT:    ret <2 x i32> [[OR]]
+;
+  %bc1 = bitcast <4 x i16> %a to <2 x i32>
+  %or = or <2 x i32> %bc1, <i32 1000000, i32 2000000>
+  ret <2 x i32> %or
+}
+
+define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
+; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant_commuted(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> <i32 1000000, i32 2000000>, [[BC1]]
+; CHECK-NEXT:    ret <2 x i32> [[OR]]
+;
+  %bc1 = bitcast <4 x i16> %a to <2 x i32>
+  %or = or <2 x i32> <i32 1000000, i32 2000000>, %bc1
+  ret <2 x i32> %or
+}
+
+; Test bitwise operations with truncate and one constant
+define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
+; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant(
+; CHECK-NEXT:    [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[AND:%.*]] = or <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    ret <4 x i16> [[AND]]
+;
+  %t1 = trunc <4 x i32> %a to <4 x i16>
+  %or = or <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
+  ret <4 x i16> %or
+}
+
+; Test bitwise operations with zero extend and one constant
+define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) {
+; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant(
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[AND:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[AND]]
+;
+  %z1 = zext <4 x i16> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
+; CHECK-LABEL: @or_zext_v4i8_to_v4i32_constant_with_loss(
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1024, i32 129, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %z1 = zext <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 1024, i32 129, i32 3, i32 4>
+  ret <4 x i32> %or
+}
+
+; Test bitwise operations with sign extend and one constant
+define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
+; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant(
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %s1 = sext <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %s1, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) {
+; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant(
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 -1, i32 -2, i32 -3, i32 -4>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %s1 = sext <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %s1, <i32 -1, i32 -2, i32 -3, i32 -4>
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
+; CHECK-LABEL: @or_sext_v4i8_to_v4i32_constant_with_loss(
+; CHECK-NEXT:    [[Z1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 -10000, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %z1 = sext <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 -10000, i32 2, i32 3, i32 4>
+  ret <4 x i32> %or
+}
+
+; Test truncate with flag preservation and one constant
+define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
+; CHECK-LABEL: @and_trunc_nuw_nsw_constant(
+; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    ret <4 x i16> [[AND]]
+;
+  %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16>
+  %and = and <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
+  ret <4 x i16> %and
+}
+
+define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
+; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant(
+; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT:    ret <4 x i8> [[AND]]
+;
+  %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
+  %and = and <4 x i8> %t1, <i8 240, i8 241, i8 242, i8 243>
+  ret <4 x i8> %and
+}
+
+define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
+; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant(
+; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT:    ret <4 x i8> [[AND]]
+;
+  %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
+  %and = and <4 x i8> %t1, <i8 240, i8 1, i8 242, i8 3>
+  ret <4 x i8> %and
+}
+
+; Test zero extend with nneg flag and one constant
+define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
+; CHECK-LABEL: @or_zext_nneg_constant(
+; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %z1 = zext nneg <4 x i16> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
+; CHECK-LABEL: @or_zext_nneg_minus_constant(
+; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %z1 = zext nneg <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 240, i32 241, i32 242, i32 243>
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) {
+; CHECK-LABEL: @or_zext_nneg_multiconstant(
+; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[OR]]
+;
+  %z1 = zext nneg <4 x i8> %a to <4 x i32>
+  %or = or <4 x i32> %z1, <i32 240, i32 1, i32 242, i32 3>
+  ret <4 x i32> %or
+}

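Note on the *_with_loss tests above: they are negative tests. A wide constant lane only has an inverse under zext or sext if re-extending its truncation reproduces the lane exactly, and lanes such as 1024 (for an i8 zext) or -10000 (for an i8 sext) do not. A minimal standalone sketch of that per-lane check (plain C++, not the LLVM API; lane values taken from the tests above):

#include <cstdint>
#include <cstdio>

static bool zextInvertible(uint32_t C) {      // does an i8 X with zext(X) == C exist?
  return static_cast<uint32_t>(static_cast<uint8_t>(C)) == C;
}

static bool sextInvertible(int32_t C) {       // does an i8 X with sext(X) == C exist?
  return static_cast<int32_t>(static_cast<int8_t>(C)) == C;
}

int main() {
  std::printf("zext lane 1024:   %d\n", zextInvertible(1024));   // 0: lossy, blocks the fold
  std::printf("zext lane 129:    %d\n", zextInvertible(129));    // 1: invertible
  std::printf("sext lane -10000: %d\n", sextInvertible(-10000)); // 0: lossy, blocks the fold
  std::printf("sext lane -3:     %d\n", sextInvertible(-3));     // 1: invertible
}
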
>From 4d89a056680fbe4dfc9df4070b54869d8fd43853 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 25 Aug 2025 15:04:16 +0800
Subject: [PATCH 2/5] [VectorCombine] Support pattern bitop(cast(x), C) ->
 bitop(cast(x), cast(InvC))

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 155 ++++++++++++++++++
 .../VectorCombine/X86/bitop-of-castops.ll     |  64 ++++----
 2 files changed, 187 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 1275d53a075b5..e351e9205499b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -118,6 +118,7 @@ class VectorCombine {
   bool foldInsExtBinop(Instruction &I);
   bool foldInsExtVectorToShuffle(Instruction &I);
   bool foldBitOpOfCastops(Instruction &I);
+  bool foldBitOpOfCastConstant(Instruction &I);
   bool foldBitcastShuffle(Instruction &I);
   bool scalarizeOpOrCmp(Instruction &I);
   bool scalarizeVPIntrinsic(Instruction &I);
@@ -929,6 +930,158 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
   return true;
 }
 
+struct PreservedCastFlags {
+  bool NNeg = false;
+  bool NUW = false;
+  bool NSW = false;
+};
+
+// Try to cast C to InvC losslessly, satisfying CastOp(InvC) == C.
+// Will try its best to preserve the cast flags.
+static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo,
+                                    unsigned CastOp, const DataLayout &DL,
+                                    PreservedCastFlags &Flags) {
+  switch (CastOp) {
+  case Instruction::BitCast:
+    // Bitcast is always lossless.
+    return ConstantFoldCastOperand(Instruction::BitCast, C, InvCastTo, DL);
+  case Instruction::Trunc: {
+    auto *ZExtC = ConstantFoldCastOperand(Instruction::ZExt, C, InvCastTo, DL);
+    auto *SExtC = ConstantFoldCastOperand(Instruction::SExt, C, InvCastTo, DL);
+    // Truncating the zero-extended value back is always NUW.
+    Flags.NUW = true;
+    // NSW holds iff C is non-negative, i.e. zext and sext of C agree.
+    Flags.NSW = ZExtC == SExtC;
+    return ZExtC;
+  }
+  case Instruction::SExt:
+  case Instruction::ZExt: {
+    auto *InvC = ConstantExpr::getTrunc(C, InvCastTo);
+    auto *CastInvC = ConstantFoldCastOperand(CastOp, InvC, C->getType(), DL);
+    // Must satisfy CastOp(InvC) == C.
+    if (!CastInvC || CastInvC != C) 
+        return nullptr;
+    if (CastOp == Instruction::ZExt) {
+      auto *SExtInvC =
+          ConstantFoldCastOperand(Instruction::SExt, InvC, C->getType(), DL);
+      // NNeg holds iff InvC is non-negative, i.e. zext and sext of InvC agree.
+      Flags.NNeg = CastInvC == SExtInvC;
+    }
+    return InvC;
+  }
+  default:
+    return nullptr;
+  }
+}
+
+/// Match:
+///   bitop(castop(x), C) ->
+///   bitop(castop(x), castop(InvC)) ->
+///   castop(bitop(x, InvC))
+/// Supports: bitcast, trunc, sext, zext
+bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
+  Instruction *LHS;
+  Constant *C;
+
+  // Check if this is a bitwise logic operation
+  if (!match(&I, m_c_BitwiseLogic(m_Instruction(LHS), m_Constant(C))))
+    return false;
+
+  // Get the cast instructions
+  auto *LHSCast = dyn_cast<CastInst>(LHS);
+  if (!LHSCast)
+    return false;
+
+  Instruction::CastOps CastOpcode = LHSCast->getOpcode();
+
+  // Only handle supported cast operations
+  switch (CastOpcode) {
+  case Instruction::BitCast:
+  case Instruction::Trunc:
+  case Instruction::SExt:
+  case Instruction::ZExt:
+    break;
+  default:
+    return false;
+  }
+
+  Value *LHSSrc = LHSCast->getOperand(0);
+
+  // Only handle vector types with integer elements
+  auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
+  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
+  if (!SrcVecTy || !DstVecTy)
+    return false;
+
+  if (!SrcVecTy->getScalarType()->isIntegerTy() ||
+      !DstVecTy->getScalarType()->isIntegerTy())
+    return false;
+
+  // Find the constant InvC such that castop(InvC) equals C.
+  PreservedCastFlags RHSFlags;
+  Constant *InvC = getLosslessInvCast(C, SrcVecTy, CastOpcode, *DL, RHSFlags);
+  if (!InvC)
+    return false;
+
+  // Cost check:
+  //   OldCost = bitlogic on the destination type + cast
+  //   NewCost = bitlogic on the source type + cast
+
+  // Calculate the specific cost of the cast with instruction context.
+  InstructionCost LHSCastCost =
+      TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy,
+                           TTI::CastContextHint::None, CostKind, LHSCast);
+
+  InstructionCost OldCost =
+      TTI.getArithmeticInstrCost(I.getOpcode(), DstVecTy, CostKind) +
+      LHSCastCost;
+
+  // For new cost, we can't provide an instruction (it doesn't exist yet)
+  InstructionCost GenericCastCost = TTI.getCastInstrCost(
+      CastOpcode, DstVecTy, SrcVecTy, TTI::CastContextHint::None, CostKind);
+
+  InstructionCost NewCost =
+      TTI.getArithmeticInstrCost(I.getOpcode(), SrcVecTy, CostKind) +
+      GenericCastCost;
+
+  // Account for a multi-use cast using its specific cost.
+  if (!LHSCast->hasOneUse())
+    NewCost += LHSCastCost;
+
+  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
+                    << " NewCost=" << NewCost << "\n");
+
+  if (NewCost > OldCost)
+    return false;
+
+  // Create the operation on the source type
+  Value *NewOp = Builder.CreateBinOp((Instruction::BinaryOps)I.getOpcode(),
+                                     LHSSrc, InvC, I.getName() + ".inner");
+  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
+    NewBinOp->copyIRFlags(&I);
+
+  Worklist.pushValue(NewOp);
+
+  // Create the cast operation directly to ensure we get a new instruction
+  Instruction *NewCast = CastInst::Create(CastOpcode, NewOp, I.getType());
+
+  // Preserve cast instruction flags
+  if (RHSFlags.NNeg)
+    NewCast->setNonNeg();
+  if (RHSFlags.NSW)
+    NewCast->setHasNoSignedWrap();
+  if (RHSFlags.NUW)
+    NewCast->setHasNoUnsignedWrap();
+
+  NewCast->andIRFlags(LHSCast);
+
+  // Insert the new instruction
+  Value *Result = Builder.Insert(NewCast);
+
+  replaceValue(I, *Result);
+  return true;
+}
+
 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
 /// destination type followed by shuffle. This can enable further transforms by
 /// moving bitcasts or shuffles together.
@@ -4206,6 +4359,8 @@ bool VectorCombine::run() {
       case Instruction::Xor:
         if (foldBitOpOfCastops(I))
           return true;
+        if (foldBitOpOfCastConstant(I))
+          return true;
         break;
       case Instruction::PHI:
         if (shrinkPhiOfShuffles(I))
diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index b7c9e35ecc6f1..cd77818a2f9b6 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -264,9 +264,9 @@ define <4 x i32> @or_zext_nneg(<4 x i16> %a, <4 x i16> %b) {
 ; Test bitwise operations with integer-to-integer bitcast with one constant
 define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) {
 ; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[BC1]], <i32 1000000, i32 2000000>
-; CHECK-NEXT:    ret <2 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 16960, i16 15, i16 -31616, i16 30>
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[BC1]]
 ;
   %bc1 = bitcast <4 x i16> %a to <2 x i32>
   %or = or <2 x i32> %bc1, <i32 1000000, i32 2000000>
@@ -275,9 +275,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) {
 
 define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
 ; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant_commuted(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> <i32 1000000, i32 2000000>, [[BC1]]
-; CHECK-NEXT:    ret <2 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 16960, i16 15, i16 -31616, i16 30>
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[BC1]]
 ;
   %bc1 = bitcast <4 x i16> %a to <2 x i32>
   %or = or <2 x i32> <i32 1000000, i32 2000000>, %bc1
@@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
 ; Test bitwise operations with truncate and one constant
 define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[AND:%.*]] = or <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
-; CHECK-NEXT:    ret <4 x i16> [[AND]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i32> [[A1:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16>
+; CHECK-NEXT:    ret <4 x i16> [[T1]]
 ;
   %t1 = trunc <4 x i32> %a to <4 x i16>
   %or = or <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
@@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
 ; Test bitwise operations with zero extend and one constant
 define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) {
 ; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[AND:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[AND]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[Z1]]
 ;
   %z1 = zext <4 x i16> %a to <4 x i32>
   %or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
@@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
 ; Test bitwise operations with sign extend and one constant
 define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant(
-; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[S1]]
 ;
   %s1 = sext <4 x i8> %a to <4 x i32>
   %or = or <4 x i32> %s1, <i32 1, i32 2, i32 3, i32 4>
@@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
 
 define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant(
-; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 -1, i32 -2, i32 -3, i32 -4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 -1, i8 -2, i8 -3, i8 -4>
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[S1]]
 ;
   %s1 = sext <4 x i8> %a to <4 x i32>
   %or = or <4 x i32> %s1, <i32 -1, i32 -2, i32 -3, i32 -4>
@@ -356,9 +356,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
 ; Test truncate with flag preservation and one constant
 define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
-; CHECK-NEXT:    ret <4 x i16> [[AND]]
+; CHECK-NEXT:    [[A:%.*]] = and <4 x i32> [[A1:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A]] to <4 x i16>
+; CHECK-NEXT:    ret <4 x i16> [[T1]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16>
   %and = and <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
@@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
 
 define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT:    [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT:    [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
 
 define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT:    [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT:    [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -390,9 +390,9 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
 ; Test zero extend with nneg flag and one constant
 define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
 ; CHECK-LABEL: @or_zext_nneg_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i16> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[Z1]]
 ;
   %z1 = zext nneg <4 x i16> %a to <4 x i32>
   %or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
@@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
 
 define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_zext_nneg_minus_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT:    [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT:    [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %z1 = zext nneg <4 x i8> %a to <4 x i32>
@@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
 
 define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_zext_nneg_multiconstant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT:    [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT:    [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %z1 = zext nneg <4 x i8> %a to <4 x i32>

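A note on the flag handling in getLosslessInvCast, following per-lane integer reasoning: for trunc, the inverse constant is the zero-extended preimage, so the rebuilt trunc is always nuw and is nsw only when zext and sext of the narrow constant agree; for zext, the candidate inverse is the truncated constant, the fold is legal only if zero-extending it reproduces the original lane, and nneg survives only when that truncated constant is non-negative. A small standalone sketch of the same reasoning (plain C++, not the LLVM API; i32/i8 lanes chosen to match the tests):

#include <cstdint>
#include <cstdio>

// trunc case (i32 -> i8; the bitop constant C lives in i8): the wide inverse
// InvC is the zero-extended preimage, so trunc(InvC) == C always holds.  The
// rebuilt trunc never wraps unsigned, and it is nsw exactly when zext(C) and
// sext(C) agree, i.e. C is non-negative as an i8.
struct TruncInverse {
  uint32_t InvC;
  bool NUW;
  bool NSW;
};

static TruncInverse invertTruncLane(uint8_t C) {
  TruncInverse R;
  R.InvC = C;                                   // zext i8 -> i32
  uint32_t SExtC =
      static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(C)));
  R.NUW = true;
  R.NSW = (SExtC == R.InvC);                    // zext == sext  <=>  C >= 0
  return R;
}

// zext case (i8 -> i32; the bitop constant C lives in i32): the candidate
// inverse is trunc(C); the fold is only legal if zext(trunc(C)) == C, and the
// nneg flag survives only when trunc(C) is non-negative as an i8.
static bool invertZExtLane(uint32_t C, uint8_t &InvC, bool &NNeg) {
  InvC = static_cast<uint8_t>(C);               // trunc i32 -> i8 (two's-complement narrowing)
  if (static_cast<uint32_t>(InvC) != C)
    return false;                               // lossy: no inverse constant
  NNeg = static_cast<int8_t>(InvC) >= 0;
  return true;
}

int main() {
  // i8 lane -16 (0xF0) from and_trunc_nuw_nsw_minus_constant: nuw kept, nsw dropped.
  TruncInverse T = invertTruncLane(0xF0);
  std::printf("trunc lane 0xF0: InvC=%u nuw=%d nsw=%d\n", T.InvC, T.NUW, T.NSW);

  // i32 lane 240 from or_zext_nneg_minus_constant: foldable, but nneg is lost.
  uint8_t InvC = 0;
  bool NNeg = false;
  if (invertZExtLane(240, InvC, NNeg))
    std::printf("zext lane 240: InvC=%d nneg=%d\n", InvC, NNeg);
}

With lane 240/0xF0 both sketches drop the signed-side flag, which matches the nsw/nneg losses visible in the updated CHECK lines above (the new cast's flags are then intersected with the original cast's via andIRFlags).
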
>From f3306dbcf4b39c13c272f062b4d6856f8c4470e2 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 25 Aug 2025 16:17:47 +0800
Subject: [PATCH 3/5] format

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e351e9205499b..f73c2864cc2c6 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -959,8 +959,8 @@ static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo,
     auto *InvC = ConstantExpr::getTrunc(C, InvCastTo);
     auto *CastInvC = ConstantFoldCastOperand(CastOp, InvC, C->getType(), DL);
     // Must satisfy CastOp(InvC) == C.
-    if (!CastInvC || CastInvC != C) 
-        return nullptr;
+    if (!CastInvC || CastInvC != C)
+      return nullptr;
     if (CastOp == Instruction::ZExt) {
       auto *SExtInvC =
           ConstantFoldCastOperand(Instruction::SExt, InvC, C->getType(), DL);

>From 68c2c1f45339addd24782242285b9fce7bac265f Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 25 Aug 2025 18:19:46 +0800
Subject: [PATCH 4/5] fix testcase

---
 llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
index 761ad80d560e8..6c0ab8b9abaff 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
@@ -45,11 +45,11 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
 ; CHECK-NEXT:    [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255)
 ; CHECK-NEXT:    [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255)
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]]
-; CHECK-NEXT:    [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15)
+; CHECK-NEXT:    [[DOTINNER:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15)
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[DOTINNER]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])

>From 4dbaf8eab2bcb8f2afed95b6a0202d7f75805ce8 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 27 Aug 2025 18:16:51 +0800
Subject: [PATCH 5/5] fix comment

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f73c2864cc2c6..4593e1719f0db 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -939,7 +939,8 @@ struct PreservedCastFlags {
 // Try to cast C to InvC losslessly, satisfying CastOp(InvC) == C.
 // Will try its best to preserve the cast flags.
 static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo,
-                                    unsigned CastOp, const DataLayout &DL,
+                                    Instruction::CastOps CastOp,
+                                    const DataLayout &DL,
                                     PreservedCastFlags &Flags) {
   switch (CastOp) {
   case Instruction::BitCast:


