[llvm] [VectorCombine] Relax vector type constraint on bitop(bitcast, bitcast) (PR #157245)

Sun Sep 7 23:22:20 PDT 2025

https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/157245

>From c79de90773dae1ef37f25b9873ea71eb681868ef Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Sat, 6 Sep 2025 16:55:27 +0800
Subject: [PATCH 1/4] Precommit tests

---
 .../VectorCombine/X86/bitop-of-castops.ll     | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index c6253a7b858ad..1e527538dd594 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -433,6 +433,19 @@ define <2 x i16> @and_bitcast_f32_to_v2i16_constant(float %a) {
   ret <2 x i16> %and
 }
 
+define <2 x i16> @and_bitcast_f32_to_v2i16(float %a, float %b) {
+; CHECK-LABEL: @and_bitcast_f32_to_v2i16(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast float [[A:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast float [[B:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i16> [[BC1]], [[BC2]]
+; CHECK-NEXT:    ret <2 x i16> [[AND]]
+;
+  %bc1 = bitcast float %a to <2 x i16>
+  %bc2 = bitcast float %b to <2 x i16>
+  %and = and <2 x i16> %bc1, %bc2
+  ret <2 x i16> %and
+}
+
 ; Negative test: bitcast from vector float to scalar int (optimization should not apply)
 define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) {
 ; CHECK-LABEL: @and_bitcast_v2f32_to_i64_constant(
@@ -445,6 +458,19 @@ define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) {
   ret i64 %and
 }
 
+define i64 @and_bitcast_v2f32_to_i64(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: @and_bitcast_v2f32_to_i64(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x float> [[A:%.*]] to i64
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x float> [[B:%.*]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[BC1]], [[BC2]]
+; CHECK-NEXT:    ret i64 [[AND]]
+;
+  %bc1 = bitcast <2 x float> %a to i64
+  %bc2 = bitcast <2 x float> %b to i64
+  %and = and i64 %bc1, %bc2
+  ret i64 %and
+}
+
 ; Test no-op bitcast
 define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) {
 ; CHECK-LABEL: @xor_bitcast_i16_to_i16_constant(
@@ -457,6 +483,19 @@ define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) {
   ret i16 %or
 }
 
+define i16 @xor_bitcast_i16_to_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: @xor_bitcast_i16_to_i16(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast i16 [[A:%.*]] to i16
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i16 [[B:%.*]] to i16
+; CHECK-NEXT:    [[OR:%.*]] = xor i16 [[BC1]], [[BC2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+  %bc1 = bitcast i16 %a to i16
+  %bc2 = bitcast i16 %b to i16
+  %or = xor i16 %bc1, %bc2
+  ret i16 %or
+}
+
 ; Test bitwise operations with integer vector to integer bitcast
 define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
 ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1_constant(
@@ -469,6 +508,19 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
   ret <16 x i1> %or
 }
 
+define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) {
+; CHECK-LABEL: @xor_bitcast_i16_to_v16i1(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast i16 [[A:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i16 [[B:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[OR:%.*]] = xor <16 x i1> [[BC1]], [[BC2]]
+; CHECK-NEXT:    ret <16 x i1> [[OR]]
+;
+  %bc1 = bitcast i16 %a to <16 x i1>
+  %bc2 = bitcast i16 %b to <16 x i1>
+  %or = xor <16 x i1> %bc1, %bc2
+  ret <16 x i1> %or
+}
+
 ; Test bitwise operations with integer vector to integer bitcast
 define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) {
 ; CHECK-LABEL: @or_bitcast_v16i1_to_i16_constant(
@@ -480,3 +532,16 @@ define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) {
   %or = or i16 %bc, 3
   ret i16 %or
 }
+
+define i16 @or_bitcast_v16i1_to_i16(<16 x i1> %a, <16 x i1> %b) {
+; CHECK-LABEL: @or_bitcast_v16i1_to_i16(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <16 x i1> [[A:%.*]] to i16
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <16 x i1> [[B:%.*]] to i16
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[BC1]], [[BC2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+  %bc1 = bitcast <16 x i1> %a to i16
+  %bc2 = bitcast <16 x i1> %b to i16
+  %or = or i16 %bc1, %bc2
+  ret i16 %or
+}

>From 84ae9236ee2c261ab2e9ca3b8b428fef774d4fd7 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Sat, 6 Sep 2025 16:58:44 +0800
Subject: [PATCH 2/4] [VectorCombine] Relax vector type constraint on
 bitop(bitcast, bitcast)

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 29 +++++++++----------
 .../VectorCombine/X86/bitop-of-castops.ll     |  7 ++---
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b1c7a2682785b..75c613eca97cf 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -870,14 +870,15 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
   if (LHSSrc->getType() != RHSSrc->getType())
     return false;
 
-  // Only handle vector types with integer elements
-  auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
-  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
-  if (!SrcVecTy || !DstVecTy)
+  auto *SrcTy = LHSSrc->getType();
+  auto *DstTy = I.getType();
+  // Only handle vector types with integer elements if the cast is not bitcast
+  if (CastOpcode != Instruction::BitCast &&
+      (!isa<FixedVectorType>(SrcTy) || !isa<FixedVectorType>(DstTy)))
     return false;
 
-  if (!SrcVecTy->getScalarType()->isIntegerTy() ||
-      !DstVecTy->getScalarType()->isIntegerTy())
+  if (!SrcTy->getScalarType()->isIntegerTy() ||
+      !DstTy->getScalarType()->isIntegerTy())
     return false;
 
   // Cost Check :
@@ -885,23 +886,21 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
   // NewCost = bitlogic + cast
 
   // Calculate specific costs for each cast with instruction context
-  InstructionCost LHSCastCost =
-      TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy,
-                           TTI::CastContextHint::None, CostKind, LHSCast);
-  InstructionCost RHSCastCost =
-      TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy,
-                           TTI::CastContextHint::None, CostKind, RHSCast);
+  InstructionCost LHSCastCost = TTI.getCastInstrCost(
+      CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, LHSCast);
+  InstructionCost RHSCastCost = TTI.getCastInstrCost(
+      CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, RHSCast);
 
   InstructionCost OldCost =
-      TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstVecTy, CostKind) +
+      TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstTy, CostKind) +
       LHSCastCost + RHSCastCost;
 
   // For new cost, we can't provide an instruction (it doesn't exist yet)
   InstructionCost GenericCastCost = TTI.getCastInstrCost(
-      CastOpcode, DstVecTy, SrcVecTy, TTI::CastContextHint::None, CostKind);
+      CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind);
 
   InstructionCost NewCost =
-      TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcVecTy, CostKind) +
+      TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcTy, CostKind) +
       GenericCastCost;
 
   // Account for multi-use casts using specific costs
diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index 1e527538dd594..d17e25a845b5b 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -510,10 +510,9 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
 
 define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) {
 ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast i16 [[A:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[BC2:%.*]] = bitcast i16 [[B:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[OR:%.*]] = xor <16 x i1> [[BC1]], [[BC2]]
-; CHECK-NEXT:    ret <16 x i1> [[OR]]
+; CHECK-NEXT:    [[B:%.*]] = xor i16 [[A:%.*]], [[B1:%.*]]
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i16 [[B]] to <16 x i1>
+; CHECK-NEXT:    ret <16 x i1> [[BC2]]
 ;
   %bc1 = bitcast i16 %a to <16 x i1>
   %bc2 = bitcast i16 %b to <16 x i1>

>From d29fbb40645007de54755f457ab189ad3a2f224f Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Sun, 7 Sep 2025 21:21:16 +0800
Subject: [PATCH 3/4] add comment

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 75c613eca97cf..7a0b7ad57a493 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -872,11 +872,13 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
 
   auto *SrcTy = LHSSrc->getType();
   auto *DstTy = I.getType();
-  // Only handle vector types with integer elements if the cast is not bitcast
+  // Bitcasts can handle scalar/vector mixes, such as i16 -> <16 x i1>.
+  // Other casts only handle vector types with integer elements.
   if (CastOpcode != Instruction::BitCast &&
       (!isa<FixedVectorType>(SrcTy) || !isa<FixedVectorType>(DstTy)))
     return false;
 
+  // Only integer scalar/vector values are legal for bitwise logic operations.
   if (!SrcTy->getScalarType()->isIntegerTy() ||
       !DstTy->getScalarType()->isIntegerTy())
     return false;

>From 5338fa41264173a233f51ed9f4b6b69d4c2c3b5f Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 8 Sep 2025 14:21:52 +0800
Subject: [PATCH 4/4] resolve conflict

---
 llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index d17e25a845b5b..f6c9dce542ef4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -510,9 +510,9 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
 
 define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) {
 ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1(
-; CHECK-NEXT:    [[B:%.*]] = xor i16 [[A:%.*]], [[B1:%.*]]
-; CHECK-NEXT:    [[BC2:%.*]] = bitcast i16 [[B]] to <16 x i1>
-; CHECK-NEXT:    ret <16 x i1> [[BC2]]
+; CHECK-NEXT:    [[B1:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[BC3:%.*]] = bitcast i16 [[B1]] to <16 x i1>
+; CHECK-NEXT:    ret <16 x i1> [[BC3]]
 ;
   %bc1 = bitcast i16 %a to <16 x i1>
   %bc2 = bitcast i16 %b to <16 x i1>