[llvm] c62ea65 - [VectorCombine] Add Ext and Trunc support in foldBitOpOfCastConstant (#157822)

Thu Sep 11 02:08:51 PDT 2025

Author: Hongyu Chen
Date: 2025-09-11T17:08:47+08:00
New Revision: c62ea6598eaab0a1c18a3ff1f067907a58b9a144

URL: https://github.com/llvm/llvm-project/commit/c62ea6598eaab0a1c18a3ff1f067907a58b9a144
DIFF: https://github.com/llvm/llvm-project/commit/c62ea6598eaab0a1c18a3ff1f067907a58b9a144.diff

LOG: [VectorCombine] Add Ext and Trunc support in foldBitOpOfCastConstant (#157822)

Follow-up of https://github.com/llvm/llvm-project/pull/155216.
This patch doesn't preserve the flags. I will implement it in the
follow-up patch.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
    llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 17cb18a22336a..a84b6f59971c9 100644

--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -962,6 +962,9 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
   // Only handle supported cast operations
   switch (CastOpcode) {
   case Instruction::BitCast:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::Trunc:
     break;
   default:
     return false;

diff  --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
index 761ad80d560e8..6c0ab8b9abaff 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
@@ -45,11 +45,11 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
 ; CHECK-NEXT:    [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255)
 ; CHECK-NEXT:    [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255)
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]]
-; CHECK-NEXT:    [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15)
+; CHECK-NEXT:    [[DOTINNER:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15)
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[DOTINNER]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])

diff  --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index f6c9dce542ef4..79e72aaed6082 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
 ; Test bitwise operations with truncate and one constant
 define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
-; CHECK-NEXT:    ret <4 x i16> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i32> [[A1:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16>
+; CHECK-NEXT:    ret <4 x i16> [[T1]]
 ;
   %t1 = trunc <4 x i32> %a to <4 x i16>
   %or = or <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
@@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
 ; Test bitwise operations with zero extend and one constant
 define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) {
 ; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[Z1]]
 ;
   %z1 = zext <4 x i16> %a to <4 x i32>
   %or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
@@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
 ; Test bitwise operations with sign extend and one constant
 define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant(
-; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[S1]]
 ;
   %s1 = sext <4 x i8> %a to <4 x i32>
   %or = or <4 x i32> %s1, <i32 1, i32 2, i32 3, i32 4>
@@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
 
 define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant(
-; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[S1]], <i32 -1, i32 -2, i32 -3, i32 -4>
-; CHECK-NEXT:    ret <4 x i32> [[OR]]
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 -1, i8 -2, i8 -3, i8 -4>
+; CHECK-NEXT:    [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[S1]]
 ;
   %s1 = sext <4 x i8> %a to <4 x i32>
   %or = or <4 x i32> %s1, <i32 -1, i32 -2, i32 -3, i32 -4>
@@ -356,8 +356,8 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
 ; Test truncate with flag preservation and one constant
 define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[AND]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16>
@@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
 
 define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT:    [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT:    [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
 
 define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
 ; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant(
-; CHECK-NEXT:    [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT:    [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT:    [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
 ;
   %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -390,8 +390,8 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
 ; Test sign extend with nneg flag and one constant
 define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
 ; CHECK-LABEL: @or_zext_nneg_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    [[OR_INNER:%.*]] = or <4 x i16> [[A:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT:    [[OR:%.*]] = zext <4 x i16> [[OR_INNER]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %z1 = zext nneg <4 x i16> %a to <4 x i32>
@@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
 
 define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_zext_nneg_minus_constant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT:    [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT:    [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %z1 = zext nneg <4 x i8> %a to <4 x i32>
@@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
 
 define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) {
 ; CHECK-LABEL: @or_zext_nneg_multiconstant(
-; CHECK-NEXT:    [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT:    [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT:    [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %z1 = zext nneg <4 x i8> %a to <4 x i32>