[llvm] [VPlan] Set flags when constructing zexts using VPWidenCastRecipe (PR #164198)
    Luke Lau via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Oct 20 02:58:49 PDT 2025
    
    
  
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/164198
>From c6c49691aef869df4d4f2482ad2211c2e4ace1d8 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 20 Oct 2025 09:52:17 +0800
Subject: [PATCH 1/3] [VPlan] Set flags when constructing zexts using
 VPWidenCastRecipe
createWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified.
This fixes it the same way trunc flags are handled too.
As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR.
---
 .../Vectorize/LoopVectorizationPlanner.h      |  2 +
 .../Transforms/LoopVectorize/cse-casts.ll     | 63 ++++++++++++++++++-
 2 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7651ba16b35cf..3fed003282f2b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -325,6 +325,8 @@ class VPBuilder {
     VPIRFlags Flags;
     if (Opcode == Instruction::Trunc)
       Flags = VPIRFlags::TruncFlagsTy(false, false);
+    else if (Opcode == Instruction::ZExt)
+      Flags = VPIRFlags::NonNegFlagsTy(false);
     return tryInsertInstruction(
         new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
   }
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index e923560bb77e8..b8ce35dc704d2 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -319,8 +319,9 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
 ; CHECK:       [[PRED_STORE_CONTINUE60]]:
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop
@@ -349,3 +350,61 @@ loop:
 exit:
   ret void
 }
+
+define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) {
+; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
+; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]]
+; CHECK-NEXT:    [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT:    [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]]
+; CHECK-NEXT:    [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
+; CHECK-NEXT:    [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+; CHECK-NEXT:    store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]]
+; CHECK-NEXT:    store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br [[EXIT:label %.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %x = load i8, ptr %p
+  %x.i32 = zext i8 %x to i32
+  %trunc = trunc i32 %x.i32 to i16
+  store i16 %trunc, ptr %q
+  %x.i16 = zext i8 %x to i16
+  store i16 %x.i16, ptr %r
+  %iv.next = add i64 %iv, 2
+  %done = icmp eq i64 %iv.next, 0
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}
>From d15fc606a51e837a116831ae259195331ff905f8 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 20 Oct 2025 17:36:03 +0800
Subject: [PATCH 2/3] Add noalias
---
 .../Transforms/LoopVectorize/cse-casts.ll     | 35 ++++++-------------
 1 file changed, 10 insertions(+), 25 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index b8ce35dc704d2..5a118a55227d2 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -351,44 +351,29 @@ exit:
   ret void
 }
 
-define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) {
+define void @simplified_cast_preserves_irflag_type(ptr noalias %p, ptr noalias %q, ptr noalias %r) {
 ; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type(
-; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) {
+; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], ptr noalias [[R:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
-; CHECK:       [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2
-; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]]
-; CHECK-NEXT:    [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
-; CHECK-NEXT:    [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
-; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
-; CHECK-NEXT:    [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]]
-; CHECK-NEXT:    [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
-; CHECK-NEXT:    [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
-; CHECK-NEXT:    br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P]], align 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
-; CHECK-NEXT:    store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]]
-; CHECK-NEXT:    store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]]
+; CHECK-NEXT:    store i16 [[TMP2]], ptr [[Q]], align 2
+; CHECK-NEXT:    store i16 [[TMP2]], ptr [[R]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
-; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop
>From 5e8aa980ed653ae6d9b9fc6eab32c698415ab3af Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 20 Oct 2025 17:58:29 +0800
Subject: [PATCH 3/3] Update exit condition
---
 llvm/test/Transforms/LoopVectorize/cse-casts.ll | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index 5a118a55227d2..fb45745eff1cb 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -368,12 +368,11 @@ define void @simplified_cast_preserves_irflag_type(ptr noalias %p, ptr noalias %
 ; CHECK-NEXT:    store i16 [[TMP2]], ptr [[Q]], align 2
 ; CHECK-NEXT:    store i16 [[TMP2]], ptr [[R]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48
 ; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[EXIT:.*]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
+; CHECK:       [[SCALAR_PH]]:
 ;
 entry:
   br label %loop
@@ -387,8 +386,8 @@ loop:
   %x.i16 = zext i8 %x to i16
   store i16 %x.i16, ptr %r
   %iv.next = add i64 %iv, 2
-  %done = icmp eq i64 %iv.next, 0
-  br i1 %done, label %exit, label %loop
+  %ec = icmp eq i64 %iv.next, 100
+  br i1 %ec, label %exit, label %loop
 
 exit:
   ret void
    
    
More information about the llvm-commits
mailing list