[llvm] [VPlan] Set flags when constructing zexts using VPWidenCastRecipe (PR #164198)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 19 20:02:16 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
createWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified.
This fixes it the same way trunc flags are handled too.
As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR.
---
Full diff: https://github.com/llvm/llvm-project/pull/164198.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+2)
- (modified) llvm/test/Transforms/LoopVectorize/cse-casts.ll (+61-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7651ba16b35cf..3fed003282f2b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -325,6 +325,8 @@ class VPBuilder {
VPIRFlags Flags;
if (Opcode == Instruction::Trunc)
Flags = VPIRFlags::TruncFlagsTy(false, false);
+ else if (Opcode == Instruction::ZExt)
+ Flags = VPIRFlags::NonNegFlagsTy(false);
return tryInsertInstruction(
new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
}
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index e923560bb77e8..b8ce35dc704d2 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -319,8 +319,9 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK: [[PRED_STORE_CONTINUE60]]:
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
@@ -349,3 +350,61 @@ loop:
exit:
ret void
}
+
+define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) {
+; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
+; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]]
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %x = load i8, ptr %p
+ %x.i32 = zext i8 %x to i32
+ %trunc = trunc i32 %x.i32 to i16
+ store i16 %trunc, ptr %q
+ %x.i16 = zext i8 %x to i16
+ store i16 %x.i16, ptr %r
+ %iv.next = add i64 %iv, 2
+ %done = icmp eq i64 %iv.next, 0
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/164198
More information about the llvm-commits
mailing list