[llvm] [VPlan] Keep common flags during CSE. (PR #157664)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 02:47:31 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/157664
>From a33366f4fffe8f82d3a2eeb1283cac3c4d42def7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 9 Sep 2025 13:57:21 +0100
Subject: [PATCH 1/2] [VPlan] Only drop flags on mis-match during CSE.
During CSE, we don't have to drop poison-generating flags if both the
re-used recipe and the to-be-replaced recipe have the same flags.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 4 +++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 36 +++++++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +--
.../LoopVectorize/PowerPC/vectorize-bswap.ll | 2 +-
.../LoopVectorize/X86/scatter_crash.ll | 8 ++---
llvm/test/Transforms/LoopVectorize/flags.ll | 2 +-
6 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b93bdf244237e..53291a931530f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -721,6 +721,10 @@ class VPIRFlags {
AllFlags = Other.AllFlags;
}
+ /// Only keep flags also present in \p Other. \p Other must have the same
+ /// OpType as the current object.
+ void intersectFlags(const VPIRFlags &Other);
+
/// Drop all poison-generating flags.
void dropPoisonGeneratingFlags() {
// NOTE: This needs to be kept in-sync with
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 46162a9276469..9f1311fbd0687 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -392,6 +392,42 @@ void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
+ assert(OpType == Other.OpType && "OpType must match");
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;
+ WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;
+ break;
+ case OperationType::Trunc:
+ TruncFlags.HasNUW &= Other.TruncFlags.HasNUW;
+ TruncFlags.HasNSW &= Other.TruncFlags.HasNSW;
+ break;
+ case OperationType::DisjointOp:
+ DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint;
+ break;
+ case OperationType::PossiblyExactOp:
+ ExactFlags.IsExact = Other.ExactFlags.IsExact;
+ break;
+ case OperationType::GEPOp:
+ GEPFlags &= Other.GEPFlags;
+ break;
+ case OperationType::FPMathOp:
+ FMFs.NoNaNs &= Other.FMFs.NoNaNs;
+ FMFs.NoInfs &= Other.FMFs.NoInfs;
+ break;
+ case OperationType::NonNegOp:
+ NonNegFlags.NonNeg &= Other.NonNegFlags.NonNeg;
+ break;
+ case OperationType::Cmp:
+ assert(CmpPredicate == Other.CmpPredicate && "Cannot drop CmpPredicate");
+ break;
+ case OperationType::Other:
+ assert(AllFlags == Other.AllFlags && "Cannot drop other flags");
+ break;
+ }
+}
+
FastMathFlags VPIRFlags::getFastMathFlags() const {
assert(OpType == OperationType::FPMathOp &&
"recipe doesn't have fast math flags");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 10b2f5df2e23e..d86b53dd894fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2042,9 +2042,9 @@ void VPlanTransforms::cse(VPlan &Plan) {
// V must dominate Def for a valid replacement.
if (!VPDT.dominates(V->getParent(), VPBB))
continue;
- // Drop poison-generating flags when reusing a value.
+ // Only keep flags present on both V and Def.
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
- RFlags->dropPoisonGeneratingFlags();
+ RFlags->intersectFlags(*cast<VPRecipeWithIRFlags>(Def));
Def->replaceAllUsesWith(V);
continue;
}
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
index 36c3a2a612d82..db1f2c71e0f77 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
@@ -16,7 +16,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index df54411f7e710..c2dfce0aa70b8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -142,8 +142,8 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]])
-; CHECK-NEXT: [[TMP49:%.*]] = or <16 x i64> [[VEC_IND37]], splat (i64 1)
-; CHECK-NEXT: [[TMP36:%.*]] = add <16 x i64> [[TMP30]], [[TMP49]]
+; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1)
+; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP49]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]])
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]])
@@ -191,8 +191,8 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]])
-; CHECK-NEXT: [[TMP54:%.*]] = or <8 x i64> [[VEC_IND70]], splat (i64 1)
-; CHECK-NEXT: [[TMP50:%.*]] = add <8 x i64> [[TMP44]], [[TMP54]]
+; CHECK-NEXT: [[TMP54:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1)
+; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP54]]
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]])
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]])
diff --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll
index cef8ea656afaa..cbdcd50476b98 100644
--- a/llvm/test/Transforms/LoopVectorize/flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/flags.ll
@@ -175,7 +175,7 @@ define void @gep_with_shared_nusw_and_others(i64 %n, ptr %A) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
>From de5d6d938f2e91e40b505daab487c82b6803062e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 10 Sep 2025 10:45:21 +0100
Subject: [PATCH 2/2] !fixup add missing &, add some more missing tests.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
llvm/test/Transforms/LoopVectorize/flags.ll | 468 ++++++++++++++++++
2 files changed, 469 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9f1311fbd0687..bf51489543098 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -407,7 +407,7 @@ void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint;
break;
case OperationType::PossiblyExactOp:
- ExactFlags.IsExact = Other.ExactFlags.IsExact;
+ ExactFlags.IsExact &= Other.ExactFlags.IsExact;
break;
case OperationType::GEPOp:
GEPFlags &= Other.GEPFlags;
diff --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll
index cbdcd50476b98..7a4bfa9228925 100644
--- a/llvm/test/Transforms/LoopVectorize/flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/flags.ll
@@ -215,3 +215,471 @@ loop:
exit:
ret void
}
+
+define void @exact_on_first_shift(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @exact_on_first_shift(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %lshr.1 = lshr exact i32 %l.A, 10
+ store i32 %lshr.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %lshr.2 = lshr i32 %l.A, 10
+ store i32 %lshr.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @exact_on_second_shift(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @exact_on_second_shift(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %lshr.1 = lshr i32 %l.A, 10
+ store i32 %lshr.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %lshr.2 = lshr exact i32 %l.A, 10
+ store i32 %lshr.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @exact_on_both_shifts(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @exact_on_both_shifts(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %lshr.1 = lshr exact i32 %l.A, 10
+ store i32 %lshr.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %lshr.2 = lshr exact i32 %l.A, 10
+ store i32 %lshr.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @disjoint_on_first_or(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @disjoint_on_first_or(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %or.1 = or disjoint i32 %l.A, 10
+ store i32 %or.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %or.2 = or i32 %l.A, 10
+ store i32 %or.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @disjoint_on_second_or(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @disjoint_on_second_or(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[OR_1:%.*]] = or i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %or.1 = or i32 %l.A, 10
+ store i32 %or.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %or.2 = or disjoint i32 %l.A, 10
+ store i32 %or.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @disjoint_on_both_or(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: define void @disjoint_on_both_or(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10
+; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %or.1 = or disjoint i32 %l.A, 10
+ store i32 %or.1, ptr %gep.A, align 4
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ %or.2 = or disjoint i32 %l.A, 10
+ store i32 %or.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @trunc_flags_no_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: define void @trunc_flags_no_common(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[WIDE_LOAD]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nsw i32 [[L_A]] to i16
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw i32 [[L_A]] to i16
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %trunc.1 = trunc nsw i32 %l.A to i16
+ %gep.B = getelementptr i16, ptr %B, i64 %iv
+ store i16 %trunc.1, ptr %gep.B, align 4
+ %trunc.2 = trunc nuw i32 %l.A to i16
+ %gep.C = getelementptr i16, ptr %C, i64 %iv
+ store i16 %trunc.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @trunc_flags_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: define void @trunc_flags_common(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <4 x i32> [[WIDE_LOAD]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nuw i32 [[L_A]] to i16
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw nsw i32 [[L_A]] to i16
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr i32, ptr %A, i64 %iv
+ %l.A = load i32, ptr %gep.A, align 4
+ %trunc.1 = trunc nuw i32 %l.A to i16
+ %gep.B = getelementptr i16, ptr %B, i64 %iv
+ store i16 %trunc.1, ptr %gep.B, align 4
+ %trunc.2 = trunc nuw nsw i32 %l.A to i16
+ %gep.C = getelementptr i16, ptr %C, i64 %iv
+ store i16 %trunc.2, ptr %gep.B, align 4
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 128
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list