[llvm] 10f41a2 - [SLP]Fix PR55688: Miscompile due to incorrect nuw/nsw handling.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 14:00:09 PDT 2022
Author: Alexey Bataev
Date: 2022-05-25T13:59:06-07:00
New Revision: 10f41a214767219ef369b217f685eb545b80e386
URL: https://github.com/llvm/llvm-project/commit/10f41a214767219ef369b217f685eb545b80e386
DIFF: https://github.com/llvm/llvm-project/commit/10f41a214767219ef369b217f685eb545b80e386.diff
LOG: [SLP]Fix PR55688: Miscompile due to incorrect nuw/nsw handling.
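All ReductionOps must be used when propagating flags to the newly created
reduction ops; otherwise the transformation is not correct. In addition, the
nuw/nsw flags must be dropped, because no-wrap guarantees cannot be preserved
once the operations are reassociated.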
Differential Revision: https://reviews.llvm.org/D126371
Added:
Modified:
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 04e834c0bc8aa..676c0c1487db8 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -413,8 +413,10 @@ Value *createOrderedReduction(IRBuilderBase &B,
/// of each scalar operation (VL) that will be converted into a vector (I).
/// If OpValue is non-null, we only consider operations similar to OpValue
/// when intersecting.
-/// Flag set: NSW, NUW, exact, and all of fast-math.
-void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
+/// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of
+/// fast-math.
+void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr,
+ bool IncludeWrapFlags = true);
/// Returns true if we can prove that \p S is defined and always negative in
/// loop \p L.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index cd9e5a846afb6..c5582fb90bbfe 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1097,7 +1097,8 @@ Value *llvm::createOrderedReduction(IRBuilderBase &B,
return B.CreateFAddReduce(Start, Src);
}
-void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
+ bool IncludeWrapFlags) {
auto *VecOp = dyn_cast<Instruction>(I);
if (!VecOp)
return;
@@ -1106,7 +1107,7 @@ void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
if (!Intersection)
return;
const unsigned Opcode = Intersection->getOpcode();
- VecOp->copyIRFlags(Intersection);
+ VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
for (auto *V : VL) {
auto *Instr = dyn_cast<Instruction>(V);
if (!Instr)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d74153c4c6368..259e0e565dd17 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10327,7 +10327,7 @@ class HorizontalReduction {
}
/// Creates reduction operation with the current opcode with the IR flags
- /// from \p ReductionOps.
+ /// from \p ReductionOps, dropping nuw/nsw flags.
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
@@ -10341,26 +10341,14 @@ class HorizontalReduction {
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
if (auto *Sel = dyn_cast<SelectInst>(Op)) {
- propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
- propagateIRFlags(Op, ReductionOps[1]);
+ propagateIRFlags(Sel->getCondition(), ReductionOps[0], nullptr,
+ /*IncludeWrapFlags=*/false);
+ propagateIRFlags(Op, ReductionOps[1], nullptr,
+ /*IncludeWrapFlags=*/false);
return Op;
}
}
- propagateIRFlags(Op, ReductionOps[0]);
- return Op;
- }
-
- /// Creates reduction operation with the current opcode with the IR flags
- /// from \p I.
- static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
- Value *RHS, const Twine &Name, Value *I) {
- auto *SelI = dyn_cast<SelectInst>(I);
- Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
- if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
- if (auto *Sel = dyn_cast<SelectInst>(Op))
- propagateIRFlags(Sel->getCondition(), SelI->getCondition());
- }
- propagateIRFlags(Op, I);
+ propagateIRFlags(Op, ReductionOps[0], nullptr, /*IncludeWrapFlags=*/false);
return Op;
}
@@ -11031,10 +11019,6 @@ class HorizontalReduction {
for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
Instruction *RedOp = InstVals[I + 1].first;
Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
- ReductionOpsListType Ops;
- if (auto *Sel = dyn_cast<SelectInst>(RedOp))
- Ops.emplace_back().push_back(Sel->getCondition());
- Ops.emplace_back().push_back(RedOp);
Value *RdxVal1 = InstVals[I].second;
Value *StableRdxVal1 = RdxVal1;
auto It1 = TrackedVals.find(RdxVal1);
@@ -11046,7 +11030,7 @@ class HorizontalReduction {
if (It2 != TrackedVals.end())
StableRdxVal2 = It2->second;
Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
- StableRdxVal2, "op.rdx", Ops);
+ StableRdxVal2, "op.rdx", ReductionOps);
ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
}
if (Sz % 2 == 1)
@@ -11081,17 +11065,13 @@ class HorizontalReduction {
if (ExtraReductions.size() == 1) {
Instruction *RedOp = ExtraReductions.back().first;
Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
- ReductionOpsListType Ops;
- if (auto *Sel = dyn_cast<SelectInst>(RedOp))
- Ops.emplace_back().push_back(Sel->getCondition());
- Ops.emplace_back().push_back(RedOp);
Value *RdxVal = ExtraReductions.back().second;
Value *StableRdxVal = RdxVal;
auto It = TrackedVals.find(RdxVal);
if (It != TrackedVals.end())
StableRdxVal = It->second;
VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
- StableRdxVal, "op.rdx", Ops);
+ StableRdxVal, "op.rdx", ReductionOps);
}
ReductionRoot->replaceAllUsesWith(VectorizedTree);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
index 5c310888353bd..df627f3456e05 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -28,7 +28,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[P2_024:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[P1_023:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
@@ -41,7 +41,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
-; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
+; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_026]]
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1
@@ -50,7 +50,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
-; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[S_0_LCSSA]]
;
entry:
@@ -148,7 +148,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
; CHECK-NEXT: [[P2_018:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ]
; CHECK-NEXT: [[P1_017:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ]
@@ -158,8 +158,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
-; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
-; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
+; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5]], [[S_020]]
+; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_017]], i64 [[IDX_EXT]]
@@ -170,7 +170,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
-; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[S_1]]
;
entry:
@@ -245,7 +245,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END_86:%.*]] ]
+; CHECK-NEXT: [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END_86:%.*]] ]
; CHECK-NEXT: [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ]
; CHECK-NEXT: [[P2_045:%.*]] = phi i8* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
; CHECK-NEXT: [[P1_044:%.*]] = phi i8* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
@@ -260,8 +260,8 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
-; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
-; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
+; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]]
+; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: if.end.86:
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[P1_044]], i64 [[IDX_EXT]]
@@ -272,7 +272,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
-; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[S_1]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index bafc67c9ea31f..a444df9ec7ade 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -1001,8 +1001,8 @@ define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = add nuw i32 [[TMP3]], [[ARG]]
-; CHECK-NEXT: [[OP_RDX2:%.*]] = add nsw i32 [[TMP6]], [[OP_RDX]]
+; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]]
; CHECK-NEXT: ret i32 [[OP_RDX2]]
;
; THRESHOLD-LABEL: @wobble(
@@ -1016,8 +1016,8 @@ define i32 @wobble(i32 %arg, i32 %bar) {
; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
-; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add nuw i32 [[TMP3]], [[ARG]]
-; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add nsw i32 [[TMP6]], [[OP_RDX]]
+; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP3]], [[ARG]]
+; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP6]], [[OP_RDX]]
; THRESHOLD-NEXT: ret i32 [[OP_RDX2]]
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 8ff25ec551585..f5a87aeed9564 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1463,7 +1463,6 @@ define float @fadd_v4f32_fmf_intersect(float* %p) {
ret float %add3
}
-; FIXME: Can't preserve no-wrap guarantees with reassociated math.
; This must not propagate 'nsw' to a new add instruction.
define void @nsw_propagation_v4i32(i32* %res, i32 %start) {
@@ -1482,7 +1481,7 @@ define void @nsw_propagation_v4i32(i32* %res, i32 %start) {
; STORE-LABEL: @nsw_propagation_v4i32(
; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; STORE-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP2]], [[START:%.*]]
+; STORE-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[START:%.*]]
; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16
; STORE-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
index e5fa8ad8233d4..f98f99907241c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -10,7 +10,7 @@ define i32 @foo(i32* %diff) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4
@@ -24,12 +24,12 @@ define i32 @foo(i32* %diff) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
-; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]]
+; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP9]], [[A_088]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
-; CHECK-NEXT: ret i32 [[OP_EXTRA]]
+; CHECK-NEXT: ret i32 [[OP_RDX]]
;
entry:
%m2 = alloca [8 x [8 x i32]], align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
index 7412b8781e53a..813b82b9aefcd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -9,7 +9,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], 4
@@ -23,14 +23,14 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
-; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP9]], [[A_088]]
+; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP9]], [[A_088]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 0
; CHECK-NEXT: call void @ff([8 x i32]* [[ARRAYDECAY]])
-; CHECK-NEXT: ret i32 [[OP_EXTRA]]
+; CHECK-NEXT: ret i32 [[OP_RDX]]
;
entry:
%m2 = alloca [8 x [8 x i32]], align 16
More information about the llvm-commits mailing list