[llvm] [InstCombine] Tighten use constraint in factorization transforms (PR #102943)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 10:53:34 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Kevin McAfee (kalxr)
The transform in `tryFactorization()` used to require that both operations have one use, so both would be removable after the transform. Commit https://github.com/llvm/llvm-project/commit/0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c loosened the factorization constraints so that only one of the operations must be removable. There are cases where this increases the number of uses of non-intermediate values (values that are not the result of a binop involved in the transform) with no discernible benefit (e.g. https://alive2.llvm.org/ce/z/8K4VuE). Likewise, `factorizeMathWithShlOps()` uses the same constraints and has the same issue (https://alive2.llvm.org/ce/z/EKK-Kp).
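For illustration, here is a reduced sketch of the extra-use situation (LLVM IR modeled on the existing `shl-factor.ll` extra-use tests; the function and value names are illustrative, and the behavior can be observed with `opt -passes=instcombine -S`):

```llvm
declare void @use8(i8)

; Before this patch, InstCombine would still factor %sum into
; (%x + %y) << %z even though %xs cannot be removed (it has a
; second use), so the rewrite added uses of %x and %z without
; eliminating any instruction.
define i8 @add_shl_extra_use(i8 %x, i8 %y, i8 %z) {
  %xs = shl i8 %x, %z
  call void @use8(i8 %xs)     ; extra use keeps %xs alive
  %ys = shl i8 %y, %z
  %sum = add i8 %xs, %ys
  ret i8 %sum
}
```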
This patch fixes the issue by restricting both transforms to fewer cases, with new checks that still permit the desirable transforms motivated by https://github.com/llvm/llvm-project/commit/0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c. For `tryFactorization()`, the transform is limited to cases where both operations are removable, with new support for treating identity operations as removable. For `factorizeMathWithShlOps()`, it is limited to cases where either both operations are removable, or one is removable and the created add/sub will constant-fold.
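The constants case stays profitable because the created add constant-folds away. A sketch of that case, taken essentially verbatim from the new `shl-factor.ll` tests in this patch:

```llvm
declare void @use8(i8)

; Still transformed after this patch: both shifted values are
; constants, so the new add folds (4 + 3 == 7) and no extra
; instruction is created even though %s1 has another use.
define i8 @add_shl_same_amount_constants_extra_use(i8 %z) {
  %s1 = shl i8 4, %z
  %s2 = shl i8 3, %z
  %sum = add i8 %s1, %s2      ; becomes shl i8 7, %z
  call void @use8(i8 %s1)
  ret i8 %sum
}
```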
---
Patch is 21.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102943.diff
7 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+6)
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+10-4)
- (modified) llvm/test/Transforms/InstCombine/and-or.ll (+8-8)
- (modified) llvm/test/Transforms/InstCombine/ctpop.ll (+6-6)
- (modified) llvm/test/Transforms/InstCombine/shl-factor.ll (+34-10)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+31-31)
- (modified) llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bd086230cbec5..de2cdf093e1a01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1424,6 +1424,12 @@ static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
       !match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt))))
     return nullptr;
 
+  // This transform is only profitable if both operations, or one operation
+  // and the resulting add/sub, can be eliminated/folded.
+  if (!(Op0->hasOneUse() && Op1->hasOneUse()) &&
+      !(isa<Constant>(X) && isa<Constant>(Y)))
+    return nullptr;
+
   // No-wrap propagates only when all ops have no-wrap.
   bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() &&
                 Op1->hasNoSignedWrap();
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0fb8b639c97b95..1365b8ac8a3c78 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -688,9 +688,12 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
       // If "B op D" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
 
-      // If "B op D" doesn't simplify then only go on if one of the existing
+      // If "B op D" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
+      // Note that when an operation is equal to one of its operands, that
+      // operation is "zapped" by having never existed in the first place.
+      if (!V && (LHS->hasOneUse() || LHS == A || LHS == B) &&
+          (RHS->hasOneUse() || RHS == C || RHS == D))
         V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
       if (V)
         RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
@@ -708,9 +711,12 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
       // If "A op C" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
 
-      // If "A op C" doesn't simplify then only go on if one of the existing
+      // If "A op C" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
+      // Note that when an operation is equal to one of its operands, that
+      // operation is "zapped" by having never existed in the first place.
+      if (!V && (LHS->hasOneUse() || LHS == A || LHS == B) &&
+          (RHS->hasOneUse() || RHS == C || RHS == D))
         V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
       if (V)
         RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
index b4ef27607121d2..7e1c99c82afca9 100644
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -685,11 +685,11 @@ define i32 @or_or_and_noOneUse_fail1(i32 %a, i32 %b) {
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A:%.*]], 23
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 157
; CHECK-NEXT: call void @use2(i32 [[AND]])
-; CHECK-NEXT: [[AND1:%.*]] = or i32 [[B:%.*]], 157
-; CHECK-NEXT: [[OR:%.*]] = and i32 [[SHR]], [[AND1]]
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 23
-; CHECK-NEXT: [[AND9:%.*]] = and i32 [[TMP1]], 157
-; CHECK-NEXT: [[R:%.*]] = or i32 [[OR]], [[AND9]]
+; CHECK-NEXT: [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]]
+; CHECK-NEXT: [[SHR8:%.*]] = lshr i32 [[B]], 23
+; CHECK-NEXT: [[AND9:%.*]] = and i32 [[SHR8]], 157
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND3]], [[AND9]]
+; CHECK-NEXT: [[R:%.*]] = or i32 [[TMP1]], [[AND]]
; CHECK-NEXT: ret i32 [[R]]
;
%shr = ashr i32 %a, 23
@@ -714,9 +714,9 @@ define { i1, i1, i1, i1, i1 } @or_or_and_noOneUse_fail2(i1 %a_0, i1 %a_1, i1 %a_
; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[A_1:%.*]], [[B_1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]]
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[A_1]]
-; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[B_1]]
-; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP3]]
; CHECK-NEXT: [[DOTNOT1:%.*]] = or i1 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } [[TMP8]], i1 [[TMP4]], 1
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 940bb868c4c615..3177625b5cfb68 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -442,9 +442,9 @@ define i32 @parity_xor_extra_use(i32 %arg, i32 %arg1) {
; CHECK-NEXT: [[I:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG:%.*]])
; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1
; CHECK-NEXT: tail call void @use(i32 [[I2]])
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG]]
-; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[TMP1]])
-; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[I3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]])
+; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
+; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I4]], [[I2]]
; CHECK-NEXT: ret i32 [[I5]]
;
%i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -461,9 +461,9 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) {
; CHECK-NEXT: [[I:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]])
; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1
; CHECK-NEXT: tail call void @use(i32 [[I2]])
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1]], [[ARG:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[TMP1]])
-; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[I3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG:%.*]])
+; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
+; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I2]], [[I4]]
; CHECK-NEXT: ret i32 [[I5]]
;
%i = tail call i32 @llvm.ctpop.i32(i32 %arg1)
diff --git a/llvm/test/Transforms/InstCombine/shl-factor.ll b/llvm/test/Transforms/InstCombine/shl-factor.ll
index 38eecaeff8e41e..dcdd5020f76645 100644
--- a/llvm/test/Transforms/InstCombine/shl-factor.ll
+++ b/llvm/test/Transforms/InstCombine/shl-factor.ll
@@ -43,8 +43,8 @@ define i8 @add_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @add_shl_same_amount_nsw_extra_use1(
; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT: call void @use8(i8 [[XS]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[DIFF:%.*]] = shl nsw i8 [[TMP1]], [[Z]]
+; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[DIFF:%.*]] = add nsw i8 [[XS]], [[YS]]
; CHECK-NEXT: ret i8 [[DIFF]]
;
%xs = shl nsw nuw i8 %x, %z
@@ -56,10 +56,10 @@ define i8 @add_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
define i8 @add_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @add_shl_same_amount_nuw_extra_use2(
-; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
; CHECK-NEXT: call void @use8(i8 [[YS]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[DIFF:%.*]] = shl nuw i8 [[TMP1]], [[Z]]
+; CHECK-NEXT: [[DIFF:%.*]] = add nuw nsw i8 [[XS]], [[YS]]
; CHECK-NEXT: ret i8 [[DIFF]]
;
%xs = shl nuw i8 %x, %z
@@ -174,8 +174,8 @@ define i8 @sub_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @sub_shl_same_amount_nsw_extra_use1(
; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT: call void @use8(i8 [[XS]])
-; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[DIFF:%.*]] = shl nsw i8 [[TMP1]], [[Z]]
+; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[DIFF:%.*]] = sub nsw i8 [[XS]], [[YS]]
; CHECK-NEXT: ret i8 [[DIFF]]
;
%xs = shl nsw nuw i8 %x, %z
@@ -187,10 +187,10 @@ define i8 @sub_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
define i8 @sub_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @sub_shl_same_amount_nuw_extra_use2(
-; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
; CHECK-NEXT: call void @use8(i8 [[YS]])
-; CHECK-NEXT: [[TMP1:%.*]] = sub nuw i8 [[X:%.*]], [[Y]]
-; CHECK-NEXT: [[DIFF:%.*]] = shl nuw i8 [[TMP1]], [[Z]]
+; CHECK-NEXT: [[DIFF:%.*]] = sub nuw nsw i8 [[XS]], [[YS]]
; CHECK-NEXT: ret i8 [[DIFF]]
;
%xs = shl nuw i8 %x, %z
@@ -265,3 +265,27 @@ define i6 @sub_shl_same_amount_partial_nuw2(i6 %x, i6 %y, i6 %z) {
ret i6 %diff
}
+define i8 @add_shl_same_amount_constants(i8 %z) {
+; CHECK-LABEL: @add_shl_same_amount_constants(
+; CHECK-NEXT: [[SUM:%.*]] = shl i8 7, [[Z:%.*]]
+; CHECK-NEXT: ret i8 [[SUM]]
+;
+ %s1 = shl i8 4, %z
+ %s2 = shl i8 3, %z
+ %sum = add i8 %s1, %s2
+ ret i8 %sum
+}
+
+define i8 @add_shl_same_amount_constants_extra_use(i8 %z) {
+; CHECK-LABEL: @add_shl_same_amount_constants_extra_use(
+; CHECK-NEXT: [[S1:%.*]] = shl i8 4, [[Z:%.*]]
+; CHECK-NEXT: [[SUM:%.*]] = shl i8 7, [[Z]]
+; CHECK-NEXT: call void @use8(i8 [[S1]])
+; CHECK-NEXT: ret i8 [[SUM]]
+;
+ %s1 = shl i8 4, %z
+ %s2 = shl i8 3, %z
+ %sum = add i8 %s1, %s2
+ call void @use8(i8 %s1)
+ ret i8 %sum
+}
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 45674acaae5387..0d627e9b6a8834 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3910,8 +3910,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
-; IND-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; IND-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2
+; IND-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
+; IND-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3921,13 +3921,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST4]], [[T]]
-; IND-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
+; IND-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
+; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
+; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
-; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; IND-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IND-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3940,8 +3940,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; IND-NEXT: [[TMP13:%.*]] = sext i8 [[IDX]] to i64
-; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; IND-NEXT: [[TMP14:%.*]] = sext i8 [[IDX]] to i64
+; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; IND-NEXT: store i32 [[SPHI]], ptr [[PTR]], align 4
; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
; IND-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -3979,8 +3979,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
-; UNROLL-NEXT: [[EXT_MUL6:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; UNROLL-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL6]], 2
+; UNROLL-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
+; UNROLL-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3991,15 +3991,15 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
; UNROLL-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 8
-; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
-; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
+; UNROLL-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
+; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
+; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 8
+; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
+; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP13]], align 4
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 16, i32 16>
-; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4012,8 +4012,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; UNROLL-NEXT: [[TMP14:%.*]] = sext i8 [[IDX]] to i64
-; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; UNROLL-NEXT: [[TMP15:%.*]] = sext i8 [[IDX]] to i64
+; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
; UNROLL-NEXT: store i32 [[SPHI]], ptr [[PTR]], align 4
; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
; UNROLL-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -4129,8 +4129,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
-; INTERLEAVE-NEXT: [[EXT_MUL6:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; INTERLEAVE-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL6]], 2
+; INTERLEAVE-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
+; INTERLEAVE-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 4, i32 8, i32 12>
@@ -4141,15 +4141,15 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
; INTERLEAVE-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
-; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
-; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
-; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
-; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
+; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
+; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
+; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
+; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
+; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP13]], align 4
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 32, i32 32, i32 32, i32 32>
-; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; INTERLEAVE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4162,8 +4162,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-;...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/102943