[llvm] goldsteinn/gep of div (PR #96898)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 27 04:54:42 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: None (goldsteinn)
Changes:
- **[InstCombine] Add tests for canonicalizing `(gep <not i8> p, (exact_ins X, C))`; NFC**
- **[InstCombine] Canonicalize `(gep <not i8> p, (div exact X, C))`**
---
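As a quick illustration of the new fold (a minimal before/after sketch mirroring the `@gep_of_udiv` test in the diff; the function names here are placeholders): `sizeof(i32)` is 4 and the divisor 12 is divisible by 4, so the element scale folds into the divisor and the gep is rewritten on i8.

```llvm
; Before: the index is an exact udiv whose constant divisor (12) is a
; multiple of the i32 allocation size (4).
define ptr @before(ptr %p, i64 %x) {
  %idx = udiv exact i64 %x, 12
  %r = getelementptr i32, ptr %p, i64 %idx
  ret ptr %r
}

; After InstCombine: the scale is folded into the divisor (12 / 4 == 3)
; and the gep element type becomes i8; the exact flag is preserved.
define ptr @after(ptr %p, i64 %x) {
  %idx = udiv exact i64 %x, 3
  %r = getelementptr i8, ptr %p, i64 %idx
  ret ptr %r
}
```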
Full diff: https://github.com/llvm/llvm-project/pull/96898.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+49-10)
- (modified) llvm/test/Transforms/InstCombine/getelementptr.ll (+102)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+20-20)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 900cb05d94046..296b8398cf471 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2939,18 +2939,57 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
});
return Changed ? &GEP : nullptr;
}
- } else {
+ } else if (auto *ExactIns =
+ dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
// Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
Value *V;
- if ((has_single_bit(TyAllocSize) &&
- match(GEP.getOperand(1),
- m_Exact(m_Shr(m_Value(V),
- m_SpecificInt(countr_zero(TyAllocSize)))))) ||
- match(GEP.getOperand(1),
- m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
- return GetElementPtrInst::Create(Builder.getInt8Ty(),
- GEP.getPointerOperand(), V,
- GEP.getNoWrapFlags());
+ if (ExactIns->isExact()) {
+ if ((has_single_bit(TyAllocSize) &&
+ match(GEP.getOperand(1),
+ m_Shr(m_Value(V),
+ m_SpecificInt(countr_zero(TyAllocSize))))) ||
+ match(GEP.getOperand(1),
+ m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
+ return GetElementPtrInst::Create(Builder.getInt8Ty(),
+ GEP.getPointerOperand(), V,
+ GEP.getNoWrapFlags());
+ }
+ }
+ if (ExactIns->isExact() && ExactIns->hasOneUse()) {
+        // Try to canonicalize the non-i8 element type to i8: if the index
+        // is an exact div/shr with a constant RHS, we can fold the element
+        // scale into the div/shr constant (similar to the mul case, just
+        // inverted).
+ const APInt *C;
+ std::optional<APInt> NewC;
+ if (has_single_bit(TyAllocSize) &&
+ match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
+ C->uge(countr_zero(TyAllocSize)))
+ NewC = *C - countr_zero(TyAllocSize);
+ else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
+ APInt Quot;
+ uint64_t Rem;
+ APInt::udivrem(*C, TyAllocSize, Quot, Rem);
+ if (!Quot.isAllOnes() && Rem == 0)
+ NewC = Quot;
+ } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
+ APInt Quot;
+ int64_t Rem;
+ APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
+        // For sdiv we need to make sure we aren't creating INT_MIN / -1.
+ if (!Quot.isAllOnes() && Rem == 0)
+ NewC = Quot;
+ }
+
+ if (NewC.has_value()) {
+ Value *NewOp = Builder.CreateBinOp(
+ static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
+ ConstantInt::get(V->getType(), *NewC));
+ cast<BinaryOperator>(NewOp)->setIsExact();
+ return GetElementPtrInst::Create(Builder.getInt8Ty(),
+ GEP.getPointerOperand(), NewOp,
+ GEP.getNoWrapFlags());
+ }
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index f25abae60904c..b3a01993992eb 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1790,4 +1790,106 @@ define ptr @gep_sel_const_nuw(i1 %c) {
ret ptr %gep
}
+define ptr @gep_of_udiv(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_udiv(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv exact i64 [[X:%.*]], 3
+; CHECK-NEXT: [[R:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = udiv exact i64 %x, 12
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_udiv_fail_not_divisible(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_udiv_fail_not_divisible(
+; CHECK-NEXT: [[IDX:%.*]] = udiv exact i64 [[X:%.*]], 13
+; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = udiv exact i64 %x, 13
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+
+define ptr @gep_of_sdiv(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_sdiv(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact i64 [[X:%.*]], -9
+; CHECK-NEXT: [[R:%.*]] = getelementptr nusw nuw i8, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = sdiv exact i64 %x, -36
+ %r = getelementptr nusw nuw i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+
+define ptr @gep_of_sdiv_fail_not_divisible(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_sdiv_fail_not_divisible(
+; CHECK-NEXT: [[IDX:%.*]] = sdiv exact i64 [[X:%.*]], -35
+; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = sdiv exact i64 %x, -35
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_sdiv_fail_ub(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_sdiv_fail_ub(
+; CHECK-NEXT: [[IDX:%.*]] = sdiv i64 [[X:%.*]], -4
+; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = sdiv i64 %x, -4
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_lshr(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_lshr(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = lshr exact i64 %x, 3
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_ashr(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_ashr(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i64 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = ashr exact i64 %x, 3
+ %r = getelementptr inbounds i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_lshr_fail_missing_exact(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_lshr_fail_missing_exact(
+; CHECK-NEXT: [[IDX:%.*]] = lshr i64 [[X:%.*]], 3
+; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = lshr i64 %x, 3
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+define ptr @gep_of_ashr_fail_not_divisible(ptr %p, i64 %x) {
+; CHECK-LABEL: @gep_of_ashr_fail_not_divisible(
+; CHECK-NEXT: [[IDX:%.*]] = ashr exact i64 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[R]]
+;
+ %idx = ashr exact i64 %x, 1
+ %r = getelementptr i32, ptr %p, i64 %idx
+ ret ptr %r
+}
+
+
!0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 00b00bf160174..a79b885412097 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -4466,8 +4466,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
@@ -4483,8 +4483,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32
-; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 32
-; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 30
+; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
; IND-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4507,8 +4507,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4526,8 +4526,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
-; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
-; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
+; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; UNROLL-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4599,8 +4599,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4618,8 +4618,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
-; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
+; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -6009,8 +6009,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 32
-; IND-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP3]]
+; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 30
+; IND-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP3]]
; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]]
; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -6044,8 +6044,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
; UNROLL-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
-; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
+; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
+; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP3]]
; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP4]]
; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 8
@@ -6139,8 +6139,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP3]]
; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
@@ -6166,8 +6166,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; INTERLEAVE-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
; INTERLEAVE-NEXT: [[SEXT5:%.*]] = shl i64 [[IV]], 32
-; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 32
-; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP11]]
+; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 30
+; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
; INTERLEAVE-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
``````````
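For power-of-two allocation sizes the same idea applies to exact right shifts: rewriting the gep on i8 lets the shift amount drop by log2(sizeof(T)). A sketch of the shift case, taken from the `@gep_of_lshr` test above (function names again placeholders):

```llvm
; Before: lshr exact by 3 feeding a gep with i32 elements (scale 4).
define ptr @shr_before(ptr %p, i64 %x) {
  %idx = lshr exact i64 %x, 3
  %r = getelementptr i32, ptr %p, i64 %idx
  ret ptr %r
}

; After: the scale contributes log2(4) = 2 bits, so the shift shrinks
; from 3 to 1 and the gep is rewritten on i8.
define ptr @shr_after(ptr %p, i64 %x) {
  %idx = lshr exact i64 %x, 1
  %r = getelementptr i8, ptr %p, i64 %idx
  ret ptr %r
}
```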
https://github.com/llvm/llvm-project/pull/96898