[llvm] f865dbf - [SeparateConstOffsetFromGEP] Support GEP reordering for different types (#90802)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 16:57:40 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-05-09T16:57:36-07:00
New Revision: f865dbff17ca516d605b053d5556c1498c300a42
URL: https://github.com/llvm/llvm-project/commit/f865dbff17ca516d605b053d5556c1498c300a42
DIFF: https://github.com/llvm/llvm-project/commit/f865dbff17ca516d605b053d5556c1498c300a42.diff
LOG: [SeparateConstOffsetFromGEP] Support GEP reordering for different types (#90802)
The previous requirement that both GEPs in the chain use the same types doesn't
show up in existing lit tests, but it has an impact on real code -- especially
after the canonicalization of GEPs to i8.
Alive2 tests for the inbounds handling:
Case 1: https://alive2.llvm.org/ce/z/6bfFY3
Case 2: https://alive2.llvm.org/ce/z/DkLMLF
Added:
Modified:
llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll
llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index c54a956fc7e24..9f85396cde259 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -972,22 +972,13 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
TargetTransformInfo &TTI) {
- Type *GEPType = GEP->getResultElementType();
- // TODO: support reordering for non-trivial GEP chains
- if (GEPType->isAggregateType() || GEP->getNumIndices() != 1)
+ if (GEP->getNumIndices() != 1)
return false;
auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
if (!PtrGEP)
return false;
- Type *PtrGEPType = PtrGEP->getResultElementType();
- // TODO: support reordering for non-trivial GEP chains
- if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1)
- return false;
-
- // TODO: support reordering for non-trivial GEP chains
- if (PtrGEPType != GEPType ||
- PtrGEP->getSourceElementType() != GEP->getSourceElementType())
+ if (PtrGEP->getNumIndices() != 1)
return false;
bool NestedNeedsExtraction;
@@ -1002,8 +993,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
/*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
return false;
- IRBuilder<> Builder(GEP);
- Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
bool GEPInBounds = GEP->isInBounds();
bool PtrGEPInBounds = PtrGEP->isInBounds();
bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
@@ -1018,13 +1007,14 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
}
}
+ IRBuilder<> Builder(GEP);
// For trivial GEP chains, we can swap the indicies.
- auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(),
- SmallVector<Value *, 4>(GEP->indices()));
- cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
- auto NewGEP = Builder.CreateGEP(GEPType, NewSrc,
- SmallVector<Value *, 4>(PtrGEP->indices()));
- cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
+ Value *NewSrc = Builder.CreateGEP(
+ GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
+ SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
+ Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
+ SmallVector<Value *, 4>(PtrGEP->indices()),
+ "", IsChainInBounds);
GEP->replaceAllUsesWith(NewGEP);
RecursivelyDeleteTriviallyDeadInstructions(GEP);
return true;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
index c24bbd5f658f9..16e47f057babc 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
@@ -1,28 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s
-define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) {
+define void @inboundsPossiblyNegative(ptr %in.ptr, i64 %in.idx1) {
; CHECK-LABEL: define void @inboundsPossiblyNegative(
-; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1
; CHECK-NEXT: ret void
;
entry:
- %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1
- %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1
+ %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1
+ %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1
ret void
}
-define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) {
-; CHECK-LABEL: define void @inboundsNonNegative(
+define void @inboundsNonNegative_nonCanonical(ptr %in.ptr, i32 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegative_nonCanonical(
; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[IN_IDX1_NNEG1:%.*]] = and i32 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = sext i32 [[IN_IDX1_NNEG1]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1
; CHECK-NEXT: ret void
;
@@ -33,19 +32,277 @@ entry:
ret void
}
-define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) {
+define void @inboundsNonNegative(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegative(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i64 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1
+ %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonchained(ptr %in.ptr, i64 %in.idx1) {
; CHECK-LABEL: define void @inboundsNonchained(
-; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
+; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1
; CHECK-NEXT: ret void
;
entry:
- %in.idx1.nneg = and i32 %in.idx1, 2147483647
- %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1
- %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1
+ %idx1 = getelementptr <2 x i8>, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonNegativeType_i16i8(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegativeType_i16i8(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i16, ptr %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i8, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonNegative_i8i16(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegative_i8i16(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonchained_first(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonchained_first(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024
+ %idx1 = getelementptr i32, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonchained_second(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonchained_second(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr i8, ptr %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @notInbounds(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @notInbounds(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr i8, ptr %in.ptr, i64 1024
+ %idx1 = getelementptr i128, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @vectorType1(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @vectorType1(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 3
+ %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @vectorType2(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @vectorType2(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x half>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 1
+ %idx1 = getelementptr inbounds <4 x half>, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @vectorType3(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @vectorType3(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 1
+ %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @vectorType4(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @vectorType4(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x ptr addrspace(1)>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 3
+ %idx1 = getelementptr inbounds <8 x ptr addrspace(1)>, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+
+define void @ptrType(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @ptrType(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(2), ptr [[TMP0]], i32 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds ptr addrspace(2), ptr %in.ptr, i32 1
+ %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @ptrType2(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @ptrType2(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(3), ptr [[TMP0]], i32 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds ptr addrspace(3), ptr %in.ptr, i32 3
+ %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @ptrType3(ptr %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @ptrType3(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(7), ptr [[TMP0]], i32 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds ptr addrspace(7), ptr %in.ptr, i32 3
+ %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @addrspace1(ptr addrspace(1) %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @addrspace1(
+; CHECK-SAME: ptr addrspace(1) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[IN_PTR]], i64 [[IN_IDX1_NNEG]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i8, ptr addrspace(1) %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i128, ptr addrspace(1) %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @addrspace3(ptr addrspace(3) %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @addrspace3(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i8, ptr addrspace(3) %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i128, ptr addrspace(3) %const1, i64 %in.idx1.nneg
+ ret void
+}
+
+define void @addrspace7(ptr addrspace(7) %in.ptr, i64 %in.idx1) {
+; CHECK-LABEL: define void @addrspace7(
+; CHECK-SAME: ptr addrspace(7) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP0]], i64 1024
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807
+ %const1 = getelementptr inbounds i8, ptr addrspace(7) %in.ptr, i64 1024
+ %idx1 = getelementptr inbounds i128, ptr addrspace(7) %const1, i64 %in.idx1.nneg
ret void
}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
index 7137f0fb66fdb..b4119f0b50b4f 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
@@ -1,175 +1,286 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s
-define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
-; CHECK-LABEL: sink_addr:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_lshl_b32 s3, s1, 1
-; CHECK-NEXT: s_add_i32 s0, s0, s3
-; CHECK-NEXT: s_lshl_b32 s2, s2, 1
-; CHECK-NEXT: s_add_i32 s0, s0, s2
-; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %bb.1
-; CHECK-NEXT: v_mov_b32_e32 v12, s0
-; CHECK-NEXT: ds_read_b128 v[0:3], v12
-; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512
-; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024
-; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536
-; CHECK-NEXT: s_waitcnt lgkmcnt(3)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[0:3]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(2)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[4:7]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(1)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[8:11]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[12:15]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: .LBB0_2: ; %end
-; CHECK-NEXT: s_add_i32 s1, s0, 0x200
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: s_add_i32 s2, s0, 0x400
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s1
-; CHECK-NEXT: s_add_i32 s3, s0, 0x600
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s3
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_endpgm
+define void @sink_addr(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @sink_addr(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT: ret void
+;
entry:
- %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
- %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
- %const1 = getelementptr half, ptr addrspace(3) %base, i32 256
- %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
- %const2 = getelementptr half, ptr addrspace(3) %base, i32 512
- %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
- %const3 = getelementptr half, ptr addrspace(3) %base, i32 768
- %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
- %cmp0 = icmp eq i32 %in.idx0, 0
- br i1 %cmp0, label %bb.1, label %end
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr half, ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr half, ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr half, ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
+
+define void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @illegal_addr_mode(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[CONST1:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38192
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[CONST2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38448
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX2:%.*]] = getelementptr half, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[CONST3:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38764
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr half, ptr addrspace(3) %base, i64 38192
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr half, ptr addrspace(3) %base, i64 38448
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr half, ptr addrspace(3) %base, i64 38764
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
+
+
+define void @reorder_i8half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @reorder_i8half(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr i8, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr i8, ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr i8, ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr i8, ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
+
+define void @reorder_i64half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @reorder_i64half(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr i64, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr i64, ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr i64, ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr i64, ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
-bb.1:
- %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
- %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
- %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
- %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
- call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
- br label %end
+define void @reorder_halfi8(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @reorder_halfi8(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr half, ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr half, ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr half, ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
-end:
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+
+
+define void @bad_index(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @bad_index(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 1
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 2
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr i8, ptr addrspace(3) %base, i64 1
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr i8, ptr addrspace(3) %base, i64 2
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr i8, ptr addrspace(3) %base, i64 3
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
ret void
}
-define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
-; CHECK-LABEL: illegal_addr_mode:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_lshl_b32 s0, s5, 1
-; CHECK-NEXT: s_lshl_b32 s1, s6, 1
-; CHECK-NEXT: s_add_i32 s3, s4, s0
-; CHECK-NEXT: s_add_i32 s3, s3, s1
-; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60
-; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60
-; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8
-; CHECK-NEXT: s_cmp_lg_u32 s5, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB1_2
-; CHECK-NEXT: ; %bb.1: ; %bb.1
-; CHECK-NEXT: v_mov_b32_e32 v0, s3
-; CHECK-NEXT: v_mov_b32_e32 v4, s2
-; CHECK-NEXT: v_mov_b32_e32 v8, s1
-; CHECK-NEXT: v_mov_b32_e32 v12, s0
-; CHECK-NEXT: ds_read_b128 v[0:3], v0
-; CHECK-NEXT: ds_read_b128 v[4:7], v4
-; CHECK-NEXT: ds_read_b128 v[8:11], v8
-; CHECK-NEXT: ds_read_b128 v[12:15], v12
-; CHECK-NEXT: s_waitcnt lgkmcnt(3)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[0:3]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(2)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[4:7]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(1)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[8:11]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v[12:15]
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: .LBB1_2: ; %end
-; CHECK-NEXT: v_mov_b32_e32 v0, s3
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s2
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s1
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ; use v0
-; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_endpgm
+
+%struct.vec = type { [8 x i8], [4 x half] }
+define void @vector_struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @vector_struct_type(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 256
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 512
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 768
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]]
+; CHECK-NEXT: ret void
+;
entry:
- %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
- %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
- %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192
- %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
- %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448
- %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
- %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764
- %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
- %cmp0 = icmp eq i32 %in.idx0, 0
- br i1 %cmp0, label %bb.1, label %end
+ %base = getelementptr [1024 x %struct.vec], ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
-bb.1:
- %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
- %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
- %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
- %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
- call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
- call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
- br label %end
+define void @struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @struct_type(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+ %const1 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 256
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+ %const2 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 512
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+ %const3 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 768
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
+ ret void
+}
-end:
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+define void @struct_type_multiindex(ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 %in.idx1, i64 %in.idx2) {
+; CHECK-LABEL: define void @struct_type_multiindex(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]], i64 [[IN_IDX2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]], i32 0, i32 0
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[IDXPROM2]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i32 2
+; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM3]], i32 0, i32 0
+; CHECK-NEXT: [[IDXPROM5:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP3]], i32 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 4
+; CHECK-NEXT: [[IDXPROM6:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM6]], i32 0, i32 0
+; CHECK-NEXT: [[IDXPROM8:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 [[IDXPROM8]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 6
+; CHECK-NEXT: ret void
+;
+entry:
+ %const1 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 2
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx2
+ %const2 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 4
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx2
+ %const3 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 6
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx2
ret void
}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll
index a91c8172177f9..43dda1ae15176 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll
@@ -7,14 +7,14 @@ define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %i
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0
-; CHECK-NEXT: [[CONST11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 2048
-; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST11]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 2048
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0
-; CHECK-NEXT: [[CONST22:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4096
-; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST22]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP1]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4096
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0
-; CHECK-NEXT: [[CONST33:%.*]] = getelementptr i8, ptr [[TMP2]], i64 6144
-; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST33]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP2]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 6144
; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0
; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]]
; CHECK: bb.1:
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll
index a15f11a634db5..2e3b6ca3653fc 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll
@@ -186,3 +186,66 @@ end:
call void asm sideeffect "; use $0", "v"(ptr %idx3)
ret void
}
+
+
+define void @different_type_reorder2(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @different_type_reorder2(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[IN_PTR]], i64 [[IN_IDX0]]
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i64, ptr [[BASE]], i64 256
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[CONST1]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512
+; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[CONST2]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768
+; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[CONST3]], i64 [[IN_IDX1]]
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0
+; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]]
+; CHECK: bb.1:
+; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16
+; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16
+; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16
+; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+entry:
+ %base = getelementptr i8, ptr %in.ptr, i64 %in.idx0
+ %idx0 = getelementptr i8, ptr %base, i64 %in.idx1
+ %const1 = getelementptr i64, ptr %base, i64 256
+ %idx1 = getelementptr i8, ptr %const1, i64 %in.idx1
+ %const2 = getelementptr i64, ptr %base, i64 512
+ %idx2 = getelementptr i8, ptr %const2, i64 %in.idx1
+ %const3 = getelementptr i64, ptr %base, i64 768
+ %idx3 = getelementptr i8, ptr %const3, i64 %in.idx1
+ %cmp0 = icmp eq i64 %in.idx0, 0
+ br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+ %val0 = load <8 x i64>, ptr %idx0, align 16
+ %val1 = load <8 x i64>, ptr %idx1, align 16
+ %val2 = load <8 x i64>, ptr %idx2, align 16
+ %val3 = load <8 x i64>, ptr %idx3, align 16
+ call void asm sideeffect "; use $0", "v"(<8 x i64> %val0)
+ call void asm sideeffect "; use $0", "v"(<8 x i64> %val1)
+ call void asm sideeffect "; use $0", "v"(<8 x i64> %val2)
+ call void asm sideeffect "; use $0", "v"(<8 x i64> %val3)
+ br label %end
+
+end:
+ call void asm sideeffect "; use $0", "v"(ptr %idx0)
+ call void asm sideeffect "; use $0", "v"(ptr %idx1)
+ call void asm sideeffect "; use $0", "v"(ptr %idx2)
+ call void asm sideeffect "; use $0", "v"(ptr %idx3)
+ ret void
+}
More information about the llvm-commits
mailing list