[llvm] c36c0fa - [VectorCombine] Avoid crossing address space boundaries.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 16 13:20:52 PDT 2020
Author: Artem Belevich
Date: 2020-10-16T13:19:31-07:00
New Revision: c36c0fabd17a57513fa00ebd5579f307f889cb64
URL: https://github.com/llvm/llvm-project/commit/c36c0fabd17a57513fa00ebd5579f307f889cb64
DIFF: https://github.com/llvm/llvm-project/commit/c36c0fabd17a57513fa00ebd5579f307f889cb64.diff
LOG: [VectorCombine] Avoid crossing address space boundaries.
We cannot bitcast pointers across different address spaces, and VectorCombine
should be careful when it attempts to find the original source of the loaded
data.
Differential Revision: https://reviews.llvm.org/D89577
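To make the failure mode concrete, here is a minimal sketch of the pattern
the patch guards against (hand-reduced from the test added below; the value
names are illustrative). The load's operand is a generic (AS 0) pointer, but
stripPointerCasts() walks through the addrspacecast back to the addrspace(5)
alloca:

  %a = alloca float, align 4, addrspace(5)               ; pointer in AS 5
  %b = addrspacecast float addrspace(5)* %a to float*    ; generic AS 0 view
  %d = load float, float* %b, align 4                    ; load via AS 0

Widening the load means casting its source pointer to <1 x float>*, and a
bitcast may not change a pointer's address space (that is what addrspacecast
is for), so the AS 5 pointer returned by stripPointerCasts() must not be used
as the source of the widened load.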
Added:
llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 816d44fb2cb1..5f3d5c768a9e 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -111,6 +111,13 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// TODO: Extend this to match GEP with constant offsets.
Value *PtrOp = Load->getPointerOperand()->stripPointerCasts();
assert(isa<PointerType>(PtrOp->getType()) && "Expected a pointer type");
+ unsigned AS = Load->getPointerAddressSpace();
+
+ // If original AS != Load's AS, we can't bitcast the original pointer and have
+ // to use Load's operand instead. Ideally we would want to strip pointer casts
+ // without changing AS, but there's no API to do that ATM.
+ if (AS != PtrOp->getType()->getPointerAddressSpace())
+ PtrOp = Load->getPointerOperand();

Type *ScalarTy = Scalar->getType();
uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
@@ -126,7 +133,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT))
return false;

- unsigned AS = Load->getPointerAddressSpace();

// Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0
int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS);
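For illustration, a sketch of the rewrite once the fix keeps the load's own
AS 0 operand, mirroring the CHECK lines in the test added below (the %bad
line is hypothetical, showing the cast that reusing the stripped addrspace(5)
pointer would have required):

  %cast = bitcast float* %c to <1 x float>*              ; same AS: valid
  %wide = load <1 x float>, <1 x float>* %cast, align 4
  ; %bad = bitcast float addrspace(5)* %a to <1 x float>*  ; invalid IR:
  ;                                                        ; bitcast cannot
  ;                                                        ; change the AS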
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
new file mode 100644
index 000000000000..5ff7fe847047
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+; ModuleID = 'load-as-transition.ll'
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.hoge = type { float }
+
+define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 {
+; CHECK-LABEL: @load_from_other_as(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
+; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+bb:
+ %a = alloca %struct.hoge, align 4, addrspace(5)
+ %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge*
+ %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0
+ %d = load float, float* %c, align 4
+ %e = insertelement <4 x float> undef, float %d, i32 0
+ store <4 x float> %e, <4 x float>* %resultptr, align 16
+ ret void
+}
+
+attributes #0 = { "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 12.0.0"}
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg b/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True