[polly] 8e570ab - Polly - specify address space when creating a pointer to a vector type
Michael Kruse via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 14 09:17:25 PDT 2020
Author: Mark Schimmel
Date: 2020-10-14T11:17:15-05:00
New Revision: 8e570abf104389d301419dc44cb58c7362dd5960
URL: https://github.com/llvm/llvm-project/commit/8e570abf104389d301419dc44cb58c7362dd5960
DIFF: https://github.com/llvm/llvm-project/commit/8e570abf104389d301419dc44cb58c7362dd5960.diff
LOG: Polly - specify address space when creating a pointer to a vector type
Polly incorrectly dropped the address space specified for a load instruction when it vectorized the code.
Reviewed By: Meinersbur
Differential Revision: https://reviews.llvm.org/D88907
Added:
polly/test/ScheduleOptimizer/vec-addr-space.ll
Modified:
polly/include/polly/CodeGen/BlockGenerators.h
polly/lib/CodeGen/BlockGenerators.cpp
Removed:
################################################################################
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index f2c52c8fedcf..dbdbbae40e56 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -662,7 +662,7 @@ class VectorBlockGenerator : BlockGenerator {
Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps, Loop *L);
- Type *getVectorPtrTy(const Value *V, int Width);
+ Type *getVectorPtrTy(const Value *V, int Width, unsigned AddrSpace);
/// Load a vector from a set of adjacent scalars
///
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index bf6c96b8091d..b50a66d70e16 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -1037,14 +1037,15 @@ Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
return Vector;
}
-Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
+Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width,
+ unsigned AddrSpace) {
PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
assert(PointerTy && "PointerType expected");
Type *ScalarType = PointerTy->getElementType();
auto *FVTy = FixedVectorType::get(ScalarType, Width);
- return PointerType::getUnqual(FVTy);
+ return PointerType::get(FVTy, AddrSpace);
}
Value *VectorBlockGenerator::generateStrideOneLoad(
@@ -1052,7 +1053,8 @@ Value *VectorBlockGenerator::generateStrideOneLoad(
__isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
unsigned VectorWidth = getVectorWidth();
auto *Pointer = Load->getPointerOperand();
- Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
+ auto AS = Pointer->getType()->getPointerAddressSpace();
+ Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth, AS);
unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
@@ -1081,7 +1083,8 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(
ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
__isl_keep isl_id_to_ast_expr *NewAccesses) {
auto *Pointer = Load->getPointerOperand();
- Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
+ auto AS = Pointer->getType()->getPointerAddressSpace();
+ Type *VectorPtrType = getVectorPtrTy(Pointer, 1, AS);
Value *NewPointer =
generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
@@ -1201,7 +1204,8 @@ void VectorBlockGenerator::copyStore(
extractScalarValues(Store, VectorMap, ScalarMaps);
if (Access.isStrideOne(isl::manage_copy(Schedule))) {
- Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
+ auto AS = Pointer->getType()->getPointerAddressSpace();
+ Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth(), AS);
Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
VLTS[0], NewAccesses);
@@ -1339,7 +1343,8 @@ void VectorBlockGenerator::generateScalarVectorLoads(
continue;
auto *Address = getOrCreateAlloca(*MA);
- Type *VectorPtrType = getVectorPtrTy(Address, 1);
+ auto AS = Address->getType()->getPointerAddressSpace();
+ Type *VectorPtrType = getVectorPtrTy(Address, 1, AS);
Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
Address->getName() + "_p_vec_p");
auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
diff --git a/polly/test/ScheduleOptimizer/vec-addr-space.ll b/polly/test/ScheduleOptimizer/vec-addr-space.ll
new file mode 100644
index 000000000000..7c66d7c58ce2
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/vec-addr-space.ll
@@ -0,0 +1,85 @@
+; RUN: opt %loadPolly -polly-vectorizer=polly -polly-opt-isl -polly-codegen -S < %s | FileCheck %s
+;
+; Polly crashed during codegen with an assertion error while trying to generate
+; a pointer bitcast from a pointer having an address space to one without
+;
+; CHECK-LABEL: entry:
+; CHECK: load <4 x float>, <4 x float> addrspace(4)*
+;
+; ModuleID = '/tmp/lud.bc'
+source_filename = "lud.c"
+; This datalayout was for a 32-bit ARC processor with 512-bit vector extension
+target datalayout = "e-m:e-p:32:32-p1:32:32-p3:32:32-p5:32:32-i64:32-f64:32-v64:32-v128:32-a:0:32-v256:32-v512:32-n8:16:32"
+; Specify x86 because the ARC backend is still experimental and not built by default
+target triple = "x86_64-unknown-unknown"
+
+; Function Attrs: noinline nounwind
+define void @LU_decomp_kij_opt(i32 %n, i32 %lda, float addrspace(4)* %A, float addrspace(4)* %scratch) #0 {
+entry:
+ %cmp34 = icmp sgt i32 %n, 0
+ br i1 %cmp34, label %for.body.lr.ph, label %for.end34
+
+for.body.lr.ph: ; preds = %entry
+ %0 = add nsw i32 %n, -1
+ br label %for.body
+
+for.body: ; preds = %for.inc32, %for.body.lr.ph
+ %k.035 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.inc32 ]
+ %mul = mul nsw i32 %k.035, %lda
+ %add = add nsw i32 %mul, %k.035
+ %arrayidx = getelementptr inbounds float, float addrspace(4)* %A, i32 %add
+ %1 = load float, float addrspace(4)* %arrayidx, align 4
+ %conv1 = fdiv arcp float 1.000000e+00, %1
+ %add2 = add nuw nsw i32 %k.035, 1
+ %exitcond37 = icmp eq i32 %k.035, %0
+ br i1 %exitcond37, label %for.end34, label %for.body6.lr.ph
+
+for.body6.lr.ph: ; preds = %for.body
+ br label %for.body6
+
+for.body6: ; preds = %for.inc29, %for.body6.lr.ph
+ %i.033 = phi i32 [ %add2, %for.body6.lr.ph ], [ %inc30, %for.inc29 ]
+ %mul7 = mul nsw i32 %i.033, %lda
+ %add8 = add nsw i32 %mul7, %k.035
+ %arrayidx9 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add8
+ %2 = load float, float addrspace(4)* %arrayidx9, align 4
+ %mul10 = fmul arcp contract float %conv1, %2
+ store float %mul10, float addrspace(4)* %arrayidx9, align 4
+ br label %for.body18
+
+for.body18: ; preds = %for.body18, %for.body6
+ %j.031 = phi i32 [ %add2, %for.body6 ], [ %inc, %for.body18 ]
+ %3 = load float, float addrspace(4)* %arrayidx9, align 4
+ %add23 = add nsw i32 %j.031, %mul
+ %arrayidx24 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add23
+ %4 = load float, float addrspace(4)* %arrayidx24, align 4
+ %mul25 = fmul arcp contract float %3, %4
+ %add27 = add nsw i32 %j.031, %mul7
+ %arrayidx28 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add27
+ %5 = load float, float addrspace(4)* %arrayidx28, align 4
+ %sub = fsub arcp contract float %5, %mul25
+ store float %sub, float addrspace(4)* %arrayidx28, align 4
+ %inc = add nuw nsw i32 %j.031, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.inc29, label %for.body18
+
+for.inc29: ; preds = %for.body18
+ %inc30 = add nuw nsw i32 %i.033, 1
+ %exitcond36 = icmp eq i32 %inc30, %n
+ br i1 %exitcond36, label %for.inc32, label %for.body6
+
+for.inc32: ; preds = %for.inc29
+ br label %for.body
+
+for.end34: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"ArcIntrinsicCheck", i32 18224056}
+!1 = !{i32 1, !"wchar_size", i32 2}
+!2 = !{!"clang version 10.0.1 "}
More information about the llvm-commits
mailing list