[PATCH] D89577: [VectorCombine] Avoid crossing address space boundaries.

Fri Oct 16 11:28:13 PDT 2020

tra created this revision.
tra added reviewers: yaxunl, spatel.
Herald added subscribers: kerbowa, bixia, hiraditya, nhaehnle, jvesely.
Herald added a project: LLVM.
tra requested review of this revision.

We can not bitcast pointers across different address spaces, and VectorCombine
should be careful when it attempts to find the original source of the loaded
data.

Fixes an assertion trigger introduced by D81766 <https://reviews.llvm.org/D81766>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D89577

Files:
  llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
  llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg


Index: llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
===================================================================

--- /dev/null
+++ llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
Index: llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+; ModuleID = 'load-as-transition.ll'
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.hoge = type { float }
+
+define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 {
+; CHECK-LABEL: @load_from_other_as(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %a = alloca %struct.hoge, align 4, addrspace(5)
+  %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge*
+  %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0
+  %d = load float, float* %c, align 4
+  %e = insertelement <4 x float> undef, float %d, i32 0
+  store <4 x float> %e, <4 x float>* %resultptr, align 16
+  ret void
+}
+
+attributes #0 = { "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 12.0.0"}
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -111,6 +111,13 @@
   // TODO: Extend this to match GEP with constant offsets.
   Value *PtrOp = Load->getPointerOperand()->stripPointerCasts();
   assert(isa<PointerType>(PtrOp->getType()) && "Expected a pointer type");
+  unsigned AS = Load->getPointerAddressSpace();
+
+  // If original AS != Load's AS, we can't bitcast the original pointer and have
+  // to use Load's operand instead. Ideally we would want to strip pointer casts
+  // without changing AS, but there's no API to do that ATM.
+  if (AS != PtrOp->getType()->getPointerAddressSpace())
+    PtrOp = Load->getPointerOperand();
 
   Type *ScalarTy = Scalar->getType();
   uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
@@ -126,7 +133,6 @@
   if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT))
     return false;
 
-  unsigned AS = Load->getPointerAddressSpace();
 
   // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D89577.298691.patch
Type: text/x-patch
Size: 3603 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201016/199f6751/attachment.bin>