[clang] dccfaac - [InferAddressSpaces] Handle the pair of `ptrtoint`/`inttoptr`.
Michael Liao via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 25 17:47:31 PDT 2020
Author: Michael Liao
Date: 2020-06-25T20:46:56-04:00
New Revision: dccfaacf93e1c4801cbcc4686f64eb8a35564ff7
URL: https://github.com/llvm/llvm-project/commit/dccfaacf93e1c4801cbcc4686f64eb8a35564ff7
DIFF: https://github.com/llvm/llvm-project/commit/dccfaacf93e1c4801cbcc4686f64eb8a35564ff7.diff
LOG: [InferAddressSpaces] Handle the pair of `ptrtoint`/`inttoptr`.
Summary:
- `ptrtoint` and `inttoptr` are defined as no-op casts if the integer
  value has the same size as the pointer value. The pair of
  `ptrtoint`/`inttoptr` is in fact a no-op cast sequence between
  different address spaces. Teach `infer-address-spaces` to handle them
  like a `bitcast`.
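  As a small sketch of the effect (mirroring the new `@noop_ptrint_pair`
  test added below), on amdgcn, where both generic and global pointers
  are 64 bits wide, a sequence such as

    %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64  ; no-op: sizes match
    %2 = inttoptr i64 %1 to i32*                      ; no-op: sizes match
    store i32 0, i32* %2

  is now simplified so that the store uses the original global pointer
  directly:

    store i32 0, i32 addrspace(1)* %x.coerce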
Reviewers: arsenm, chandlerc
Subscribers: jvesely, wdng, nhaehnle, hiraditya, kerbowa, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D81938
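For contrast (see the new `@non_noop_ptrint_pair2` test below), a pair
whose intermediate integer does not preserve all pointer bits is left
untouched, since truncating a 64-bit pointer to i32 is not a no-op cast:

  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i32  ; truncating: not a no-op
  %2 = inttoptr i32 %1 to i32*
  store i32 0, i32* %2                              ; left as-is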
Added:
llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
Modified:
clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index 73ab9edf318e..8c102d339863 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,37 +1,52 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
#include "Inputs/cuda.h"
// Coerced struct from `struct S` with all generic pointers lowered into
// global ones.
-// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
-// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }
// On the host-side compilation, generic pointer won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce
-// CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
// HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
+// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
__global__ void kernel1(int *x) {
x[0]++;
}
-// CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* nonnull align 4 dereferenceable(4) %x.coerce)
// HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
+// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
__global__ void kernel2(int &x) {
x++;
}
-// CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
// HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// CHECK-LABEL: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__global__ void kernel3(__attribute__((address_space(2))) int *x,
__attribute__((address_space(1))) int *y) {
y[0] = x[0];
}
-// CHECK: define void @_Z4funcPi(i32* %x)
+// COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__device__ void func(int *x) {
x[0]++;
}
@@ -42,16 +57,25 @@ struct S {
};
// `by-val` struct will be coerced into a similar struct with all generic
// pointers lowered into global ones.
-// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
// HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0
+// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1
+// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
+// OPT: store float [[ADD]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
__global__ void kernel4(struct S s) {
s.x[0]++;
s.y[0] += 1.f;
}
// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
-// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
// HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
__global__ void kernel5(struct S *s) {
s->x[0]++;
s->y[0] += 1.f;
@@ -61,16 +85,26 @@ struct T {
float *x[2];
};
// `by-val` array is also coerced.
-// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
// HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+// OPT: [[ARR:%.*]] = extractvalue %struct.T.coerce %t.coerce, 0
+// OPT: [[P0:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 0
+// OPT: [[P1:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 1
+// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[P0]], align 4
+// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
+// OPT: store float [[ADD0]], float addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
+// OPT: store float [[ADD1]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
__global__ void kernel6(struct T t) {
t.x[0][0] += 1.f;
t.x[1][0] += 2.f;
}
// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
-// CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
// HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
__global__ void kernel7(int *__restrict x) {
x[0]++;
}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index d407d0439cd8..db9cc58bbfc4 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,7 +96,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -116,11 +115,13 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
@@ -132,6 +133,11 @@
using namespace llvm;
+static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
+ "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
+ cl::desc("The default address space is assumed to be the flat address space. "
+ "This is mainly for testing purposes."));
+
static const unsigned UninitializedAddressSpace =
std::numeric_limits<unsigned>::max();
@@ -143,6 +149,7 @@ using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
const TargetTransformInfo *TTI = nullptr;
+ const DataLayout *DL = nullptr;
/// Target specific address space which uses of should be replaced if
/// possible.
@@ -219,10 +226,45 @@ void initializeInferAddressSpacesPass(PassRegistry &);
INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
+// Check whether that's a no-op pointer bitcast using a pair of
+// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
+// different address spaces.
+static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
+ const TargetTransformInfo *TTI) {
+ assert(I2P->getOpcode() == Instruction::IntToPtr);
+ auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
+ if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
+ return false;
+ // Check it is really safe to treat that pair of `ptrtoint`/`inttoptr` as a
+ // no-op cast. Besides checking that both casts are no-ops, we also query
+ // the target-specific hook, as the reinterpreted pointer may be used in
+ // other pointer arithmetic. That hook ensures the underlying target also
+ // agrees it is a no-op address space cast and that the pointer bits are
+ // preserved.
+ // The current IR spec has no clear rules on address space casts, in
+ // particular no clear definition of the pointer bits in non-default address
+ // spaces. Dereferencing a pointer after an invalid reinterpret cast is
+ // undefined, and, as the meaning of those bits is equally unclear, pointer
+ // arithmetic after such a cast may be undefined as well. However, since the
+ // target hook confirms this is a no-op addrspacecast, the bits are the same
+ // and neither issue arises.
+ return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
+ I2P->getOperand(0)->getType(), I2P->getType(),
+ DL) &&
+ CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
+ P2I->getOperand(0)->getType(), P2I->getType(),
+ DL) &&
+ TTI->isNoopAddrSpaceCast(
+ P2I->getOperand(0)->getType()->getPointerAddressSpace(),
+ I2P->getType()->getPointerAddressSpace());
+}
+
// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
-static bool isAddressExpression(const Value &V) {
+static bool isAddressExpression(const Value &V, const DataLayout &DL,
+ const TargetTransformInfo *TTI) {
const Operator *Op = dyn_cast<Operator>(&V);
if (!Op)
return false;
@@ -241,6 +283,8 @@ static bool isAddressExpression(const Value &V) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
return II && II->getIntrinsicID() == Intrinsic::ptrmask;
}
+ case Instruction::IntToPtr:
+ return isNoopPtrIntCastPair(Op, DL, TTI);
default:
return false;
}
@@ -249,7 +293,9 @@ static bool isAddressExpression(const Value &V) {
// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
+static SmallVector<Value *, 2>
+getPointerOperands(const Value &V, const DataLayout &DL,
+ const TargetTransformInfo *TTI) {
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -269,6 +315,11 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
"unexpected intrinsic call");
return {II.getArgOperand(0)};
}
+ case Instruction::IntToPtr: {
+ assert(isNoopPtrIntCastPair(&Op, DL, TTI));
+ auto *P2I = cast<Operator>(Op.getOperand(0));
+ return {P2I->getOperand(0)};
+ }
default:
llvm_unreachable("Unexpected instruction type.");
}
@@ -337,13 +388,13 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
// expressions.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// TODO: Look in non-address parts, like icmp operands.
- if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
PostorderStack.emplace_back(CE, false);
return;
}
- if (isAddressExpression(*V) &&
+ if (isAddressExpression(*V, *DL, TTI) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
@@ -351,7 +402,7 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
Operator *Op = cast<Operator>(V);
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
- if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
PostorderStack.emplace_back(CE, false);
}
}
@@ -407,6 +458,10 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
} else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
if (!ASC->getType()->isVectorTy())
PushPtrOperand(ASC->getPointerOperand());
+ } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
+ if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
+ PushPtrOperand(
+ cast<PtrToIntInst>(I2P->getOperand(0))->getPointerOperand());
}
}
@@ -423,7 +478,7 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().setInt(true);
- for (Value *PtrOperand : getPointerOperands(*TopVal)) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
Visited);
}
@@ -536,6 +591,14 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
assert(I->getType()->isPointerTy());
return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
NewPointerOperands[2], "", nullptr, I);
+ case Instruction::IntToPtr: {
+ assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
+ Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
+ assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+ if (Src->getType() != NewPtrType)
+ return new BitCastInst(Src, NewPtrType);
+ return Src;
+ }
default:
llvm_unreachable("Unexpected opcode");
}
@@ -545,8 +608,9 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
- ConstantExpr *CE, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace) {
+ ConstantExpr *CE, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
+ const TargetTransformInfo *TTI) {
Type *TargetType =
CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
@@ -577,6 +641,13 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
}
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
+ Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
+ assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+ return ConstantExpr::getBitCast(Src, TargetType);
+ }
+
// Computes the operands of the new constant expression.
bool IsNew = false;
SmallVector<Constant *, 4> NewOperands;
@@ -594,7 +665,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
- CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+ CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
IsNew = true;
NewOperands.push_back(cast<Constant>(NewOperand));
continue;
@@ -629,7 +700,7 @@ Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
- assert(isAddressExpression(*V) &&
+ assert(isAddressExpression(*V, *DL, TTI) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace);
if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -645,7 +716,7 @@ Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
}
return cloneConstantExprWithNewAddressSpace(
- cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+ cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
}
// Defines the join operation on the address space lattice (see the file header
@@ -669,6 +740,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DL = &F.getParent()->getDataLayout();
+
+ if (AssumeDefaultIsFlatAddressSpace)
+ FlatAddrSpace = 0;
if (FlatAddrSpace == UninitializedAddressSpace) {
FlatAddrSpace = TTI->getFlatAddressSpace();
@@ -773,7 +848,7 @@ Optional<unsigned> InferAddressSpaces::updateAddressSpace(
else
NewAS = joinAddressSpaces(Src0AS, Src1AS);
} else {
- for (Value *PtrOperand : getPointerOperands(V)) {
+ for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
auto I = InferredAddrSpace.find(PtrOperand);
unsigned OperandAS = I != InferredAddrSpace.end() ?
I->second : PtrOperand->getType()->getPointerAddressSpace();
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
new file mode 100644
index 000000000000..24cab4f7bf6e
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
@@ -0,0 +1,101 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -infer-address-spaces %s | FileCheck -check-prefixes=COMMON,AMDGCN %s
+; RUN: opt -S -o - -infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck -check-prefixes=COMMON,NOTTI %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+
+; COMMON-LABEL: @noop_ptrint_pair(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+; NOTTI-NEXT: %2 = inttoptr i64 %1 to i32*
+; NOTTI-NEXT: store i32 0, i32* %2
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
+ %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+ %2 = inttoptr i64 %1 to i32*
+ store i32 0, i32* %2
+ ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; AMDGCN-NEXT: inttoptr i64 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; NOTTI-NEXT: inttoptr i64 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair(i32 addrspace(3)* %x.coerce) {
+ %1 = ptrtoint i32 addrspace(3)* %x.coerce to i64
+ %2 = inttoptr i64 %1 to i32*
+ store i32 0, i32* %2
+ ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair2(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; AMDGCN-NEXT: inttoptr i32 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; NOTTI-NEXT: inttoptr i32 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair2(i32 addrspace(1)* %x.coerce) {
+ %1 = ptrtoint i32 addrspace(1)* %x.coerce to i32
+ %2 = inttoptr i32 %1 to i32*
+ store i32 0, i32* %2
+ ret void
+}
+
+@g = addrspace(1) global i32 0, align 4
+@l = addrspace(3) global i32 0, align 4
+
+; COMMON-LABEL: @noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* @g
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair_ce() {
+ store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+ ret void
+}
+
+; COMMON-LABEL: @noop_ptrint_pair_ce2(
+; AMDGCN-NEXT: ret i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+define i32* @noop_ptrint_pair_ce2() {
+ ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair_ce() {
+ store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+ ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce2(
+; AMDGCN-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+define i32* @non_noop_ptrint_pair_ce2() {
+ ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce3(
+; AMDGCN-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+define i32* @non_noop_ptrint_pair_ce3() {
+ ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce4(
+; AMDGCN-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+define i32* @non_noop_ptrint_pair_ce4() {
+ ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+}