[llvm] 2e989bd - [InferAS] Infer the address space of inttoptr (#173244)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 19 17:49:04 PST 2026


Author: Luo Yuanke
Date: 2026-02-20T09:49:00+08:00
New Revision: 2e989bd59596a7e3ea4bdd600ad8af373d99e68d

URL: https://github.com/llvm/llvm-project/commit/2e989bd59596a7e3ea4bdd600ad8af373d99e68d
DIFF: https://github.com/llvm/llvm-project/commit/2e989bd59596a7e3ea4bdd600ad8af373d99e68d.diff

LOG: [InferAS] Infer the address space of inttoptr (#173244)

Currently, InferAddressSpaces checks that the bit value doesn't
change for a <ptrtoint, inttoptr> address space cast. However, that
condition is too strict. Sometimes only the low address bits change,
e.g. for swizzling, while the address space stays the same. Taking the
code below as an example, we can transform `%gp2 = inttoptr i64 %b to ptr`
to `%gp2 = inttoptr i64 %b to ptr addrspace(2)` and specify addrspace(2)
for the following store instruction.
```
   %gp = addrspacecast ptr addrspace(2) %sp to ptr
   %a = ptrtoint ptr %gp to i64
   %b = xor i64 7, %a
   %gp2 = inttoptr i64 %b to ptr
   store i16 0, ptr %gp2, align 2
```
This patch tries to infer the unchanged leading bits of the address
and lets the target determine whether it is safe to perform the address
space cast for the inttoptr instruction.

---------

Co-authored-by: Yuanke Luo <ykluo at birentech.com>

Added: 
    llvm/test/Transforms/InferAddressSpaces/NVPTX/int2ptr.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
    llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
    llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b06ee091827f7..f8845e0d16434 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -572,6 +572,21 @@ class TargetTransformInfo {
   LLVM_ABI KnownBits computeKnownBitsAddrSpaceCast(
       unsigned FromAS, unsigned ToAS, const KnownBits &FromPtrBits) const;
 
+  /// Return the preserved ptr bit mask for which it is safe to cast an integer
+  /// to a pointer with a new address space. The returned APInt size is
+  /// identical to the source address space size. The integer form of the
+  /// address may change only in its least significant bits (e.g. within a
+  /// page); in that case the target can determine whether it is safe to cast
+  /// the generic address space to the original address space. For the example
+  /// below, we can replace `%gp2 = inttoptr i64 %b to ptr` with `%gp2 = inttoptr i64 %b to ptr addrspace(2)`
+  ///   %gp = addrspacecast ptr addrspace(2) %sp to ptr
+  ///   %a = ptrtoint ptr %gp to i64
+  ///   %b = xor i64 7, %a
+  ///   %gp2 = inttoptr i64 %b to ptr
+  ///   store i16 0, ptr %gp2, align 2
+  LLVM_ABI APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS,
+                                                  unsigned DstAS) const;
+
   /// Return true if globals in this address space can have initializers other
   /// than `undef`.
   LLVM_ABI bool

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 5ef18fecabd99..a0f91ce9626a1 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -193,6 +193,11 @@ class TargetTransformInfoImplBase {
     return FromPtrBits.anyextOrTrunc(ToASBitSize);
   }
 
+  virtual APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS,
+                                                 unsigned DstAS) const {
+    return {DL.getPointerSizeInBits(SrcAS), 0};
+  }
+
   virtual bool
   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
     return AS == 0;

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 504fa9b448ec0..e470b560c812c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -339,6 +339,11 @@ KnownBits TargetTransformInfo::computeKnownBitsAddrSpaceCast(
   return TTIImpl->computeKnownBitsAddrSpaceCast(FromAS, ToAS, FromPtrBits);
 }
 
+APInt TargetTransformInfo::getAddrSpaceCastPreservedPtrMask(
+    unsigned SrcAS, unsigned DstAS) const {
+  return TTIImpl->getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
+}
+
 bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
     unsigned AS) const {
   return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);

diff  --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 40eb161bc8666..cbb73511c5a08 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -183,6 +183,20 @@ class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
     }
   }
 
+  APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS,
+                                         unsigned DstAS) const override {
+    if (SrcAS != llvm::ADDRESS_SPACE_GENERIC)
+      return BaseT::getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
+    if (DstAS != llvm::ADDRESS_SPACE_GLOBAL &&
+        DstAS != llvm::ADDRESS_SPACE_SHARED)
+      return BaseT::getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
+
+    // An address change within a 4K page does not change the original address
+    // space and is safe for an address space cast from SrcAS to DstAS.
+    APInt PtrMask(DL.getPointerSizeInBits(llvm::ADDRESS_SPACE_GENERIC), 0xfff);
+    return PtrMask;
+  }
+
   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                   Intrinsic::ID IID) const override;
 

diff  --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 2ed9359157fa8..1082625d12a15 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -97,6 +97,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -111,6 +112,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
@@ -122,6 +124,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -129,12 +132,14 @@
 #include <cassert>
 #include <iterator>
 #include <limits>
+#include <optional>
 #include <utility>
 #include <vector>
 
 #define DEBUG_TYPE "infer-address-spaces"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
     "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
@@ -191,6 +196,33 @@ class InferAddressSpacesImpl {
   /// Target specific address space which uses of should be replaced if
   /// possible.
   unsigned FlatAddrSpace = 0;
+  DenseMap<const Value *, Value *> PtrIntCastPairs;
+
+  // Tries to find whether the inttoptr instruction is derived from a pointer
+  // that has a specific address space, and whether it is safe to propagate
+  // that address space to the new pointer that inttoptr produces.
+  Value *getIntToPtrPointerOperand(const Operator *I2P) const;
+  // Tries to find whether the inttoptr instruction is derived from a pointer
+  // that has a specific address space, and whether it is safe to propagate
+  // that address space to the new pointer that inttoptr produces. If the old
+  // pointer is found, cache the <OldPtr, inttoptr> pairs in a map.
+  void collectIntToPtrPointerOperand();
+  // Check if an old pointer is found ahead of time. The safety has been checked
+  // when collecting the inttoptr original pointer and the result is cached in
+  // PtrIntCastPairs.
+  bool isSafeToCastIntToPtrAddrSpace(const Operator *I2P) const {
+    return PtrIntCastPairs.contains(I2P);
+  }
+  bool isAddressExpression(const Value &V, const DataLayout &DL,
+                           const TargetTransformInfo *TTI) const;
+  Value *cloneConstantExprWithNewAddressSpace(
+      ConstantExpr *CE, unsigned NewAddrSpace,
+      const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
+      const TargetTransformInfo *TTI) const;
+
+  SmallVector<Value *, 2>
+  getPointerOperands(const Value &V, const DataLayout &DL,
+                     const TargetTransformInfo *TTI) const;
 
   // Try to update the address space of V. If V is updated, returns true and
   // false otherwise.
@@ -314,8 +346,9 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
 // TODO: Currently, we only consider:
 //   - arguments
 //   - phi, bitcast, addrspacecast, and getelementptr operators
-static bool isAddressExpression(const Value &V, const DataLayout &DL,
-                                const TargetTransformInfo *TTI) {
+bool InferAddressSpacesImpl::isAddressExpression(
+    const Value &V, const DataLayout &DL,
+    const TargetTransformInfo *TTI) const {
 
   if (const Argument *Arg = dyn_cast<Argument>(&V))
     return Arg->getType()->isPointerTy() &&
@@ -340,7 +373,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
   }
   case Instruction::IntToPtr:
-    return isNoopPtrIntCastPair(Op, DL, TTI);
+    return isNoopPtrIntCastPair(Op, DL, TTI) ||
+           isSafeToCastIntToPtrAddrSpace(Op);
   default:
     // That value is an address expression if it has an assumed address space.
     return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
@@ -350,9 +384,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 // Returns the pointer operands of V.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2>
-getPointerOperands(const Value &V, const DataLayout &DL,
-                   const TargetTransformInfo *TTI) {
+SmallVector<Value *, 2> InferAddressSpacesImpl::getPointerOperands(
+    const Value &V, const DataLayout &DL,
+    const TargetTransformInfo *TTI) const {
   if (isa<Argument>(&V))
     return {};
 
@@ -375,15 +409,88 @@ getPointerOperands(const Value &V, const DataLayout &DL,
     return {II.getArgOperand(0)};
   }
   case Instruction::IntToPtr: {
-    assert(isNoopPtrIntCastPair(&Op, DL, TTI));
-    auto *P2I = cast<Operator>(Op.getOperand(0));
-    return {P2I->getOperand(0)};
+    if (isNoopPtrIntCastPair(&Op, DL, TTI)) {
+      auto *P2I = cast<Operator>(Op.getOperand(0));
+      return {P2I->getOperand(0)};
+    }
+    assert(isSafeToCastIntToPtrAddrSpace(&Op));
+    return {getIntToPtrPointerOperand(&Op)};
   }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
 }
 
+// Return a mask in which a set bit indicates that the bit may have changed.
+// This helper function computes the maximum known changed bits between ptr1
+// and ptr2 after the operation `ptr2 = ptr1 Op Mask`.
+static APInt computeMaxChangedPtrBits(const Operator *Op, const Value *Mask,
+                                      const DataLayout &DL, AssumptionCache *AC,
+                                      const DominatorTree *DT) {
+  KnownBits Known = computeKnownBits(Mask, DL, AC, nullptr, DT);
+  switch (Op->getOpcode()) {
+  case Instruction::Xor:
+  case Instruction::Or:
+    return ~Known.Zero;
+  case Instruction::And:
+    return ~Known.One;
+  default:
+    return APInt::getAllOnes(Known.getBitWidth());
+  }
+}
+
+Value *
+InferAddressSpacesImpl::getIntToPtrPointerOperand(const Operator *I2P) const {
+  assert(I2P->getOpcode() == Instruction::IntToPtr);
+  if (I2P->getType()->isVectorTy())
+    return nullptr;
+
+  // If I2P has already been analyzed and has a cached corresponding old
+  // pointer value, just return it.
+  if (auto *OldPtr = PtrIntCastPairs.lookup(I2P))
+    return OldPtr;
+
+  Value *LogicalOp = I2P->getOperand(0);
+  Value *OldPtr, *Mask;
+  if (!match(LogicalOp,
+             m_c_BitwiseLogic(m_PtrToInt(m_Value(OldPtr)), m_Value(Mask))))
+    return nullptr;
+
+  Operator *AsCast = dyn_cast<AddrSpaceCastOperator>(OldPtr);
+  if (!AsCast)
+    return nullptr;
+
+  unsigned SrcAS = I2P->getType()->getPointerAddressSpace();
+  unsigned DstAS = AsCast->getOperand(0)->getType()->getPointerAddressSpace();
+  APInt PreservedPtrMask = TTI->getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
+  if (PreservedPtrMask.isZero())
+    return nullptr;
+  APInt ChangedPtrBits =
+      computeMaxChangedPtrBits(cast<Operator>(LogicalOp), Mask, *DL, &AC, DT);
+  // Check if the address bits change is within the preserved mask. If the bits
+  // change is not preserved, it is not safe to perform address space cast.
+  // The following pattern is not safe to cast address space.
+  //   %1 = ptrtoint ptr addrspace(3) %sp to i32
+  //   %2 = zext i32 %1 to i64
+  //   %gp = inttoptr i64 %2 to ptr
+  assert(ChangedPtrBits.getBitWidth() == PreservedPtrMask.getBitWidth());
+  if (ChangedPtrBits.isSubsetOf(PreservedPtrMask))
+    return OldPtr;
+
+  return nullptr;
+}
+
+void InferAddressSpacesImpl::collectIntToPtrPointerOperand() {
+  // Only collect inttoptr instruction.
+  // TODO: We need to collect inttoptr constant expression as well.
+  for (Instruction &I : instructions(F)) {
+    if (!dyn_cast<IntToPtrInst>(&I))
+      continue;
+    if (auto *OldPtr = getIntToPtrPointerOperand(cast<Operator>(&I)))
+      PtrIntCastPairs.insert({&I, OldPtr});
+  }
+}
+
 bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                       Value *OldV,
                                                       Value *NewV) const {
@@ -592,6 +699,8 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
       if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
         PushPtrOperand(cast<Operator>(I2P->getOperand(0))->getOperand(0));
+      else if (isSafeToCastIntToPtrAddrSpace(cast<Operator>(I2P)))
+        PushPtrOperand(getIntToPtrPointerOperand(cast<Operator>(I2P)));
     } else if (auto *RI = dyn_cast<ReturnInst>(&I)) {
       if (auto *RV = RI->getReturnValue();
           RV && RV->getType()->isPtrOrPtrVectorTy())
@@ -838,15 +947,20 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                               NewPointerOperands[2], "", nullptr, I);
   case Instruction::IntToPtr: {
-    assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
-    Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
-    if (Src->getType() == NewPtrType)
-      return Src;
-
-    // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
-    // source address space from a generic pointer source need to insert a cast
-    // back.
-    return new AddrSpaceCastInst(Src, NewPtrType);
+    if (isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI)) {
+      Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
+      if (Src->getType() == NewPtrType)
+        return Src;
+
+      // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
+      // source address space from a generic pointer source need to insert a
+      // cast back.
+      return new AddrSpaceCastInst(Src, NewPtrType);
+    }
+    assert(isSafeToCastIntToPtrAddrSpace(cast<Operator>(I)));
+    AddrSpaceCastInst *AsCast = new AddrSpaceCastInst(I, NewPtrType);
+    AsCast->insertAfter(I);
+    return AsCast;
   }
   default:
     llvm_unreachable("Unexpected opcode");
@@ -856,10 +970,10 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
 // Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
 // constant expression `CE` with its operands replaced as specified in
 // ValueWithNewAddrSpace.
-static Value *cloneConstantExprWithNewAddressSpace(
+Value *InferAddressSpacesImpl::cloneConstantExprWithNewAddressSpace(
     ConstantExpr *CE, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
-    const TargetTransformInfo *TTI) {
+    const TargetTransformInfo *TTI) const {
   Type *TargetType =
       CE->getType()->isPtrOrPtrVectorTy()
           ? getPtrOrVecOfPtrsWithNewAS(CE->getType(), NewAddrSpace)
@@ -881,10 +995,13 @@ static Value *cloneConstantExprWithNewAddressSpace(
   }
 
   if (CE->getOpcode() == Instruction::IntToPtr) {
-    assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
-    Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
-    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
-    return Src;
+    if (isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI)) {
+      Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
+      assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+      return Src;
+    }
+    assert(isSafeToCastIntToPtrAddrSpace(cast<Operator>(CE)));
+    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
   }
 
   // Computes the operands of the new constant expression.
@@ -990,6 +1107,7 @@ unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
 bool InferAddressSpacesImpl::run(Function &CurFn) {
   F = &CurFn;
   DL = &F->getDataLayout();
+  PtrIntCastPairs.clear();
 
   if (AssumeDefaultIsFlatAddressSpace)
     FlatAddrSpace = 0;
@@ -1000,6 +1118,7 @@ bool InferAddressSpacesImpl::run(Function &CurFn) {
       return false;
   }
 
+  collectIntToPtrPointerOperand();
   // Collects all flat address expressions in postorder.
   std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(*F);
 

diff  --git a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
index a21261c768862..02833ee861315 100644
--- a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
+++ b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
@@ -9,12 +9,14 @@ define i32  @test_disjoint_or_addr(i16 %a) {
 ; CHECK-LABEL: test_disjoint_or_addr(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<2>;
-; CHECK-NEXT:    .reg .b64 %rd<3>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    mov.b64 %rd1, a;
 ; CHECK-NEXT:    cvta.global.u64 %rd2, %rd1;
-; CHECK-NEXT:    ld.b32 %r1, [%rd2+8];
+; CHECK-NEXT:    or.b64 %rd3, %rd2, 8;
+; CHECK-NEXT:    cvta.to.global.u64 %rd4, %rd3;
+; CHECK-NEXT:    ld.global.b32 %r1, [%rd4];
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
 ; CHECK-NEXT:    ret;
   %a1 = ptrtoint ptr @a to i64

diff  --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/int2ptr.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/int2ptr.ll
new file mode 100644
index 0000000000000..90cdab3c09084
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/int2ptr.ll
@@ -0,0 +1,241 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces %s | FileCheck %s
+
+define void @test_smem_fail(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_smem_fail(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 4096, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 4096, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_smem_fail2(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_smem_fail2(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP1:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[GP:%.*]] = getelementptr i8, ptr [[GP1]], i32 8
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 4095, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp1 = addrspacecast ptr addrspace(3) %sp to ptr
+  %gp = getelementptr i8, ptr %gp1, i32 8
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 4095, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_xor_smem(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_xor_smem(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 4095, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(3)
+; CHECK-NEXT:    store i16 0, ptr addrspace(3) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 4095, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_xor_smem2(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_xor_smem2(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 [[A]], 4095
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(3)
+; CHECK-NEXT:    store i16 0, ptr addrspace(3) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 %a, 4095
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_or_smem(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_or_smem(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = or i64 4095, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(3)
+; CHECK-NEXT:    store i16 0, ptr addrspace(3) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = or i64 4095, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_or_smem2(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_or_smem2(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = or i64 4096, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = or i64 4096, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_and_smem(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_and_smem(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = and i64 [[A]], -4096
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(3)
+; CHECK-NEXT:    store i16 0, ptr addrspace(3) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = and i64 %a, -4096
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_and_smem_fail(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_and_smem_fail(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = and i64 [[A]], -4097
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = and i64 %a, -4097
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_gmem(ptr addrspace(1) %sp) {
+; CHECK-LABEL: define void @test_gmem(
+; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 7, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(1)
+; CHECK-NEXT:    store i16 0, ptr addrspace(1) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(1) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 7, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test_lmem(ptr addrspace(5) %sp) {
+; CHECK-LABEL: define void @test_lmem(
+; CHECK-SAME: ptr addrspace(5) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(5) [[SP]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[B:%.*]] = xor i64 7, [[A]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT:    store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(5) %sp to ptr
+  %a = ptrtoint ptr %gp to i64
+  %b = xor i64 7, %a
+  %gp2 = inttoptr i64 %b to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+define void @test3(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT:    [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT:    [[T1:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = lshr i64 [[T1]], 8
+; CHECK-NEXT:    [[SHR:%.*]] = and i64 [[AND]], 8
+; CHECK-NEXT:    [[AND1:%.*]] = lshr i64 [[T1]], 10
+; CHECK-NEXT:    [[SHR2:%.*]] = and i64 [[AND1]], 4
+; CHECK-NEXT:    [[OR:%.*]] = or i64 [[SHR]], [[SHR2]]
+; CHECK-NEXT:    [[AND3:%.*]] = lshr i64 [[T1]], 4
+; CHECK-NEXT:    [[SHR4:%.*]] = and i64 [[AND3]], 112
+; CHECK-NEXT:    [[OR5:%.*]] = or i64 [[OR]], [[SHR4]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i64 [[OR5]], [[T1]]
+; CHECK-NEXT:    [[GP2:%.*]] = inttoptr i64 [[XOR]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GP2]] to ptr addrspace(3)
+; CHECK-NEXT:    store i16 0, ptr addrspace(3) [[TMP1]], align 2
+; CHECK-NEXT:    ret void
+;
+  %gp = addrspacecast ptr addrspace(3) %sp to ptr
+  %t1 = ptrtoint ptr %gp to i64
+  %and = lshr i64 %t1, 8
+  %shr = and i64 %and, 8
+  %and1 = lshr i64 %t1, 10
+  %shr2 = and i64 %and1, 4
+  %or = or i64 %shr, %shr2
+  %and3 = lshr i64 %t1, 4
+  %shr4 = and i64 %and3, 112
+  %or5 = or i64 %or, %shr4
+  %xor = xor i64 %or5, %t1
+  %gp2 = inttoptr i64 %xor to ptr
+  store i16 0, ptr %gp2, align 2
+  ret void
+}
+
+ at g = addrspace(1) global i32 0, align 4
+
+define void @test_ce() {
+; CHECK-LABEL: define void @test_ce() {
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 xor (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @g to ptr) to i64), i64 7) to ptr), align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr inttoptr (i64
+  xor (i64
+  ptrtoint (ptr
+  addrspacecast (ptr addrspace(1) @g to ptr)
+  to i64),
+  i64 7)
+  to ptr)
+  ret void
+}


        


More information about the llvm-commits mailing list