[llvm] [llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr (PR #112793)
Daniel Paoliello via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 12:14:45 PST 2024
https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/112793
From 0077275562d8613ed783e586bb9ac02b57555769 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Fri, 4 Oct 2024 10:29:03 -0700
Subject: [PATCH] [llvm][aarch64] Add support for the MS qualifiers __ptr32,
__ptr64, __sptr, __uptr
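
For context, a short sketch of the mapping between the MS pointer qualifiers
and LLVM address spaces that this change relies on; the numbers come from the
ARM64AS enum added in AArch64ISelLowering.h and the p270/p271/p272 entries in
the test's datalayout (the struct and field names below are illustrative
only):

  struct MixedPointers {
    int *__ptr32 __sptr s32; // 32-bit, ARM64AS::PTR32_SPTR (address space 270),
                             // sign-extended when widened to 64 bits
    int *__ptr32 __uptr u32; // 32-bit, ARM64AS::PTR32_UPTR (address space 271),
                             // zero-extended when widened to 64 bits
    int *__ptr64 p64;        // ARM64AS::PTR64 (address space 272), the same
                             // width as a default AArch64 pointer
  };

Compiled with the clang invocation shown in the new test (-fms-extensions,
aarch64-windows-msvc target), casts between such pointer types reach the
backend as addrspacecast nodes, which the lowering below widens, narrows, or
treats as a no-op based purely on the two pointer sizes.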
---
.../Target/AArch64/AArch64ISelLowering.cpp | 83 +++++++-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 22 ++-
.../lib/Target/AArch64/AArch64TargetMachine.h | 3 +-
.../AArch64/aarch64-mixed-ptr-sizes.ll | 180 ++++++++++++++++++
4 files changed, 277 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0814380b188485..2c7304243f5010 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -533,6 +533,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6722,6 +6725,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
ST->getBasePtr(), ST->getMemOperand());
}
+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ MVT DestVT = Op.getSimpleValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+
+ unsigned SrcAS = N->getSrcAddressSpace();
+ unsigned DestAS = N->getDestAddressSpace();
+ assert(SrcAS != DestAS &&
+ "addrspacecast must be between different address spaces");
+ assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
+ TLI.getTargetMachine().getPointerSize(DestAS) &&
+ "addrspacecast must be between different ptr sizes");
+
+ if (SrcAS == ARM64AS::PTR32_SPTR) {
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+ DAG.getTargetConstant(0, dl, DestVT));
+ } else if (SrcAS == ARM64AS::PTR32_UPTR) {
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+ DAG.getTargetConstant(0, dl, DestVT));
+ } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
+ (DestAS == ARM64AS::PTR32_UPTR)) {
+ SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
+ SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
+ return Trunc;
+ } else {
+ return Src;
+ }
+}
+
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8 or volatile stores of i128.
@@ -7375,6 +7409,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+ case ISD::ADDRSPACECAST:
+ return LowerADDRSPACECAST(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23366,6 +23402,26 @@ static SDValue performLOADCombine(SDNode *N,
performTBISimplification(N->getOperand(1), DCI, DAG);
LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT RegVT = LD->getValueType(0);
+ EVT MemVT = LD->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(LD);
+
+ // Cast ptr32 and ptr64 pointers to the default address space before a load.
+ unsigned AddrSpace = LD->getAddressSpace();
+ if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+ AddrSpace == ARM64AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
+ SDValue Cast =
+ DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
+ return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
+ Cast, LD->getPointerInfo(), MemVT,
+ LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags());
+ }
+ }
+
if (LD->isVolatile() || !Subtarget->isLittleEndian())
return SDValue(N, 0);
@@ -23375,13 +23431,11 @@ static SDValue performLOADCombine(SDNode *N,
if (!LD->isNonTemporal())
return SDValue(N, 0);
- EVT MemVT = LD->getMemoryVT();
if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
MemVT.getSizeInBits() % 256 == 0 ||
256 % MemVT.getScalarSizeInBits() != 0)
return SDValue(N, 0);
- SDLoc DL(LD);
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
SDNodeFlags Flags = LD->getFlags();
@@ -23641,12 +23695,28 @@ static SDValue performSTORECombine(SDNode *N,
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT ValueVT = Value.getValueType();
+ EVT MemVT = ST->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(ST);
auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
EVT EltVT = VT.getVectorElementType();
return EltVT == MVT::f32 || EltVT == MVT::f64;
};
+ // Cast ptr32 and ptr64 pointers to the default address space before a store.
+ unsigned AddrSpace = ST->getAddressSpace();
+ if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+ AddrSpace == ARM64AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != Ptr.getSimpleValueType()) {
+ SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
+ return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
+ ST->getOriginalAlign(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ }
+ }
+
if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
return Res;
@@ -23660,8 +23730,8 @@ static SDValue performSTORECombine(SDNode *N,
ValueVT.isFixedLengthVector() &&
ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
- return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getMemoryVT(), ST->getMemOperand());
+ return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
+ ST->getMemOperand());
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
@@ -26988,6 +27058,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
return;
}
+ case ISD::ADDRSPACECAST: {
+ SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
+ Results.push_back(V);
+ return;
+ }
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
MemSDNode *LoadNode = cast<MemSDNode>(N);
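
To make the new ADDRSPACECAST lowering concrete, here is roughly what each
cast direction compiles to. The functions are illustrative C++ (the names are
not from the patch), mirroring the test's struct Foo (a __ptr32 field at
offset 0 and a __ptr64 field at offset 8); the expected instructions are
taken from the CHECK lines in aarch64-mixed-ptr-sizes.ll below:

  void widen_sptr(Foo *f, int *__ptr32 __sptr i) { f->p64 = i; } // sxtw x8, w1 ; str x8, [x0, #8]
  void widen_uptr(Foo *f, int *__ptr32 __uptr i) { f->p64 = i; } // mov w8, w1  ; str x8, [x0, #8]
  void narrow(Foo *f, int *__ptr64 i)            { f->p32 = i; } // str w1, [x0]  (truncated to 32 bits)
  void same_size(Foo *f, int *__ptr32 i)         { f->p32 = i; } // str w1, [x0]  (no conversion needed)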
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d696355bb062a8..b4ac52790612d2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -554,6 +554,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;
} // namespace AArch64
+namespace ARM64AS {
+enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
+}
+
class AArch64Subtarget;
class AArch64TargetLowering : public TargetLowering {
@@ -585,11 +589,19 @@ class AArch64TargetLowering : public TargetLowering {
unsigned Depth) const override;
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
- // Returning i64 unconditionally here (i.e. even for ILP32) means that the
- // *DAG* representation of pointers will always be 64-bits. They will be
- // truncated and extended when transferred to memory, but the 64-bit DAG
- // allows us to use AArch64's addressing modes much more easily.
- return MVT::getIntegerVT(64);
+ if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
+ // These are 32-bit pointers created using the `__ptr32` extension or
+ // similar. They are handled by marking them as being in a different
+ // address space, and will be extended to 64-bits when used as the target
+ // of a load or store operation, or cast to a 64-bit pointer type.
+ return MVT::i32;
+ } else {
+ // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+ // *DAG* representation of pointers will always be 64-bits. They will be
+ // truncated and extended when transferred to memory, but the 64-bit DAG
+ // allows us to use AArch64's addressing modes much more easily.
+ return MVT::i64;
+ }
}
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index e4d0aff50d8f67..56277b460b3f21 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -68,8 +68,7 @@ class AArch64TargetMachine : public LLVMTargetMachine {
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
+ return (getPointerSize(SrcAS) == getPointerSize(DestAS));
}
private:
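
With this override, whether an address space cast needs any code depends only
on the two pointer widths. A minimal illustration, assuming TM is an
AArch64TargetMachine for the triple used in the test (the variable name is
not from the patch) and that address spaces 270 and 271 use 32-bit pointers
while 272 and the default address space use 64-bit pointers, as in the test's
datalayout:

  assert(TM.isNoopAddrSpaceCast(270, 271));  // 32-bit to 32-bit: no code needed
  assert(TM.isNoopAddrSpaceCast(272, 0));    // 64-bit to 64-bit: no code needed
  assert(!TM.isNoopAddrSpaceCast(270, 0));   // widening, lowered as a sign extend
  assert(!TM.isNoopAddrSpaceCast(271, 0));   // widening, lowered as a zero extend
  assert(!TM.isNoopAddrSpaceCast(0, 270));   // narrowing, lowered as a truncation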
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
new file mode 100644
index 00000000000000..fc19325925feef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+; Source to regenerate:
+; struct Foo {
+; int * __ptr32 p32;
+; int * __ptr64 p64;
+; __attribute__((address_space(9))) int *p_other;
+; };
+; extern "C" void use_foo(Foo *f);
+; extern "C" int use_int(int i);
+; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
+; *i = use_int(*i);
+; }
+; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
+; *i = use_int(*i);
+; }
+; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
+; test_noop1(f, 0);
+; }
+; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
+; f->p32 = (int * __ptr32)i;
+; use_foo(f);
+; }
+;
+; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp
+
+target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-windows-msvc"
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str x8, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(270) %i to ptr
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %0, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext_store_load:
+; CHECK: // %bb.0: // %entry
+; CHECK: sxtw x19, w0
+; CHECK-NEXT: ldr w0, [x19]
+; CHECK-NEXT: bl use_int
+; CHECK-NEXT: str w0, [x19]
+entry:
+ %0 = load i32, ptr addrspace(270) %i, align 4
+ %call = tail call i32 @use_int(i32 noundef %0)
+ store i32 %call, ptr addrspace(270) %i, align 4
+ ret void
+}
+
+declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: str x8, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(271) %i to ptr
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %0, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext_store_load:
+; CHECK: // %bb.0: // %entry
+; CHECK: mov w19, w0
+; CHECK-NEXT: ldr w0, [x19]
+; CHECK-NEXT: bl use_int
+; CHECK-NEXT: str w0, [x19]
+entry:
+ %0 = load i32, ptr addrspace(271) %i, align 4
+ %call = tail call i32 @use_int(i32 noundef %0)
+ store i32 %call, ptr addrspace(271) %i, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_trunc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr %i to ptr addrspace(270)
+ store ptr addrspace(270) %0, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ store ptr addrspace(270) %i, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x1, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %i, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_null_arg:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str wzr, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ store ptr addrspace(270) null, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_unrecognized:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
+ store ptr addrspace(270) %0, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }