[llvm] ef8e033 - [llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr (#112793)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 10:39:57 PST 2025
Author: Daniel Paoliello
Date: 2025-01-31T10:39:53-08:00
New Revision: ef8e0330801b43f8c7bec81c649d5a3e86238d7f
URL: https://github.com/llvm/llvm-project/commit/ef8e0330801b43f8c7bec81c649d5a3e86238d7f
DIFF: https://github.com/llvm/llvm-project/commit/ef8e0330801b43f8c7bec81c649d5a3e86238d7f.diff
LOG: [llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr (#112793)
MSVC has a set of qualifiers that allow 32-bit signed/unsigned pointers to be
used when building 64-bit targets. This is useful for WoW code (i.e., the part
of Windows that handles running 32-bit applications on a 64-bit OS). Currently
this is supported on x64 using address spaces 270, 271, and 272, but it does
not work on AArch64 at all.
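As a brief illustration, here is a minimal sketch of how these qualifiers appear in source (it mirrors the struct used in the new test below; the function name is illustrative and `-fms-extensions` is assumed):
```
struct Foo {
  int * __ptr32 p32;   // 32-bit pointer; addrspace(270) in the test IR
  int * __ptr64 p64;   // explicit 64-bit pointer (the default width here)
};

// __uptr marks the 32-bit pointer as unsigned (addrspace(271) in the test IR);
// assigning it to a 64-bit pointer zero-extends the value.
void set_p64(Foo *f, int * __ptr32 __uptr i) {
  f->p64 = i;
}
```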
This change handles pointers in the new address spaces by truncating or
extending the value as required. The implementation is modeled after
x86.
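Concretely, the direction of the conversion follows the source address space. A minimal sketch (the function names are illustrative; the datalayout and triple are copied from the new test, and the expected extension instructions, `sxtw` for the signed case and a zero-extending 32-bit `mov` for the unsigned case, correspond to the `test_sign_ext`/`test_zero_ext` checks in the test below):
```
target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-windows-msvc"

; __sptr (addrspace 270): the 32-bit pointer value is sign-extended.
define ptr @cast_sptr(ptr addrspace(270) %p) {
  %q = addrspacecast ptr addrspace(270) %p to ptr   ; sxtw
  ret ptr %q
}

; __uptr (addrspace 271): the 32-bit pointer value is zero-extended.
define ptr @cast_uptr(ptr addrspace(271) %p) {
  %q = addrspacecast ptr addrspace(271) %p to ptr   ; mov wN, wN (zero-extend)
  ret ptr %q
}
```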
Note that the initial version of this change, which was never merged
(<https://reviews.llvm.org/D158931>), took a very different approach that
involved arch-specific handling in the DAG combiner/selector, which didn't
feel like the correct approach.
That previous approach also used `UBFM` for all 32-bit to 64-bit
zero-extensions, which resulted in a lot of `lsr` instructions being
added. For example, in the `ptradd.ll` test, it resulted in:
```
%add = add i32 %b, %a
%conv = zext i32 %add to i64
```
being expanded to:
```
add w8, w1, w0
lsr w0, w8, #0
```
where the `lsr` instruction was not previously being emitted. I don't know
enough about the exact details of AArch64 to say whether that's a desirable
change, so I've left it out of my change.
Backend half of #111879
Added:
llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64TargetMachine.h
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a396198013fc3c..2d3b89a3289333 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -530,6 +530,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6880,6 +6883,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
ST->getBasePtr(), ST->getMemOperand());
}
+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ MVT DestVT = Op.getSimpleValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+
+ unsigned SrcAS = N->getSrcAddressSpace();
+ unsigned DestAS = N->getDestAddressSpace();
+ assert(SrcAS != DestAS &&
+         "addrspacecast must be between different address spaces");
+ assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
+ TLI.getTargetMachine().getPointerSize(DestAS) &&
+         "addrspacecast must be between different ptr sizes");
+
+ if (SrcAS == ARM64AS::PTR32_SPTR) {
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+ DAG.getTargetConstant(0, dl, DestVT));
+ } else if (SrcAS == ARM64AS::PTR32_UPTR) {
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+ DAG.getTargetConstant(0, dl, DestVT));
+ } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
+ (DestAS == ARM64AS::PTR32_UPTR)) {
+ SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
+ SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
+ return Trunc;
+ } else {
+ return Src;
+ }
+}
+
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8 or volatile stores of i128.
@@ -7541,6 +7575,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+ case ISD::ADDRSPACECAST:
+ return LowerADDRSPACECAST(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23555,6 +23591,26 @@ static SDValue performLOADCombine(SDNode *N,
performTBISimplification(N->getOperand(1), DCI, DAG);
LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT RegVT = LD->getValueType(0);
+ EVT MemVT = LD->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(LD);
+
+ // Cast ptr32 and ptr64 pointers to the default address space before a load.
+ unsigned AddrSpace = LD->getAddressSpace();
+ if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+ AddrSpace == ARM64AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
+ SDValue Cast =
+ DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
+ return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
+ Cast, LD->getPointerInfo(), MemVT,
+ LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags());
+ }
+ }
+
if (LD->isVolatile() || !Subtarget->isLittleEndian())
return SDValue(N, 0);
@@ -23564,13 +23620,11 @@ static SDValue performLOADCombine(SDNode *N,
if (!LD->isNonTemporal())
return SDValue(N, 0);
- EVT MemVT = LD->getMemoryVT();
if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
MemVT.getSizeInBits() % 256 == 0 ||
256 % MemVT.getScalarSizeInBits() != 0)
return SDValue(N, 0);
- SDLoc DL(LD);
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
SDNodeFlags Flags = LD->getFlags();
@@ -23830,12 +23884,28 @@ static SDValue performSTORECombine(SDNode *N,
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT ValueVT = Value.getValueType();
+ EVT MemVT = ST->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(ST);
auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
EVT EltVT = VT.getVectorElementType();
return EltVT == MVT::f32 || EltVT == MVT::f64;
};
+ // Cast ptr32 and ptr64 pointers to the default address space before a store.
+ unsigned AddrSpace = ST->getAddressSpace();
+ if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+ AddrSpace == ARM64AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != Ptr.getSimpleValueType()) {
+ SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
+ return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
+ ST->getOriginalAlign(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ }
+ }
+
if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
return Res;
@@ -23849,8 +23919,8 @@ static SDValue performSTORECombine(SDNode *N,
ValueVT.isFixedLengthVector() &&
ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
- return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getMemoryVT(), ST->getMemOperand());
+ return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
+ ST->getMemOperand());
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
@@ -27391,6 +27461,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
return;
}
+ case ISD::ADDRSPACECAST: {
+ SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
+ Results.push_back(V);
+ return;
+ }
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
MemSDNode *LoadNode = cast<MemSDNode>(N);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 470ed2a06b706a..b26f28dc79f886 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -563,6 +563,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;
} // namespace AArch64
+namespace ARM64AS {
+enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
+}
+
class AArch64Subtarget;
class AArch64TargetLowering : public TargetLowering {
@@ -594,11 +598,19 @@ class AArch64TargetLowering : public TargetLowering {
unsigned Depth) const override;
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
- // Returning i64 unconditionally here (i.e. even for ILP32) means that the
- // *DAG* representation of pointers will always be 64-bits. They will be
- // truncated and extended when transferred to memory, but the 64-bit DAG
- // allows us to use AArch64's addressing modes much more easily.
- return MVT::getIntegerVT(64);
+ if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
+ // These are 32-bit pointers created using the `__ptr32` extension or
+      // similar. They are handled by marking them as being in a different
+ // address space, and will be extended to 64-bits when used as the target
+ // of a load or store operation, or cast to a 64-bit pointer type.
+ return MVT::i32;
+ } else {
+ // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+ // *DAG* representation of pointers will always be 64-bits. They will be
+ // truncated and extended when transferred to memory, but the 64-bit DAG
+ // allows us to use AArch64's addressing modes much more easily.
+ return MVT::i64;
+ }
}
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 76b1c9d917ecd0..621adb380dbcd0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -68,8 +68,7 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
+ return getPointerSize(SrcAS) == getPointerSize(DestAS);
}
private:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 07f03644336cdd..467094e9befef4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2999,9 +2999,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ // Can only handle AddressSpace 0, 64-bit pointers.
if (PtrTy != LLT::pointer(0, 64)) {
- LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
- << ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
new file mode 100644
index 00000000000000..b7c8df7310adca
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+; RUN: llc --fast-isel < %s | FileCheck %s
+; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s
+
+; Source to regenerate:
+; struct Foo {
+; int * __ptr32 p32;
+; int * __ptr64 p64;
+; __attribute__((address_space(9))) int *p_other;
+; };
+; extern "C" void use_foo(Foo *f);
+; extern "C" int use_int(int i);
+; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
+; *i = use_int(*i);
+; }
+; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
+; *i = use_int(*i);
+; }
+; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
+; test_noop1(f, 0);
+; }
+; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
+; f->p32 = (int * __ptr32)i;
+; use_foo(f);
+; }
+;
+; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp
+
+target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-windows-msvc"
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str x8, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(270) %i to ptr
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %0, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext_store_load:
+; CHECK: // %bb.0: // %entry
+; CHECK: sxtw x19, w0
+; CHECK-NEXT: ldr w0, [x19]
+; CHECK-NEXT: bl use_int
+; CHECK-NEXT: str w0, [x19]
+entry:
+ %0 = load i32, ptr addrspace(270) %i, align 4
+ %call = tail call i32 @use_int(i32 noundef %0)
+ store i32 %call, ptr addrspace(270) %i, align 4
+ ret void
+}
+
+declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: str x8, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(271) %i to ptr
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %0, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext_store_load:
+; CHECK: // %bb.0: // %entry
+; CHECK: mov w19, w0
+; CHECK-NEXT: ldr w0, [x19]
+; CHECK-NEXT: bl use_int
+; CHECK-NEXT: str w0, [x19]
+entry:
+ %0 = load i32, ptr addrspace(271) %i, align 4
+ %call = tail call i32 @use_int(i32 noundef %0)
+ store i32 %call, ptr addrspace(271) %i, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_trunc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr %i to ptr addrspace(270)
+ store ptr addrspace(270) %0, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ store ptr addrspace(270) %i, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x1, [x0, #8]
+; CHECK-NEXT: b use_foo
+entry:
+ %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+ store ptr %i, ptr %p64, align 8
+ tail call void @use_foo(ptr noundef %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_null_arg:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str wzr, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ store ptr addrspace(270) null, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_unrecognized:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo
+entry:
+ %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
+ store ptr addrspace(270) %0, ptr %f, align 8
+ tail call void @use_foo(ptr noundef nonnull %f)
+ ret void
+}
+
+attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
index 7d242dd2700126..48d0ea49b70e67 100644
--- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
+++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s --check-prefixes=ALL,CHECK
; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=ALL,CHECK-O0
+; RUN: llc --fast-isel < %s | FileCheck %s --check-prefixes=ALL,CHECK
+; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s --check-prefixes=ALL,CHECK
; Source to regenerate:
; struct Foo {