[llvm] [AArch64] Add CodeGen support for FEAT_CPA (PR #79569)
Rodolfo Wottrich via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 26 02:16:37 PST 2024
https://github.com/rgwott created https://github.com/llvm/llvm-project/pull/79569
CPA stands for Checked Pointer Arithmetic and is part of the 2023 MTE architecture extensions for A-profile.
The new CPA instructions perform regular pointer arithmetic (such as base register + offset) but check for overflow in the most significant bits of the result.
In this patch we intend to capture the semantics of pointer arithmetic when it is not folded into loads/stores, then generate the appropriate CPA instructions.
More details about the extension can be found at:
* https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/arm-a-profile-architecture-developments-2023
* https://developer.arm.com/documentation/ddi0602/2023-09/
>From 3cbea617b206ad81d5232ac20b7d31f55277a645 Mon Sep 17 00:00:00 2001
From: Rodolfo Wottrich <rodolfo.wottrich at arm.com>
Date: Fri, 26 Jan 2024 10:09:33 +0000
Subject: [PATCH] [AArch64] Add CodeGen support for FEAT_CPA
CPA stands for Checked Pointer Arithmetic and is part of the 2023 MTE
architecture extensions for A-profile.
The new CPA instructions perform regular pointer arithmetic (such as
base register + offset) but check for overflow in the most significant
bits of the result.
In this patch we intend to capture the semantics of pointer arithmetic
when it is not folded into loads/stores, then generate
the appropriate CPA instructions.
More details about the extension can be found at:
* https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/arm-a-profile-architecture-developments-2023
* https://developer.arm.com/documentation/ddi0602/2023-09/
---
llvm/include/llvm/Target/TargetMachine.h | 5 +
.../include/llvm/Target/TargetSelectionDAG.td | 5 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 20 ++
.../Target/AArch64/AArch64TargetMachine.cpp | 4 +
.../lib/Target/AArch64/AArch64TargetMachine.h | 4 +
.../GISel/AArch64InstructionSelector.cpp | 4 +
llvm/test/CodeGen/AArch64/cpa-globalisel.ll | 171 ++++++++++++++++++
llvm/test/CodeGen/AArch64/cpa-selectiondag.ll | 169 +++++++++++++++++
8 files changed, 380 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cpa-globalisel.ll
create mode 100644 llvm/test/CodeGen/AArch64/cpa-selectiondag.ll
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index a522a12299bb029..42b2d8ef536e827 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -420,6 +420,11 @@ class TargetMachine {
virtual unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
return 0;
}
+
+ /// True if target has some form of pointer arithmetic checking.
+ /// Helps identify whether pointer arithmetic semantics should be preserved
+ /// for passes such as instruction selection.
+ virtual bool isPtrArithmeticChecked(const Function &F) const { return false; }
};
/// This class describes a target machine that is implemented with the LLVM
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 22360353790dbce..071e801a4785e4e 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -109,7 +109,7 @@ def SDTOther : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'.
def SDTUnaryOp : SDTypeProfile<1, 1, []>; // for bitconvert.
-def SDTPtrAddOp : SDTypeProfile<1, 2, [ // ptradd
+def SDTPtrAddSubOp : SDTypeProfile<1, 2, [ // ptradd, ptrsub
SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1>
]>;
def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
@@ -384,8 +384,9 @@ def tblockaddress: SDNode<"ISD::TargetBlockAddress", SDTPtrLeaf, [],
def add : SDNode<"ISD::ADD" , SDTIntBinOp ,
[SDNPCommutative, SDNPAssociative]>;
-def ptradd : SDNode<"ISD::ADD" , SDTPtrAddOp, []>;
+def ptradd : SDNode<"ISD::ADD" , SDTPtrAddSubOp, []>;
def sub : SDNode<"ISD::SUB" , SDTIntBinOp>;
+def ptrsub : SDNode<"ISD::SUB" , SDTPtrAddSubOp, []>;
def mul : SDNode<"ISD::MUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 03baa7497615e3d..ccc7817cab18985 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9587,6 +9587,26 @@ let Predicates = [HasCPA] in {
// Scalar multiply-add/subtract
def MADDPT : MulAccumCPA<0, "maddpt">;
def MSUBPT : MulAccumCPA<1, "msubpt">;
+
+ // Rules to use CPA instructions in pointer arithmetic patterns which are not
+ // folded into loads/stores. The AddedComplexity serves to help supersede
+ // other simpler (non-CPA) patterns and make sure CPA is used instead.
+ let AddedComplexity = 20 in {
+ def : Pat<(ptradd GPR64sp:$Rn, GPR64sp:$Rm),
+ (ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i32 0))>;
+ def : Pat<(ptradd GPR64sp:$Rn, (shl GPR64sp:$Rm, (i64 imm0_7:$imm))),
+ (ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
+ (i32 (trunc_imm imm0_7:$imm)))>;
+ def : Pat<(ptrsub GPR64sp:$Rn, GPR64sp:$Rm),
+ (SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i32 0))>;
+ def : Pat<(ptrsub GPR64sp:$Rn, (shl GPR64sp:$Rm, (i64 imm0_7:$imm))),
+ (SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
+ (i32 (trunc_imm imm0_7:$imm)))>;
+ def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)),
+ (MADDPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
+ def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, (sub (i64 0), GPR64:$Rm))),
+ (MSUBPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
+ }
}
include "AArch64InstrAtomics.td"
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 6fbc13d8904f2e2..e67df1f417c55cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -892,3 +892,7 @@ bool AArch64TargetMachine::parseMachineFunctionInfo(
MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
return false;
}
+
+bool AArch64TargetMachine::isPtrArithmeticChecked(const Function &F) const {
+ return getSubtargetImpl(F)->hasCPA();
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 8fb68b06f137803..e21d35f87290661 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -71,6 +71,10 @@ class AArch64TargetMachine : public LLVMTargetMachine {
return true;
}
+ /// In AArch64, true if FEAT_CPA is present. Helps preserve pointer arithmetic
+ /// semantics for instruction selection.
+ bool isPtrArithmeticChecked(const Function &F) const override;
+
private:
bool isLittle;
};
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8344e79f78e1eb6..7480328f4b5dec3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2075,6 +2075,10 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
return Changed;
}
case TargetOpcode::G_PTR_ADD:
+ // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
+ // arithmetic semantics instead of falling back to regular arithmetic.
+ if (TM.isPtrArithmeticChecked(MF.getFunction()))
+ return false;
return convertPtrAddToAdd(I, MRI);
case TargetOpcode::G_LOAD: {
// For scalar loads of pointers, we try to convert the dest type from p0
diff --git a/llvm/test/CodeGen/AArch64/cpa-globalisel.ll b/llvm/test/CodeGen/AArch64/cpa-globalisel.ll
new file mode 100644
index 000000000000000..d7880454146b2c1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cpa-globalisel.ll
@@ -0,0 +1,171 @@
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O3
+
+%struct.my_type = type { i64, i64 }
+%struct.my_type2 = type { i64, i64, i64, i64, i64, i64 }
+
+@array = external dso_local global [10 x %struct.my_type], align 8
+@array2 = external dso_local global [10 x %struct.my_type2], align 8
+
+define void @addpt1(i64 %index, i64 %arg) {
+; CHECK-CPA-O0-LABEL: addpt1:
+; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
+;
+; CHECK-CPA-O3-LABEL: addpt1:
+; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
+;
+; CHECK-NOCPA-O0-LABEL: addpt1:
+; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
+;
+; CHECK-NOCPA-O3-LABEL: addpt1:
+; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
+entry:
+ %e2 = getelementptr inbounds %struct.my_type, ptr @array, i64 %index, i32 1
+ store i64 %arg, ptr %e2, align 8
+ ret void
+}
+
+define void @maddpt1(i32 %pos, ptr %val) {
+; CHECK-CPA-O0-LABEL: maddpt1:
+; CHECK-CPA-O0: maddpt x0, x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: b memcpy
+;
+; CHECK-CPA-O3-LABEL: maddpt1:
+; CHECK-CPA-O3: maddpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]]]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #16]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #32]
+;
+; CHECK-NOCPA-O0-LABEL: maddpt1:
+; CHECK-NOCPA-O0: smaddl x0, w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O0: b memcpy
+;
+; CHECK-NOCPA-O3-LABEL: maddpt1:
+; CHECK-NOCPA-O3: smaddl [[REG1:x[0-9]+]], w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]]]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #16]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #32]
+entry:
+ %idxprom = sext i32 %pos to i64
+ %arrayidx = getelementptr inbounds [10 x %struct.my_type2], ptr @array2, i64 0, i64 %idxprom
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %arrayidx, ptr align 8 dereferenceable(48) %val, i64 48, i1 false)
+ ret void
+}
+
+define void @msubpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: msubpt1:
+; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: msubpt x0, x{{[0-9]+}}, x{{[0-9]+}}, [[REG1]]
+; CHECK-CPA-O0: addpt x1, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: b memcpy
+;
+; CHECK-CPA-O3-LABEL: msubpt1:
+; CHECK-CPA-O3: msubpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #192]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #208]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #224]
+;
+; CHECK-NOCPA-O0-LABEL: msubpt1:
+; CHECK-NOCPA-O0: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O0: add x0, x{{[0-9]+}}, [[REG1]]
+; CHECK-NOCPA-O0: b memcpy
+;
+; CHECK-NOCPA-O3-LABEL: msubpt1:
+; CHECK-NOCPA-O3: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O3: add [[REG2:x[0-9]+]], x{{[0-9]+}}, [[REG1]]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #192]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #208]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #224]
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type2, ptr getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %add.ptr, ptr align 8 dereferenceable(48) getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 2), i64 48, i1 false), !tbaa.struct !6
+ ret void
+}
+
+define void @subpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt1:
+; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
+;
+; CHECK-CPA-O3-LABEL: subpt1:
+; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
+;
+; CHECK-NOCPA-O0-LABEL: subpt1:
+; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, #96
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
+;
+; CHECK-NOCPA-O3-LABEL: subpt1:
+; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
+entry:
+ %conv = sext i32 %index to i64
+ %mul.neg = mul nsw i64 %conv, -16
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %mul.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !6
+ ret void
+}
+
+define void @subpt2(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt2:
+; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
+;
+; CHECK-CPA-O3-LABEL: subpt2:
+; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
+;
+; CHECK-NOCPA-O0-LABEL: subpt2:
+; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, #96
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
+;
+; CHECK-NOCPA-O3-LABEL: subpt2:
+; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !11
+ ret void
+}
+
+define ptr @subpt3(ptr %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subpt3:
+; CHECK-CPA-O0: mov [[REG1:x[0-9]+]], #-8
+; CHECK-CPA-O0: addpt x0, x{{[0-9]+}}, [[REG1]]
+; CHECK-CPA-O0: ret
+;
+; CHECK-CPA-O3-LABEL: subpt3:
+; CHECK-CPA-O3: mov [[REG1:x[0-9]+]], #-8
+; CHECK-CPA-O3: addpt x0, x{{[0-9]+}}, [[REG1]]
+; CHECK-CPA-O3: ret
+;
+; CHECK-NOCPA-O0-LABEL: subpt3:
+; CHECK-NOCPA-O0: subs x0, x{{[0-9]+}}, #8
+; CHECK-NOCPA-O0: ret
+;
+; CHECK-NOCPA-O3-LABEL: subpt3:
+; CHECK-NOCPA-O3: sub x0, x{{[0-9]+}}, #8
+; CHECK-NOCPA-O3: ret
+entry:
+ %incdec.ptr.i.i.i = getelementptr inbounds i64, ptr %ptr, i64 -1
+ ret ptr %incdec.ptr.i.i.i
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+
+!6 = !{i64 0, i64 8, !7, i64 8, i64 8, !7, i64 16, i64 8, !7, i64 24, i64 8, !7, i64 32, i64 8, !7, i64 40, i64 8, !7}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"long", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C++ TBAA"}
+!11 = !{i64 0, i64 8, !7, i64 8, i64 8, !7}
diff --git a/llvm/test/CodeGen/AArch64/cpa-selectiondag.ll b/llvm/test/CodeGen/AArch64/cpa-selectiondag.ll
new file mode 100644
index 000000000000000..49798e70ea873a3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cpa-selectiondag.ll
@@ -0,0 +1,169 @@
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O0 -global-isel=0 -fast-isel=0 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O3 -global-isel=0 -fast-isel=0 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O0 -global-isel=0 -fast-isel=0 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O0
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O3 -global-isel=0 -fast-isel=0 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O3
+
+%struct.my_type = type { i64, i64 }
+%struct.my_type2 = type { i64, i64, i64, i64, i64, i64 }
+
+@array = external dso_local global [10 x %struct.my_type], align 8
+@array2 = external dso_local global [10 x %struct.my_type2], align 8
+
+define void @addpt1(i64 %index, i64 %arg) {
+; CHECK-CPA-O0-LABEL: addpt1:
+; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
+;
+; CHECK-CPA-O3-LABEL: addpt1:
+; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
+;
+; CHECK-NOCPA-O0-LABEL: addpt1:
+; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
+
+; CHECK-NOCPA-O3-LABEL: addpt1:
+; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-NOCPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
+entry:
+ %e2 = getelementptr inbounds %struct.my_type, ptr @array, i64 %index, i32 1
+ store i64 %arg, ptr %e2, align 8
+ ret void
+}
+
+define void @maddpt1(i32 %pos, ptr %val) {
+; CHECK-CPA-O0-LABEL: maddpt1:
+; CHECK-CPA-O0: maddpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], #16]
+;
+; CHECK-CPA-O3-LABEL: maddpt1:
+; CHECK-CPA-O3: maddpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O3: stp q{{[0-9]+}}, q{{[0-9]+}}, [[[REG1]], #16]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]]]
+;
+; CHECK-NOCPA-O0-LABEL: maddpt1:
+; CHECK-NOCPA-O0: smaddl [[REG1:x[0-9]+]], w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], #32]
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], #16]
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]]]
+;
+; CHECK-NOCPA-O3-LABEL: maddpt1:
+; CHECK-NOCPA-O3: smaddl [[REG1:x[0-9]+]], w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O3: stp q{{[0-9]+}}, q{{[0-9]+}}, [[[REG1]], #16]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]]]
+entry:
+ %idxprom = sext i32 %pos to i64
+ %arrayidx = getelementptr inbounds [10 x %struct.my_type2], ptr @array2, i64 0, i64 %idxprom
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %arrayidx, ptr align 8 dereferenceable(48) %val, i64 48, i1 false)
+ ret void
+}
+
+define void @msubpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: msubpt1:
+; CHECK-CPA-O0: msubpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], #288]
+;
+; CHECK-CPA-O3-LABEL: msubpt1:
+; CHECK-CPA-O3: msubpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O3: stp q{{[0-9]+}}, q{{[0-9]+}}, [[[REG1]], #304]
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #288]
+;
+; CHECK-NOCPA-O0-LABEL: msubpt1:
+; CHECK-NOCPA-O0: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O0: add [[REG2:x[0-9]+]], x{{[0-9]+}}, [[REG1]]
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG2]], #320]
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG2]], #304]
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG2]], #288]
+;
+; CHECK-NOCPA-O3-LABEL: msubpt1:
+; CHECK-NOCPA-O3: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-NOCPA-O3: add [[REG2:x[0-9]+]], x{{[0-9]+}}, [[REG1]]
+; CHECK-NOCPA-O3: stp q{{[0-9]+}}, q{{[0-9]+}}, [[[REG2]], #304]
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG2]], #288]
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type2, ptr getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %add.ptr, ptr align 8 dereferenceable(48) getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 2), i64 48, i1 false), !tbaa.struct !6
+ ret void
+}
+
+define void @subpt1(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt1:
+; CHECK-CPA-O0: subpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-CPA-O3-LABEL: subpt1:
+; CHECK-CPA-O3: subpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-NOCPA-O0-LABEL: subpt1:
+; CHECK-NOCPA-O0: subs [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #8
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-NOCPA-O3-LABEL: subpt1:
+; CHECK-NOCPA-O3: sub [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #8
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #96]
+entry:
+ %conv = sext i32 %index to i64
+ %mul.neg = mul nsw i64 %conv, -16
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %mul.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !6
+ ret void
+}
+
+define void @subpt2(i32 %index, i32 %elem) {
+; CHECK-CPA-O0-LABEL: subpt2:
+; CHECK-CPA-O0: subpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-CPA-O3-LABEL: subpt2:
+; CHECK-CPA-O3: subpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
+; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-NOCPA-O0-LABEL: subpt2:
+; CHECK-NOCPA-O0: subs [[REG1:x[0-9]+]], x{{[0-9]+}}, w{{[0-9]+}}, sxtw #4
+; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], #96]
+;
+; CHECK-NOCPA-O3-LABEL: subpt2:
+; CHECK-NOCPA-O3: sub [[REG1:x[0-9]+]], x{{[0-9]+}}, w{{[0-9]+}}, sxtw #4
+; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #96]
+entry:
+ %idx.ext = sext i32 %index to i64
+ %idx.neg = sub nsw i64 0, %idx.ext
+ %add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %idx.neg
+ tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !11
+ ret void
+}
+
+define ptr @subpt3(ptr %ptr, i32 %index) {
+; CHECK-CPA-O0-LABEL: subpt3:
+; CHECK-CPA-O0: mov [[REG1:x[0-9]+]], #-8
+; CHECK-CPA-O0: addpt x0, x{{[0-9]+}}, [[REG1]]
+; CHECK-CPA-O0: ret
+;
+; CHECK-CPA-O3-LABEL: subpt3:
+; CHECK-CPA-O3: mov [[REG1:x[0-9]+]], #-8
+; CHECK-CPA-O3: addpt x0, x{{[0-9]+}}, [[REG1]]
+; CHECK-CPA-O3: ret
+;
+; CHECK-NOCPA-O0-LABEL: subpt3:
+; CHECK-NOCPA-O0: subs x0, x{{[0-9]+}}, #8
+; CHECK-NOCPA-O0: ret
+;
+; CHECK-NOCPA-O3-LABEL: subpt3:
+; CHECK-NOCPA-O3: sub x0, x{{[0-9]+}}, #8
+; CHECK-NOCPA-O3: ret
+entry:
+ %incdec.ptr.i.i.i = getelementptr inbounds i64, ptr %ptr, i64 -1
+ ret ptr %incdec.ptr.i.i.i
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+
+!6 = !{i64 0, i64 8, !7, i64 8, i64 8, !7, i64 16, i64 8, !7, i64 24, i64 8, !7, i64 32, i64 8, !7, i64 40, i64 8, !7}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"long", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C++ TBAA"}
+!11 = !{i64 0, i64 8, !7, i64 8, i64 8, !7}
More information about the llvm-commits
mailing list