[llvm] 993c1a3 - [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 23 16:42:30 PDT 2020
Author: Eli Friedman
Date: 2020-07-23T16:41:37-07:00
New Revision: 993c1a3219a8ae69f1d700183bf174d75f3815d4
URL: https://github.com/llvm/llvm-project/commit/993c1a3219a8ae69f1d700183bf174d75f3815d4
DIFF: https://github.com/llvm/llvm-project/commit/993c1a3219a8ae69f1d700183bf174d75f3815d4.diff
LOG: [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.
It's sort of tricky to hit this in practice, but not impossible. I have
a synthetic C testcase if anyone is interested.
The implementation is identical to the equivalent NEON register copies.
Differential Revision: https://reviews.llvm.org/D84373
Added:
llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5ccaf1..08f80c9aa361 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Copy a Z register pair by copying the individual sub-registers.
+ if (AArch64::ZPR2RegClass.contains(DestReg) &&
+ AArch64::ZPR2RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
+ // Copy a Z register triple by copying the individual sub-registers.
+ if (AArch64::ZPR3RegClass.contains(DestReg) &&
+ AArch64::ZPR3RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
+ // Copy a Z register quad by copying the individual sub-registers.
+ if (AArch64::ZPR4RegClass.contains(DestReg) &&
+ AArch64::ZPR4RegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+ Indices);
+ return;
+ }
+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
diff --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
new file mode 100644
index 000000000000..83a0b5dd1c14
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: copy_zpr2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $z0_z1
+ ; CHECK-LABEL: name: copy_zpr2
+ ; CHECK: liveins: $z0_z1
+ ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+ ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+ ; CHECK: RET_ReallyLR
+ $z1_z2 = COPY $z0_z1
+ $z0_z1 = COPY $z1_z2
+ RET_ReallyLR
+
+...
+---
+name: copy_zpr3
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1_z2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $z0_z1_z2
+ ; CHECK-LABEL: name: copy_zpr3
+ ; CHECK: liveins: $z0_z1_z2
+ ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+ ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+ ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+ ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+ ; CHECK: RET_ReallyLR
+ $z1_z2_z3 = COPY $z0_z1_z2
+ $z0_z1_z2 = COPY $z1_z2_z3
+ RET_ReallyLR
+
+...
+---
+name: copy_zpr4
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$z0_z1_z2_z3' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $z0_z1_z2_z3
+ ; CHECK-LABEL: name: copy_zpr4
+ ; CHECK: liveins: $z0_z1_z2_z3
+ ; CHECK: $z4 = ORR_ZZZ $z3, $z3
+ ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+ ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+ ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+ ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+ ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+ ; CHECK: $z3 = ORR_ZZZ $z4, $z4
+ ; CHECK: RET_ReallyLR
+ $z1_z2_z3_z4 = COPY $z0_z1_z2_z3
+ $z0_z1_z2_z3 = COPY $z1_z2_z3_z4
+ RET_ReallyLR
+
+...
More information about the llvm-commits mailing list