[llvm] 993c1a3 - [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.

Eli Friedman via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 23 16:42:30 PDT 2020


Author: Eli Friedman
Date: 2020-07-23T16:41:37-07:00
New Revision: 993c1a3219a8ae69f1d700183bf174d75f3815d4

URL: https://github.com/llvm/llvm-project/commit/993c1a3219a8ae69f1d700183bf174d75f3815d4
DIFF: https://github.com/llvm/llvm-project/commit/993c1a3219a8ae69f1d700183bf174d75f3815d4.diff

LOG: [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.

Triggering a physical-register copy of an SVE Z register tuple
(ZPR2/3/4) is tricky in practice, but not impossible. I have a
synthetic C testcase if anyone is interested.

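The implementation is identical in approach to the equivalent NEON
register copies: each tuple is copied one sub-register at a time via
copyPhysRegTuple, here using ORR_ZZZ on the zsub0..zsub3 sub-registers.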

Differential Revision: https://reviews.llvm.org/D84373

Added: 
    llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5ccaf1..08f80c9aa361 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  // Copy a Z register pair by copying the individual sub-registers.
+  if (AArch64::ZPR2RegClass.contains(DestReg) &&
+      AArch64::ZPR2RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register triple by copying the individual sub-registers.
+  if (AArch64::ZPR3RegClass.contains(DestReg) &&
+      AArch64::ZPR3RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register quad by copying the individual sub-registers.
+  if (AArch64::ZPR4RegClass.contains(DestReg) &&
+      AArch64::ZPR4RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2, AArch64::zsub3};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
   if (AArch64::GPR64spRegClass.contains(DestReg) &&
       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {

diff  --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
new file mode 100644
index 000000000000..83a0b5dd1c14
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            copy_zpr2
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1
+    ; CHECK-LABEL: name: copy_zpr2
+    ; CHECK: liveins: $z0_z1
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: RET_ReallyLR
+    $z1_z2 = COPY $z0_z1
+    $z0_z1 = COPY $z1_z2
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr3
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2
+    ; CHECK-LABEL: name: copy_zpr3
+    ; CHECK: liveins: $z0_z1_z2
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3 = COPY $z0_z1_z2
+    $z0_z1_z2 = COPY $z1_z2_z3
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr4
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2_z3' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2_z3
+    ; CHECK-LABEL: name: copy_zpr4
+    ; CHECK: liveins: $z0_z1_z2_z3
+    ; CHECK: $z4 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z4, $z4
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3_z4 = COPY $z0_z1_z2_z3
+    $z0_z1_z2_z3 = COPY $z1_z2_z3_z4
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list