[llvm] r372412 - [MTE] Handle MTE instructions in AArch64LoadStoreOptimizer.

Evgeniy Stepanov via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 20 10:36:27 PDT 2019


Author: eugenis
Date: Fri Sep 20 10:36:27 2019
New Revision: 372412

URL: http://llvm.org/viewvc/llvm-project?rev=372412&view=rev
Log:
[MTE] Handle MTE instructions in AArch64LoadStoreOptimizer.

Summary: Generate pre- and post-indexed forms of ST*G and STGP when possible.
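
For illustration, the kind of rewrite this enables at the MIR level (taken
from the new test added below): a tag store followed by an update of its base
register is merged into a single post-indexed tag store, with the immediate
scaled by the 16-byte tag granule (112 bytes / 16 = 7):

    STGOffset $x0, $x0, 0
    $x0 = ADDXri $x0, 112, 0
      =>
    STGPostIndex $x0, $x0, 7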

Reviewers: ostannard, vitalybuka

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67741

Added:
    llvm/trunk/test/CodeGen/AArch64/ldst-opt-mte.mir
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
    llvm/trunk/test/CodeGen/AArch64/stgp.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=372412&r1=372411&r2=372412&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri Sep 20 10:36:27 2019
@@ -201,8 +201,22 @@ static bool isNarrowStore(unsigned Opc)
   }
 }
 
+// These instructions set the memory tag and either keep the memory contents
+// unchanged or set them to zero, ignoring the address part of the source register.
+static bool isTagStore(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+    return true;
+  }
+}
+
 // Scaling factor for unscaled load or store.
-static int getMemScale(MachineInstr &MI) {
+static int getMemScale(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Opcode has unknown scale!");
@@ -255,6 +269,11 @@ static int getMemScale(MachineInstr &MI)
   case AArch64::STURQi:
   case AArch64::LDPQi:
   case AArch64::STPQi:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+  case AArch64::STGPi:
     return 16;
   }
 }
@@ -449,6 +468,16 @@ static unsigned getPreIndexedOpcode(unsi
     return AArch64::STPWpre;
   case AArch64::STPXi:
     return AArch64::STPXpre;
+  case AArch64::STGOffset:
+    return AArch64::STGPreIndex;
+  case AArch64::STZGOffset:
+    return AArch64::STZGPreIndex;
+  case AArch64::ST2GOffset:
+    return AArch64::ST2GPreIndex;
+  case AArch64::STZ2GOffset:
+    return AArch64::STZ2GPreIndex;
+  case AArch64::STGPi:
+    return AArch64::STGPpre;
   }
 }
 
@@ -518,6 +547,16 @@ static unsigned getPostIndexedOpcode(uns
     return AArch64::STPWpost;
   case AArch64::STPXi:
     return AArch64::STPXpost;
+  case AArch64::STGOffset:
+    return AArch64::STGPostIndex;
+  case AArch64::STZGOffset:
+    return AArch64::STZGPostIndex;
+  case AArch64::ST2GOffset:
+    return AArch64::ST2GPostIndex;
+  case AArch64::STZ2GOffset:
+    return AArch64::STZ2GPostIndex;
+  case AArch64::STGPi:
+    return AArch64::STGPpost;
   }
 }
 
@@ -536,10 +575,30 @@ static bool isPairedLdSt(const MachineIn
   case AArch64::STPQi:
   case AArch64::STPWi:
   case AArch64::STPXi:
+  case AArch64::STGPi:
     return true;
   }
 }
 
+// Returns the scale and offset range of the pre/post-indexed variants of MI.
+static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
+                                       int &MinOffset, int &MaxOffset) {
+  bool IsPaired = isPairedLdSt(MI);
+  bool IsTagStore = isTagStore(MI);
+  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
+  // as in the "unsigned offset" variant.
+  // All other pre/post-indexed ldst instructions are unscaled.
+  Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+
+  if (IsPaired) {
+    MinOffset = -64;
+    MaxOffset = 63;
+  } else {
+    MinOffset = -256;
+    MaxOffset = 255;
+  }
+}
+
 static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
                                           unsigned PairedRegOp = 0) {
   assert(PairedRegOp < 2 && "Unexpected register operand idx.");
@@ -618,6 +677,11 @@ static bool isMergeableLdStUpdate(Machin
   case AArch64::LDRWui:
   case AArch64::LDRHHui:
   case AArch64::LDRBBui:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+  case AArch64::STGPi:
   // Unscaled instructions.
   case AArch64::STURSi:
   case AArch64::STURDi:
@@ -1328,18 +1392,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(Mac
   unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                              : getPostIndexedOpcode(I->getOpcode());
   MachineInstrBuilder MIB;
+  int Scale, MinOffset, MaxOffset;
+  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
   if (!isPairedLdSt(*I)) {
     // Non-paired instruction.
     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(*Update))
               .add(getLdStRegOp(*I))
               .add(getLdStBaseOp(*I))
-              .addImm(Value)
+              .addImm(Value / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
   } else {
     // Paired instruction.
-    int Scale = getMemScale(*I);
     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(*Update))
               .add(getLdStRegOp(*I, 0))
@@ -1395,28 +1460,21 @@ bool AArch64LoadStoreOpt::isMatchingUpda
         MI.getOperand(1).getReg() != BaseReg)
       break;
 
-    bool IsPairedInsn = isPairedLdSt(MemMI);
     int UpdateOffset = MI.getOperand(2).getImm();
     if (MI.getOpcode() == AArch64::SUBXri)
       UpdateOffset = -UpdateOffset;
 
-    // For non-paired load/store instructions, the immediate must fit in a
-    // signed 9-bit integer.
-    if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
+    // The immediate must be a multiple of the scaling factor of the pre/post
+    // indexed instruction.
+    int Scale, MinOffset, MaxOffset;
+    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
+    if (UpdateOffset % Scale != 0)
       break;
 
-    // For paired load/store instructions, the immediate must be a multiple of
-    // the scaling factor.  The scaled offset must also fit into a signed 7-bit
-    // integer.
-    if (IsPairedInsn) {
-      int Scale = getMemScale(MemMI);
-      if (UpdateOffset % Scale != 0)
-        break;
-
-      int ScaledOffset = UpdateOffset / Scale;
-      if (ScaledOffset > 63 || ScaledOffset < -64)
-        break;
-    }
+    // The scaled offset must fit in the immediate range of the instruction.
+    int ScaledOffset = UpdateOffset / Scale;
+    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
+      break;
 
     // If we have a non-zero Offset, we check that it matches the amount
     // we're adding to the register.
@@ -1442,13 +1500,19 @@ MachineBasicBlock::iterator AArch64LoadS
   if (MIUnscaledOffset != UnscaledOffset)
     return E;
 
-  // If the base register overlaps a destination register, we can't
-  // merge the update.
-  bool IsPairedInsn = isPairedLdSt(MemMI);
-  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
-    Register DestReg = getLdStRegOp(MemMI, i).getReg();
-    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
-      return E;
+  // If the base register overlaps a source/destination register, we can't
+  // merge the update. This does not apply to tag store instructions, which
+  // ignore the address part of the source register.
+  // It also does not apply to STGPi which, unlike normal stores, has no
+  // unpredictable behavior in this case and always performs writeback after
+  // reading the source register value.
+  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
+    bool IsPairedInsn = isPairedLdSt(MemMI);
+    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+      Register DestReg = getLdStRegOp(MemMI, i).getReg();
+      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+        return E;
+    }
   }
 
   // Track which register units have been modified and used between the first
@@ -1496,11 +1560,13 @@ MachineBasicBlock::iterator AArch64LoadS
     return E;
   // If the base register overlaps a destination register, we can't
   // merge the update.
-  bool IsPairedInsn = isPairedLdSt(MemMI);
-  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
-    Register DestReg = getLdStRegOp(MemMI, i).getReg();
-    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
-      return E;
+  if (!isTagStore(MemMI)) {
+    bool IsPairedInsn = isPairedLdSt(MemMI);
+    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+      Register DestReg = getLdStRegOp(MemMI, i).getReg();
+      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+        return E;
+    }
   }
 
   // Track which register units have been modified and used between the first
@@ -1659,7 +1725,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdSt
   // however, is not, so adjust here.
   int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
 
-  // Look forward to try to find a post-index instruction. For example,
+  // Look forward to try to find a pre-index instruction. For example,
   // ldr x1, [x0, #64]
   // add x0, x0, #64
   //   merged into:
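
To make the new legality check concrete, here is a minimal standalone sketch
of the folding condition (isFoldableUpdateOffset is a hypothetical helper,
not part of this commit; the scales and ranges are the ones returned by
getPrePostIndexedMemOpInfo above):

    // For ST*G:  Scale = 16 and [MinOffset, MaxOffset] = [-256, 255], so the
    // foldable byte offsets are the multiples of 16 in [-4096, 4080].
    // For STGPi: Scale = 16 and [MinOffset, MaxOffset] = [-64, 63], so the
    // foldable byte offsets are the multiples of 16 in [-1024, 1008].
    static bool isFoldableUpdateOffset(int UpdateOffset, int Scale,
                                       int MinOffset, int MaxOffset) {
      if (UpdateOffset % Scale != 0)
        return false;
      int ScaledOffset = UpdateOffset / Scale;
      return ScaledOffset >= MinOffset && ScaledOffset <= MaxOffset;
    }

These bounds match the offset-limit cases in the MIR test below: for STG,
updates of 4080 and -4096 bytes are merged while 4096 and -4112 are not; for
STGP, 1008 and -1024 are merged while 1024 and -1040 are not.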

Added: llvm/trunk/test/CodeGen/AArch64/ldst-opt-mte.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ldst-opt-mte.mir?rev=372412&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-opt-mte.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/ldst-opt-mte.mir Fri Sep 20 10:36:27 2019
@@ -0,0 +1,285 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt  -verify-machineinstrs  -o - %s | FileCheck %s
+---
+
+### STG and its offset limits
+
+# CHECK-LABEL: name: test_STG_post
+# CHECK: STGPostIndex $x0, $x0, 7
+name: test_STG_post
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_same_reg
+# CHECK: STGPostIndex $x1, $x0, 7
+name: test_STG_post_same_reg
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    STGOffset $x1, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_unaligned
+# CHECK:      STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 8, 0
+name: test_STG_post_unaligned
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 8, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post2
+# CHECK: STGPostIndex $x0, $x0, -256
+name: test_STG_post2
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = SUBXri $x0, 4096, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post3
+# CHECK:      STGOffset $x0, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 4112, 0
+name: test_STG_post3
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = SUBXri $x0, 4112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post4
+# CHECK: STGPostIndex $x0, $x0, 255
+name: test_STG_post4
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 4080, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post5
+# CHECK:      STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 4096, 0
+name: test_STG_post5
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 4096, 0
+    RET_ReallyLR implicit $x0
+...
+
+### The rest of ST*G variants.
+
+# CHECK-LABEL: name: test_STZG_post
+# CHECK: STZGPostIndex $x0, $x0, 7
+name: test_STZG_post
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STZGOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_ST2G_post
+# CHECK: ST2GPostIndex $x0, $x0, 7
+name: test_ST2G_post
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ST2GOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STZ2G_post
+# CHECK: STZ2GPostIndex $x0, $x0, 7
+name: test_STZ2G_post
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STZ2GOffset $x0, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+### STGP and its offset limits
+
+# CHECK-LABEL: name: test_STGP_post
+# CHECK: STGPpost $x1, $x2, $x0, 7
+name: test_STGP_post
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post2
+# CHECK: STGPpost $x1, $x2, $x0, -64
+name: test_STGP_post2
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 0
+    $x0 = SUBXri $x0, 1024, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post3
+# CHECK:      STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 1040, 0
+name: test_STGP_post3
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 0
+    $x0 = SUBXri $x0, 1040, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post4
+# CHECK: STGPpost $x1, $x2, $x0, 63
+name: test_STGP_post4
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 0
+    $x0 = ADDXri $x0, 1008, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post5
+# CHECK:      STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 1024, 0
+name: test_STGP_post5
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 0
+    $x0 = ADDXri $x0, 1024, 0
+    RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms
+
+# CHECK-LABEL: name: test_STG_pre
+# CHECK: STGPreIndex $x0, $x0, 10
+name: test_STG_pre
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGOffset $x0, $x0, 10
+    $x0 = ADDXri $x0, 160, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre
+# CHECK: STGPpre $x1, $x2, $x0, 10
+name: test_STGP_pre
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    STGPi $x1, $x2, $x0, 10
+    $x0 = ADDXri $x0, 160, 0
+    RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms with add/sub coming before the store.
+
+# CHECK-LABEL: name: test_STG_pre_back
+# CHECK: STGPreIndex $x0, $x0, 2
+name: test_STG_pre_back
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    $x0 = ADDXri $x0, 32, 0
+    STGOffset $x0, $x0, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_back
+# CHECK: STGPpre $x1, $x2, $x0, -3
+name: test_STGP_pre_back
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1, $x2
+
+    $x0 = SUBXri $x0, 48, 0
+    STGPi $x1, $x2, $x0, 0
+    RET_ReallyLR implicit $x0
+...
+
+### STGP with source register == address register
+
+# CHECK-LABEL: name: test_STGP_post_same_reg
+# CHECK: STGPpost $x0, $x0, $x0, 7
+name: test_STGP_post_same_reg
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGPi $x0, $x0, $x0, 0
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_same_reg
+# CHECK: STGPpre $x0, $x0, $x0, 7
+name: test_STGP_pre_same_reg
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    STGPi $x0, $x0, $x0, 7
+    $x0 = ADDXri $x0, 112, 0
+    RET_ReallyLR implicit $x0
+...
+
+# This case cannot be merged because the source register is always read before writeback.
+# CHECK-LABEL: name: test_STGP_pre_back_same_reg
+# CHECK:      SUBXri $x0, 48, 0
+# CHECK-NEXT: STGPi $x0, $x0, $x0, 0
+name: test_STGP_pre_back_same_reg
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    $x0 = SUBXri $x0, 48, 0
+    STGPi $x0, $x0, $x0, 0
+    RET_ReallyLR implicit $x0
+...

Modified: llvm/trunk/test/CodeGen/AArch64/stgp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/stgp.ll?rev=372412&r1=372411&r2=372412&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/stgp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/stgp.ll Fri Sep 20 10:36:27 2019
@@ -65,7 +65,7 @@ entry:
 define void @stgp_alloca(i64 %a, i64 %b) {
 entry:
 ; CHECK-LABEL: stgp_alloca:
-; CHECK: stgp x0, x1, [sp]
+; CHECK: stgp x0, x1, [sp, #-32]!
 ; CHECK: stgp x1, x0, [sp, #16]
 ; CHECK: ret
   %x = alloca i8, i32 32, align 16
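
With writeback folding, the 32-byte stack adjustment for the alloca is merged
into the first tagged store as a pre-indexed stgp; a sketch of the effect
(assuming the adjustment was previously emitted as a separate sub):

    sub  sp, sp, #32
    stgp x0, x1, [sp]
      =>
    stgp x0, x1, [sp, #-32]!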
