[llvm] 2a6e39d - [AVR] Do not emit 'LPM Rd, Z' on devices without FeatureLPMX

Fri Mar 24 02:47:37 PDT 2023

Author: Ben Shi
Date: 2023-03-24T17:47:24+08:00
New Revision: 2a6e39dbf84af4b3f8b31930fed786b3c56287f5

URL: https://github.com/llvm/llvm-project/commit/2a6e39dbf84af4b3f8b31930fed786b3c56287f5
DIFF: https://github.com/llvm/llvm-project/commit/2a6e39dbf84af4b3f8b31930fed786b3c56287f5.diff

LOG: [AVR] Do not emit 'LPM Rd, Z' on devices without FeatureLPMX

The 'LPM' instruction has three forms:

------------------------
| form       | feature |
| ---------- | --------|
| LPM        | hasLPM  |
| LPM Rd, Z  | hasLPMX |
| LPM Rd, Z+ | hasLPMX |
------------------------

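Before this patch, the second form was always selected in ISelDAGToDAG,
even on devices without FeatureLPMX. This patch instead selects the new
LPMBRdZ pseudo on devices with only FeatureLPM, which is expanded to
"LPM" followed by "MOV Rd, R0" on those devices.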

Reviewed By: jacquesguan

Differential Revision: https://reviews.llvm.org/D141246

Added: 
    llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir

Modified: 
    llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
    llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
    llvm/lib/Target/AVR/AVRInstrInfo.td
    llvm/test/CodeGen/AVR/elpm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 06dc2b7c5b27b..b29eb87a55a01 100644

--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -98,6 +98,8 @@ class AVRExpandPseudo : public MachineFunctionPass {
 
   // Common implementation of LPMWRdZ and ELPMWRdZ.
   bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+  // Common implementation of LPMBRdZ and ELPMBRdZ.
+  bool expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt);
 };
 
 char AVRExpandPseudo::ID = 0;
@@ -858,28 +860,32 @@ bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
   return expandLPMWELPMW(MBB, MBBI, true);
 }
 
-template <>
-bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt) {
   MachineInstr &MI = *MBBI;
   Register DstReg = MI.getOperand(0).getReg();
   Register SrcReg = MI.getOperand(1).getReg();
-  Register BankReg = MI.getOperand(2).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
   const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+  bool HasX = IsExt ? STI.hasELPMX() : STI.hasLPMX();
 
   // Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp).
-  buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+  if (IsExt) {
+    Register BankReg = MI.getOperand(2).getReg();
+    buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+  }
 
   // Load byte.
-  if (STI.hasELPMX()) {
-    auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
+  if (HasX) {
+    unsigned Opc = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
+    auto MILB = buildMI(MBB, MBBI, Opc)
                     .addReg(DstReg, RegState::Define)
                     .addReg(SrcReg, getKillRegState(SrcIsKill));
     MILB.setMemRefs(MI.memoperands());
   } else {
-    // For the basic 'ELPM' instruction, its operand[0] is the implicit
+    // For the basic ELPM/LPM instruction, its operand[0] is the implicit
     // 'Z' register, and its operand[1] is the implicit 'R0' register.
-    auto MILB = buildMI(MBB, MBBI, AVR::ELPM);
+    unsigned Opc = IsExt ? AVR::ELPM : AVR::LPM;
+    auto MILB = buildMI(MBB, MBBI, Opc);
     buildMI(MBB, MBBI, AVR::MOVRdRr)
         .addReg(DstReg, RegState::Define)
         .addReg(AVR::R0, RegState::Kill);
@@ -890,6 +896,16 @@ bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMBELPMB(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMBELPMB(MBB, MBBI, false);
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
   llvm_unreachable("16-bit LPMPi is unimplemented");
@@ -2437,6 +2453,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
     EXPAND(AVR::LDWRdPtrPd);
   case AVR::LDDWRdYQ: //: FIXME: remove this once PR13375 gets fixed
     EXPAND(AVR::LDDWRdPtrQ);
+    EXPAND(AVR::LPMBRdZ);
     EXPAND(AVR::LPMWRdZ);
     EXPAND(AVR::LPMWRdZPi);
     EXPAND(AVR::ELPMBRdZ);

diff  --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 6ea8e200bd4e9..8718997b86d07 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -399,8 +399,9 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
     switch (VT.SimpleTy) {
     case MVT::i8:
       if (ProgMemBank == 0) {
+        unsigned Opc = Subtarget->hasLPMX() ? AVR::LPMRdZ : AVR::LPMBRdZ;
         ResNode =
-            CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+            CurDAG->getMachineNode(Opc, DL, MVT::i8, MVT::Other, Ptr);
       } else {
         // Do not combine the LDI instruction into the ELPM pseudo instruction,
         // since it may be reused by other ELPM pseudo instructions.

diff  --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c272711bb8663..1e56f9447cb80 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1690,6 +1690,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
       : F16<0b1001010111001000, (outs), (ins), "lpm", []>,
       Requires<[HasLPM]>;
 
+  // This pseudo is a combination of the LPM and MOV instructions.
+  let Defs = [R0] in
+  def LPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z), "lpmb\t$dst, $z", []>,
+                Requires<[HasLPM]>;
+
   def LPMRdZ : FLPMX<0, 0,
                      (outs GPR8
                       : $rd),

diff  --git a/llvm/test/CodeGen/AVR/elpm.ll b/llvm/test/CodeGen/AVR/elpm.ll
index ba28bc814591d..2e989d1442ffd 100644
--- a/llvm/test/CodeGen/AVR/elpm.ll
+++ b/llvm/test/CodeGen/AVR/elpm.ll
@@ -152,6 +152,24 @@ define signext i8 @foob0(i16 %a, i16 %b) {
 ; CHECK-NEXT:    lsl r25
 ; CHECK-NEXT:    sbc r25, r25
 ; CHECK-NEXT:    ret
+;
+; NOX-LABEL: foob0:
+; NOX:       ; %bb.0: ; %entry
+; NOX-NEXT:    subi r22, lo8(-(arrb1))
+; NOX-NEXT:    sbci r23, hi8(-(arrb1))
+; NOX-NEXT:    movw r30, r22
+; NOX-NEXT:    lpm
+; NOX-NEXT:    mov r18, r0
+; NOX-NEXT:    subi r24, lo8(-(arrb1))
+; NOX-NEXT:    sbci r25, hi8(-(arrb1))
+; NOX-NEXT:    movw r30, r24
+; NOX-NEXT:    lpm
+; NOX-NEXT:    mov r24, r0
+; NOX-NEXT:    sub r24, r18
+; NOX-NEXT:    mov r25, r24
+; NOX-NEXT:    lsl r25
+; NOX-NEXT:    sbc r25, r25
+; NOX-NEXT:    ret
 entry:
   %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
   %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
@@ -179,6 +197,26 @@ define signext i8 @foob1(i16 %a, i16 %b) {
 ; CHECK-NEXT:    lsl r25
 ; CHECK-NEXT:    sbc r25, r25
 ; CHECK-NEXT:    ret
+;
+; NOX-LABEL: foob1:
+; NOX:       ; %bb.0: ; %entry
+; NOX-NEXT:    subi r22, lo8(-(arrb3))
+; NOX-NEXT:    sbci r23, hi8(-(arrb3))
+; NOX-NEXT:    movw r30, r22
+; NOX-NEXT:    ldi r18, 2
+; NOX-NEXT:    out 59, r18
+; NOX-NEXT:    elpm
+; NOX-NEXT:    mov r18, r0
+; NOX-NEXT:    subi r24, lo8(-(arrb1))
+; NOX-NEXT:    sbci r25, hi8(-(arrb1))
+; NOX-NEXT:    movw r30, r24
+; NOX-NEXT:    lpm
+; NOX-NEXT:    mov r24, r0
+; NOX-NEXT:    sub r24, r18
+; NOX-NEXT:    mov r25, r24
+; NOX-NEXT:    lsl r25
+; NOX-NEXT:    sbc r25, r25
+; NOX-NEXT:    ret
 entry:
   %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a
   %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
@@ -206,6 +244,26 @@ define signext i8 @foob2(i16 %a, i16 %b) {
 ; CHECK-NEXT:    lsl r25
 ; CHECK-NEXT:    sbc r25, r25
 ; CHECK-NEXT:    ret
+;
+; NOX-LABEL: foob2:
+; NOX:       ; %bb.0: ; %entry
+; NOX-NEXT:    subi r24, lo8(-(arrb5))
+; NOX-NEXT:    sbci r25, hi8(-(arrb5))
+; NOX-NEXT:    movw r30, r24
+; NOX-NEXT:    ldi r24, 4
+; NOX-NEXT:    out 59, r24
+; NOX-NEXT:    elpm
+; NOX-NEXT:    mov r24, r0
+; NOX-NEXT:    subi r22, lo8(-(arrb1))
+; NOX-NEXT:    sbci r23, hi8(-(arrb1))
+; NOX-NEXT:    movw r30, r22
+; NOX-NEXT:    lpm
+; NOX-NEXT:    mov r25, r0
+; NOX-NEXT:    sub r24, r25
+; NOX-NEXT:    mov r25, r24
+; NOX-NEXT:    lsl r25
+; NOX-NEXT:    sbc r25, r25
+; NOX-NEXT:    ret
 entry:
   %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %a
   %0 = load i8, i8 addrspace(5)* %arrayidx, align 1

diff  --git a/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir b/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir
new file mode 100644
index 0000000000000..6eaa9435220ea
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir
@@ -0,0 +1,40 @@
+# RUN: llc -mtriple=avr -mattr=+lpm -mattr=+lpmx -start-before=greedy %s -o - \
+# RUN:     | FileCheck %s
+# RUN: llc -mtriple=avr -mattr=+lpm -mattr=-lpmx -start-before=greedy %s -o - \
+# RUN:     | FileCheck --check-prefix=NOX %s
+
+# This test checks the expansion of the 8-bit LPMBRdZ pseudo instruction and that
+# the register allocator won't use R31R30 as an output register (which would
+# lead to undefined behavior).
+
+--- |
+  target triple = "avr--"
+  define void @test_lpmbrdz() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_lpmbrdz
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r31r30
+
+    ; CHECK-LABEL: test_lpmbrdz:
+    ; CHECK:       ; %bb.0:
+    ; CHECK-NEXT:    lpm r30, Z
+    ; CHECK-NEXT:    ret
+
+    ; NOX-LABEL:   test_lpmbrdz
+    ; NOX:         ; %bb.0:
+    ; NOX-NEXT:      lpm
+    ; NOX-NEXT:      mov r30, r0
+    ; NOX-NEXT:      ret
+
+    %1:zreg = COPY killed $r31r30
+    %2:gpr8 = LPMBRdZ %1, implicit-def dead $r0
+    $r30 = COPY %2
+    RET implicit $r30
+...