[llvm] bce2e20 - [AVR] Optimize int16 airthmetic right shift for shift amount 7/14/15

Ben Shi via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 25 23:53:37 PDT 2022


Author: Ben Shi
Date: 2022-03-26T06:53:27Z
New Revision: bce2e208e08f62dd362566e43b16cbfece459bec

URL: https://github.com/llvm/llvm-project/commit/bce2e208e08f62dd362566e43b16cbfece459bec
DIFF: https://github.com/llvm/llvm-project/commit/bce2e208e08f62dd362566e43b16cbfece459bec.diff

LOG: [AVR] Optimize int16 airthmetic right shift for shift amount 7/14/15

Reviewed By: aykevl

Differential Revision: https://reviews.llvm.org/D115618

Added: 
    llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir

Modified: 
    llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
    llvm/lib/Target/AVR/AVRISelLowering.cpp
    llvm/lib/Target/AVR/AVRInstrInfo.td
    llvm/test/CodeGen/AVR/shift.ll
    llvm/test/CodeGen/AVR/sign-extension.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 5618d88d5b607..de93c863abd86 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -84,18 +84,23 @@ class AVRExpandPseudo : public MachineFunctionPass {
 
   bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
 
-  /// Specific shift implementation.
+  /// Specific shift implementation for int8.
   bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
   bool expandASRB6Rd(Block &MBB, BlockIt MBBI);
   bool expandASRB7Rd(Block &MBB, BlockIt MBBI);
+
+  /// Specific shift implementation for int16.
   bool expandLSLW4Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRW4Rd(Block &MBB, BlockIt MBBI);
+  bool expandASRW7Rd(Block &MBB, BlockIt MBBI);
   bool expandLSLW8Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRW8Rd(Block &MBB, BlockIt MBBI);
   bool expandASRW8Rd(Block &MBB, BlockIt MBBI);
   bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
   bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
+  bool expandASRW14Rd(Block &MBB, BlockIt MBBI);
+  bool expandASRW15Rd(Block &MBB, BlockIt MBBI);
 
   // Common implementation of LPMWRdZ and ELPMWRdZ.
   bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
@@ -1401,7 +1406,7 @@ bool AVRExpandPseudo::expand<AVR::LSLWHiRd>(Block &MBB, BlockIt MBBI) {
   // add hireg, hireg <==> lsl hireg
   auto MILSL =
       buildMI(MBB, MBBI, AVR::ADDRdRr)
-          .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead))
+          .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
           .addReg(DstHiReg, getKillRegState(DstIsKill))
           .addReg(DstHiReg, getKillRegState(DstIsKill));
 
@@ -1820,6 +1825,53 @@ bool AVRExpandPseudo::expand<AVR::ASRWLoRd>(Block &MBB, BlockIt MBBI) {
   return true;
 }
 
+bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(3).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // lsl r24
+  // mov r24,r25
+  // rol r24
+  // sbc r25,r25
+
+  // lsl r24 <=> add r24, r24
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg, RegState::Kill)
+      .addReg(DstLoReg, RegState::Kill);
+
+  // mov r24, r25
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg);
+
+  // rol r24 <=> adc r24, r24
+  buildMI(MBB, MBBI, AVR::ADCRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg, getKillRegState(DstIsKill))
+      .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+  // sbc r25, r25
+  auto MISBC =
+      buildMI(MBB, MBBI, AVR::SBCRdRr)
+          .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstHiReg, getKillRegState(DstIsKill))
+          .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+    MISBC->getOperand(3).setIsDead();
+  // SREG is always implicitly killed
+  MISBC->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
   Register DstLoReg, DstHiReg;
@@ -1846,8 +1898,102 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
           .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
           .addReg(DstHiReg, getKillRegState(DstIsKill))
           .addReg(DstHiReg, getKillRegState(DstIsKill));
+
   if (ImpIsDead)
     MIBHI->getOperand(3).setIsDead();
+  // SREG is always implicitly killed
+  MIBHI->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return true;
+}
+bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool DstIsKill = MI.getOperand(1).isKill();
+  bool ImpIsDead = MI.getOperand(3).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // lsl r25
+  // sbc r24, r24
+  // lsl r25
+  // mov r25, r24
+  // rol r24
+
+  // lsl r25 <=> add r25, r25
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg, RegState::Kill)
+      .addReg(DstHiReg, RegState::Kill);
+
+  // sbc r24, r24
+  buildMI(MBB, MBBI, AVR::SBCRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg, RegState::Kill)
+      .addReg(DstLoReg, RegState::Kill);
+
+  // lsl r25 <=> add r25, r25
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg, RegState::Kill)
+      .addReg(DstHiReg, RegState::Kill);
+
+  // mov r25, r24
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstLoReg);
+
+  // rol r24 <=> adc r24, r24
+  auto MIROL =
+      buildMI(MBB, MBBI, AVR::ADCRdRr)
+          .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstLoReg, getKillRegState(DstIsKill))
+          .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+  if (ImpIsDead)
+    MIROL->getOperand(3).setIsDead();
+  // SREG is always implicitly killed
+  MIROL->getOperand(4).setIsKill();
+
+  MI.eraseFromParent();
+  return false;
+}
+
+bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstLoReg, DstHiReg;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool ImpIsDead = MI.getOperand(3).isDead();
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // lsl r25
+  // sbc r25, r25
+  // mov r24, r25
+
+  // lsl r25 <=> add r25, r25
+  buildMI(MBB, MBBI, AVR::ADDRdRr)
+      .addReg(DstHiReg, RegState::Define)
+      .addReg(DstHiReg, RegState::Kill)
+      .addReg(DstHiReg, RegState::Kill);
+
+  // sbc r25, r25
+  auto MISBC =
+      buildMI(MBB, MBBI, AVR::SBCRdRr)
+          .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstHiReg, RegState::Kill)
+          .addReg(DstHiReg, RegState::Kill);
+  if (ImpIsDead)
+    MISBC->getOperand(3).setIsDead();
+  // SREG is always implicitly killed
+  MISBC->getOperand(4).setIsKill();
+
+  // mov r24, r25
+  buildMI(MBB, MBBI, AVR::MOVRdRr)
+      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstHiReg);
 
   MI.eraseFromParent();
   return true;
@@ -1858,8 +2004,14 @@ bool AVRExpandPseudo::expand<AVR::ASRWNRd>(Block &MBB, BlockIt MBBI) {
   MachineInstr &MI = *MBBI;
   unsigned Imm = MI.getOperand(2).getImm();
   switch (Imm) {
+  case 7:
+    return expandASRW7Rd(MBB, MBBI);
   case 8:
     return expandASRW8Rd(MBB, MBBI);
+  case 14:
+    return expandASRW14Rd(MBB, MBBI);
+  case 15:
+    return expandASRW15Rd(MBB, MBBI);
   default:
     llvm_unreachable("unimplemented asrwn");
     return false;

diff  --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 252331e88812e..7a1e7b1535a7a 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -270,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 }
 
 SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
-  //: TODO: this function has to be completely rewritten to produce optimal
-  // code, for now it's producing very long but correct code.
   unsigned Opc8;
   const SDNode *N = Op.getNode();
   EVT VT = Op.getValueType();
@@ -372,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
       ShiftAmount = 0;
     }
   } else if (VT.getSizeInBits() == 16) {
+    if (Op.getOpcode() == ISD::SRA)
+      // Special optimization for int16 arithmetic right shift.
+      switch (ShiftAmount) {
+      case 15:
+        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+                             DAG.getConstant(15, dl, VT));
+        ShiftAmount = 0;
+        break;
+      case 14:
+        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+                             DAG.getConstant(14, dl, VT));
+        ShiftAmount = 0;
+        break;
+      case 7:
+        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+                             DAG.getConstant(7, dl, VT));
+        ShiftAmount = 0;
+        break;
+      default:
+        break;
+      }
     if (4 <= ShiftAmount && ShiftAmount < 8)
       switch (Op.getOpcode()) {
       case ISD::SHL:

diff  --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c67f2024be49a..9b45b50ac98bd 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1943,7 +1943,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
                                   : $src)),
                          (implicit SREG)]>;
 
-  def ASRWNRd : Pseudo<(outs DLDREGS
+  def ASRWNRd : Pseudo<(outs DREGS
                         : $rd),
                        (ins DREGS
                         : $src, imm16

diff  --git a/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir b/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir
new file mode 100644
index 0000000000000..943421ad5f197
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir
@@ -0,0 +1,41 @@
+# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s
+
+--- |
+  target triple = "avr--"
+  define void @test() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test
+body: |
+  bb.0.entry:
+    liveins: $r15r14, $r13r12, $r11r10, $r17r16
+
+    ; CHECK-LABEL: test
+
+    ; CHECK:      $r14 = ADDRdRr killed $r14, killed $r14, implicit-def $sreg
+    ; CHECK-NEXT: $r14 = MOVRdRr $r15
+    ; CHECK-NEXT: $r14 = ADCRdRr $r14, $r14, implicit-def $sreg, implicit $sreg
+    ; CHECK-NEXT: $r15 = SBCRdRr $r15, $r15, implicit-def $sreg, implicit killed $sreg
+    $r15r14 = ASRWNRd $r15r14,  7, implicit-def $sreg
+
+    ; CHECK-NEXT: $r12 = MOVRdRr $r13
+    ; CHECK-NEXT: $r13 = ADDRdRr killed $r13, killed $r13, implicit-def $sreg
+    ; CHECK-NEXT: $r13 = SBCRdRr $r13, $r13, implicit-def $sreg, implicit killed $sreg
+    $r13r12 = ASRWNRd $r13r12,  8, implicit-def $sreg
+
+    ; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
+    ; CHECK-NEXT: $r10 = SBCRdRr killed $r10, killed $r10, implicit-def $sreg, implicit $sreg
+    ; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
+    ; CHECK-NEXT: $r11 = MOVRdRr $r10
+    ; CHECK-NEXT: $r10 = ADCRdRr $r10, $r10, implicit-def $sreg, implicit killed $sreg
+    $r11r10 = ASRWNRd $r11r10, 14, implicit-def $sreg
+
+    ; CHECK-NEXT: $r17 = ADDRdRr killed $r17, killed $r17, implicit-def $sreg
+    ; CHECK-NEXT: $r17 = SBCRdRr killed $r17, killed $r17, implicit-def $sreg, implicit killed $sreg
+    ; CHECK-NEXT: $r16 = MOVRdRr $r17
+    $r17r16 = ASRWNRd $r17r16, 15, implicit-def $sreg
+...

diff  --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
index beba537cd1043..46993bcc773c2 100644
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -301,6 +301,17 @@ define i16 @lsr_i16_13(i16 %a) {
   ret i16 %result
 }
 
+define i16 @asr_i16_7(i16 %a) {
+; CHECK-LABEL: asr_i16_7
+; CHECK:       lsl r24
+; CHECK-NEXT:  mov r24, r25
+; CHECK-NEXT:  rol r24
+; CHECK-NEXT:  sbc r25, r25
+; CHECK-NEXT:  ret
+  %result = ashr i16 %a, 7
+  ret i16 %result
+}
+
 define i16 @asr_i16_9(i16 %a) {
 ; CHECK-LABEL: asr_i16_9
 ; CHECK:       mov r24, r25
@@ -325,3 +336,25 @@ define i16 @asr_i16_12(i16 %a) {
   %result = ashr i16 %a, 12
   ret i16 %result
 }
+
+define i16 @asr_i16_14(i16 %a) {
+; CHECK-LABEL: asr_i16_14
+; CHECK:      lsl r25
+; CHECK-NEXT: sbc r24, r24
+; CHECK-NEXT: lsl r25
+; CHECK-NEXT: mov r25, r24
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: ret
+  %result = ashr i16 %a, 14
+  ret i16 %result
+}
+
+define i16 @asr_i16_15(i16 %a) {
+; CHECK-LABEL: asr_i16_15
+; CHECK:      lsl r25
+; CHECK-NEXT: sbc r25, r25
+; CHECK-NEXT: mov r24, r25
+; CHECK-NEXT: ret
+  %result = ashr i16 %a, 15
+  ret i16 %result
+}

diff  --git a/llvm/test/CodeGen/AVR/sign-extension.ll b/llvm/test/CodeGen/AVR/sign-extension.ll
index be2bea7f04e46..116617a9efb3d 100644
--- a/llvm/test/CodeGen/AVR/sign-extension.ll
+++ b/llvm/test/CodeGen/AVR/sign-extension.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=avr < %s | FileCheck %s
+; RUN: llc -march=avr -verify-machineinstrs < %s | FileCheck %s
 
 define i8 @sign_extended_1_to_8(i1) {
 ; CHECK-LABEL: sign_extended_1_to_8


        


More information about the llvm-commits mailing list