[llvm] r362576 - [PowerPC] Collapse RLDICL/RLDICR into RLDIC when possible

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 4 19:36:40 PDT 2019


Author: nemanjai
Date: Tue Jun  4 19:36:40 2019
New Revision: 362576

URL: http://llvm.org/viewvc/llvm-project?rev=362576&view=rev
Log:
[PowerPC] Collapse RLDICL/RLDICR into RLDIC when possible

Generally speaking, we lower to an optimal rotate sequence for nodes visible in
the SDAG. However, there are instances where the two rotates are not visible at
ISEL time - most notably those in a very common sequence when lowering switch
statements to jump tables.

A common situation is a switch on a 32-bit integer. This value has to have the
upper 32 bits cleared and because jump table offsets are word offsets, the value
needs to be shifted left by 2 bits. We currently emit the clear and the left
shift as two separate instructions, but this is not needed as we can lower it to
a single RLDIC.

This patch just cleans that up.

Differential revision: https://reviews.llvm.org/D60402

Added:
    llvm/trunk/test/CodeGen/PowerPC/collapse-rotates.mir
    llvm/trunk/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=362576&r1=362575&r2=362576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Tue Jun  4 19:36:40 2019
@@ -47,6 +47,8 @@ STATISTIC(NumFunctionsEnteredInMIPeephol
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+          "Number of pairs of rotate left, clear left/right collapsed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -757,6 +759,56 @@ bool PPCMIPeephole::simplifyCode(void) {
         NumOptADDLIs++;
         break;
       }
+      case PPC::RLDICR: {
+        // We miss the opportunity to emit an RLDIC when lowering jump tables
+        // since ISEL sees only a single basic block. When selecting, the clear
+        // and shift left will be in different blocks.
+        unsigned SrcReg = MI.getOperand(1).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        if (SrcMI->getOpcode() != PPC::RLDICL)
+          break;
+        MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+        MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+        MachineOperand MOpSHMI = MI.getOperand(2);
+        MachineOperand MOpMEMI = MI.getOperand(3);
+        if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() &&
+              MOpSHMI.isImm() && MOpMEMI.isImm()))
+          break;
+        uint64_t SHSrc = MOpSHSrc.getImm();
+        uint64_t MBSrc = MOpMBSrc.getImm();
+        uint64_t SHMI = MOpSHMI.getImm();
+        uint64_t MEMI = MOpMEMI.getImm();
+        uint64_t NewSH = SHSrc + SHMI;
+        uint64_t NewMB = MBSrc - SHMI;
+        if (NewMB > 63 || NewSH > 63)
+          break;
+
+        // The bits cleared with RLDICL are [0, MBSrc).
+        // The bits cleared with RLDICR are (MEMI, 63].
+        // After the sequence, the bits cleared are:
+        // [0, MBSrc-SHMI) and (MEMI, 63].
+        //
+        // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+        if ((63 - NewSH) != MEMI)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Converting pair: ");
+        LLVM_DEBUG(SrcMI->dump());
+        LLVM_DEBUG(MI.dump());
+
+        MI.setDesc(TII->get(PPC::RLDIC));
+        MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+        MI.getOperand(2).setImm(NewSH);
+        MI.getOperand(3).setImm(NewMB);
+
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+        NumRotatesCollapsed++;
+        break;
+      }
       }
     }
 

Added: llvm/trunk/test/CodeGen/PowerPC/collapse-rotates.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/collapse-rotates.mir?rev=362576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/collapse-rotates.mir (added)
+++ llvm/trunk/test/CodeGen/PowerPC/collapse-rotates.mir Tue Jun  4 19:36:40 2019
@@ -0,0 +1,65 @@
+# RUN: llc -mtriple=powerpc64le--linux-gnu -start-before ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = 'b.ll'
+  source_filename = "b.ll"
+  target datalayout = "e-m:e-i64:64-n32:64"
+  
+  define dso_local i64 @test(i64 %l) {
+  entry:
+    %shl = shl i64 %l, 3
+    ret i64 %shl
+  }
+
+...
+---
+name:            test
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       
+  - { id: 0, class: g8rc, preferred-register: '' }
+  - { id: 1, class: g8rc, preferred-register: '' }
+  - { id: 2, class: g8rc, preferred-register: '' }
+liveins:         
+  - { reg: '$x3', virtual-reg: '%0' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x3
+  
+    %0:g8rc = COPY $x3
+    %1:g8rc = RLDICL %0, 2, 32
+    %2:g8rc = RLDICR %1, 3, 58
+    $x3 = COPY %2
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+...
+# CHECK: rldic 3, 3, 5, 29

Added: llvm/trunk/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll?rev=362576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll Tue Jun  4 19:36:40 2019
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -o - \
+; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local zeroext i32 @test(i32 signext %l) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r3, r3, -1
+; CHECK-NEXT:    cmplwi r3, 5
+; CHECK-NEXT:    bgt cr0, .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    rldic r3, r3, 2, 30
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    lwax r3, r3, r4
+; CHECK-NEXT:    add r3, r3, r4
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:  .LBB0_2: # %sw.bb
+; CHECK-NEXT:    li r3, 2
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_3: # %sw.default
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_4: # %sw.bb3
+; CHECK-NEXT:    li r3, 3
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_5: # %sw.bb5
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_6: # %sw.bb8
+; CHECK-NEXT:    li r3, 5
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_7: # %sw.bb10
+; CHECK-NEXT:    li r3, 66
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_8: # %sw.bb13
+; CHECK-NEXT:    li r3, 66
+; CHECK-NEXT:  .LBB0_9: # %return
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_10: # %return
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  switch i32 %l, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb3
+    i32 3, label %sw.bb5
+    i32 4, label %sw.bb8
+    i32 5, label %sw.bb10
+    i32 6, label %sw.bb13
+  ]
+
+sw.default:                                       ; preds = %entry
+  %call = tail call signext i32 @test1(i32 signext 1)
+  %call1 = tail call signext i32 @test3(i32 signext %call)
+  br label %return
+
+sw.bb:                                            ; preds = %entry
+  %call2 = tail call signext i32 @test1(i32 signext 2)
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  %call4 = tail call signext i32 @test2(i32 signext 3)
+  br label %return
+
+sw.bb5:                                           ; preds = %entry
+  %call6 = tail call signext i32 @test2(i32 signext 4)
+  %call7 = tail call signext i32 @test3(i32 signext %call6)
+  br label %return
+
+sw.bb8:                                           ; preds = %entry
+  %call9 = tail call signext i32 @test4(i32 signext 5)
+  br label %return
+
+sw.bb10:                                          ; preds = %entry
+  %call11 = tail call signext i32 @test4(i32 signext 66)
+  %call12 = tail call signext i32 @test1(i32 signext %call11)
+  br label %return
+
+sw.bb13:                                          ; preds = %entry
+  %call14 = tail call signext i32 @test2(i32 signext 66)
+  br label %return
+
+return:                                           ; preds = %sw.bb13, %sw.bb10, %sw.bb8, %sw.bb5, %sw.bb3, %sw.bb, %sw.default
+  %retval.0 = phi i32 [ %call1, %sw.default ], [ %call14, %sw.bb13 ], [ %call12, %sw.bb10 ], [ %call9, %sw.bb8 ], [ %call7, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @test3(i32 signext)
+
+declare signext i32 @test1(i32 signext)
+
+declare signext i32 @test2(i32 signext)
+
+declare signext i32 @test4(i32 signext)




More information about the llvm-commits mailing list