[llvm] r332439 - Emit a left-shift instead of a power-of-two multiply for jump-tables

Wed May 16 01:58:26 PDT 2018

Author: arichardson
Date: Wed May 16 01:58:26 2018
New Revision: 332439

URL: http://llvm.org/viewvc/llvm-project?rev=332439&view=rev
Log:
Emit a left-shift instead of a power-of-two multiply for jump-tables

Summary:
SelectionDAGLegalize::ExpandNode() inserts an ISD::MUL when lowering a
BR_JT opcode. While many backends optimize this multiply into a shift, some
do not: e.g. the MIPS backend currently always lowers it into a sequence of
load-immediate+multiply+mflo in MipsSETargetLowering::lowerMulDiv().

I initially changed the multiply to a shift in the MIPS backend but it
turns out that would not have handled the MIPSR6 case and was a lot more
code than doing it in LegalizeDAG.
I believe performing this simple optimization in LegalizeDAG instead of
each individual backend is the better solution since this also fixes other
backends such as MSP430, which calls the multiply runtime function
__mspabi_mpyi without this patch.

Reviewers: sdardis, atanasyan, pftbest, asl

Reviewed By: sdardis

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D45760

Added:
    llvm/trunk/test/CodeGen/Mips/jump-table-mul.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/test/CodeGen/MSP430/jumptable.ll
    llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll
    llvm/trunk/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=332439&r1=332438&r2=332439&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Wed May 16 01:58:26 2018
@@ -3688,8 +3688,17 @@ bool SelectionDAGLegalize::ExpandNode(SD
     unsigned EntrySize =
       DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
 
-    Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
-                        DAG.getConstant(EntrySize, dl, Index.getValueType()));
+    // For power-of-two jumptable entry sizes convert multiplication to a shift.
+    // This transformation needs to be done here since otherwise the MIPS
+    // backend will end up emitting a three instruction multiply sequence
+    // instead of a single shift and MSP430 will call a runtime function.
+    if (llvm::isPowerOf2_32(EntrySize))
+      Index = DAG.getNode(
+          ISD::SHL, dl, Index.getValueType(), Index,
+          DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+    else
+      Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+                          DAG.getConstant(EntrySize, dl, Index.getValueType()));
     SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
                                Index, Table);
 

Modified: llvm/trunk/test/CodeGen/MSP430/jumptable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MSP430/jumptable.ll?rev=332439&r1=332438&r2=332439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MSP430/jumptable.ll (original)
+++ llvm/trunk/test/CodeGen/MSP430/jumptable.ll Wed May 16 01:58:26 2018
@@ -7,13 +7,16 @@ target triple = "msp430---elf"
 define i16 @test(i16 %i) #0 {
 entry:
 ; CHECK-LABEL: test:
+; CHECK:      sub.w   #4, r1
+; CHECK-NEXT: mov.w   r12, 0(r1)
+; CHECK-NEXT: cmp.w   #4, r12
+; CHECK-NEXT: jhs     .LBB0_3
   %retval = alloca i16, align 2
   %i.addr = alloca i16, align 2
   store i16 %i, i16* %i.addr, align 2
   %0 = load i16, i16* %i.addr, align 2
-; CHECK: mov.w #2, r13
-; CHECK: call #__mspabi_mpyi
-; CHECK: br .LJTI0_0(r12)
+; CHECK:      rla.w r12
+; CHECK-NEXT: br .LJTI0_0(r12)
   switch i16 %0, label %sw.default [
     i16 0, label %sw.bb
     i16 1, label %sw.bb1

Modified: llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll?rev=332439&r1=332438&r2=332439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll Wed May 16 01:58:26 2018
@@ -28,7 +28,8 @@ entry:
 ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
 ; PIC-O32: jr  $[[R5]]
 
-; STATIC-N64: mflo $[[R0:[0-9]]]
+; STATIC-N64: dsrl $[[I32:[0-9]]], ${{[0-9]+}}, 32
+; STATIC-N64: dsll $[[R0:[0-9]]], $[[I32]], 3
 ; STATIC-N64: lui $[[R1:[0-9]]], %highest(.LJTI0_0)
 ; STATIC-N64: daddiu $[[R2:[0-9]]], $[[R1]], %higher(.LJTI0_0)
 ; STATIC-N64: dsll $[[R3:[0-9]]], $[[R2]], 16

Modified: llvm/trunk/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll?rev=332439&r1=332438&r2=332439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll Wed May 16 01:58:26 2018
@@ -161,9 +161,7 @@ define i8* @_Z3fooi(i32 signext %Letter)
 ; MIPS64R2-NEXT:    beqz $1, .LBB0_3
 ; MIPS64R2-NEXT:    nop
 ; MIPS64R2-NEXT:  .LBB0_1: # %entry
-; MIPS64R2-NEXT:    daddiu $1, $zero, 8
-; MIPS64R2-NEXT:    dmult $2, $1
-; MIPS64R2-NEXT:    mflo $1
+; MIPS64R2-NEXT:    dsll $1, $2, 3
 ; MIPS64R2-NEXT:    lui $2, %highest(.LJTI0_0)
 ; MIPS64R2-NEXT:    daddiu $2, $2, %higher(.LJTI0_0)
 ; MIPS64R2-NEXT:    dsll $2, $2, 16
@@ -481,9 +479,7 @@ define i8* @_Z3fooi(i32 signext %Letter)
 ; PIC-MIPS64R2-NEXT:    beqz $1, .LBB0_3
 ; PIC-MIPS64R2-NEXT:    nop
 ; PIC-MIPS64R2-NEXT:  .LBB0_1: # %entry
-; PIC-MIPS64R2-NEXT:    daddiu $1, $zero, 8
-; PIC-MIPS64R2-NEXT:    dmult $3, $1
-; PIC-MIPS64R2-NEXT:    mflo $1
+; PIC-MIPS64R2-NEXT:    dsll $1, $3, 3
 ; PIC-MIPS64R2-NEXT:    ld $3, %got_page(.LJTI0_0)($2)
 ; PIC-MIPS64R2-NEXT:    daddu $1, $1, $3
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_ofst(.LJTI0_0)($1)

Added: llvm/trunk/test/CodeGen/Mips/jump-table-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/jump-table-mul.ll?rev=332439&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/jump-table-mul.ll (added)
+++ llvm/trunk/test/CodeGen/Mips/jump-table-mul.ll Wed May 16 01:58:26 2018
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; We used to generate a mul+mflo sequence instead of shifting by 2/3 to get the jump table address
+; RUN: llc %s -O2 -mtriple=mips64-unknown-freebsd -target-abi n64 -relocation-model=pic -o - | FileCheck %s
+
+define i64 @test(i64 %arg) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui $1, %hi(%neg(%gp_rel(test)))
+; CHECK-NEXT:    daddu $2, $1, $25
+; CHECK-NEXT:    sltiu $1, $4, 11
+; CHECK-NEXT:    beqz $1, .LBB0_3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_1: # %entry
+; CHECK-NEXT:    daddiu $1, $2, %lo(%neg(%gp_rel(test)))
+; CHECK-NEXT:    dsll $2, $4, 3
+; Previously this dsll was the following sequence:
+;	daddiu	$2, $zero, 8
+;	dmult	$4, $2
+;	mflo	$2
+; CHECK-NEXT:    ld $3, %got_page(.LJTI0_0)($1)
+; CHECK-NEXT:    daddu $2, $2, $3
+; CHECK-NEXT:    ld $2, %got_ofst(.LJTI0_0)($2)
+; CHECK-NEXT:    daddu $1, $2, $1
+; CHECK-NEXT:    jr $1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_2: # %sw.bb
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 1
+; CHECK-NEXT:  .LBB0_3: # %default
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 1234
+; CHECK-NEXT:  .LBB0_4: # %sw.bb1
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 0
+entry:
+  switch i64 %arg, label %default [
+    i64 0, label %sw.bb
+    i64 3, label %sw.bb
+    i64 5, label %sw.bb
+    i64 10, label %sw.bb1
+  ]
+
+default:
+  ret i64 1234
+
+sw.bb:
+  ret i64 1
+
+sw.bb1:
+  ret i64 0
+}
+
+; CHECK-LABEL: 	.section	.rodata,"a",@progbits
+; CHECK-NEXT: 	.p2align	3
+; CHECK-LABEL: .LJTI0_0:
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_2
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_4