[llvm] 42389f1 - [VE] Change threshold for jump table generation
Kazushi Marukawa via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 20 04:27:26 PST 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-11-20T21:27:18+09:00
New Revision: 42389f1e96af9bbca5dbbbb552cc834b21d7d173
URL: https://github.com/llvm/llvm-project/commit/42389f1e96af9bbca5dbbbb552cc834b21d7d173
DIFF: https://github.com/llvm/llvm-project/commit/42389f1e96af9bbca5dbbbb552cc834b21d7d173.diff
LOG: [VE] Change threshold for jump table generation
Implement getMinimumJumpTableEntries() to specify threshold for jump
table genaration. We use 8 for the case of PIC mode to relieve the
impact of PIC calculation required to implement PIC mode jump table.
Update jump table regression test also.
Reviewed By: simoll
Differential Revision: https://reviews.llvm.org/D91785
Added:
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/test/CodeGen/VE/Scalar/br_jt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index b95229c94f66..c41d0a416eaa 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1654,3 +1654,15 @@ VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+
+//===----------------------------------------------------------------------===//
+// VE Target Optimization Support
+//===----------------------------------------------------------------------===//
+
+unsigned VETargetLowering::getMinimumJumpTableEntries() const {
+ // Specify 8 for PIC model to relieve the impact of PIC load instructions.
+ if (isJumpTableRelative())
+ return 8;
+
+ return TargetLowering::getMinimumJumpTableEntries();
+}
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index f42aba40d6cd..e12bef882d8a 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -151,6 +151,9 @@ class VETargetLowering : public TargetLowering {
/// Target Optimization {
+ // Return lower limit for number of blocks in a jump table.
+ unsigned getMinimumJumpTableEntries() const override;
+
// SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
bool isIntDivCheap(EVT, AttributeList) const override { return false; }
// VE doesn't have rem.
diff --git a/llvm/test/CodeGen/VE/Scalar/br_jt.ll b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
index a7218965c467..d84e830299ff 100644
--- a/llvm/test/CodeGen/VE/Scalar/br_jt.ll
+++ b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
@@ -2,23 +2,370 @@
; RUN: llc < %s -mtriple=ve -relocation-model=pic \
; RUN: | FileCheck %s -check-prefix=PIC
+ at switch.table.br_jt4 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 4, i32 7], align 4
+ at switch.table.br_jt7 = private unnamed_addr constant [9 x i32] [i32 3, i32 0, i32 4, i32 7, i32 3, i32 3, i32 5, i32 11, i32 10], align 4
+ at switch.table.br_jt8 = private unnamed_addr constant [9 x i32] [i32 3, i32 0, i32 4, i32 7, i32 3, i32 1, i32 5, i32 11, i32 10], align 4
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt3(i32 signext %0) {
+; CHECK-LABEL: br_jt3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: breq.w 1, %s0, .LBB{{[0-9]+}}_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: breq.w 4, %s0, .LBB{{[0-9]+}}_5
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: brne.w 2, %s0, .LBB{{[0-9]+}}_6
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: or %s0, 3, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_5:
+; CHECK-NEXT: or %s0, 7, (0)1
+; CHECK-NEXT: .LBB{{[0-9]+}}_6:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt3:
+; PIC: # %bb.0:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: breq.w 1, %s0, .LBB0_1
+; PIC-NEXT: # %bb.2:
+; PIC-NEXT: breq.w 4, %s0, .LBB0_5
+; PIC-NEXT: # %bb.3:
+; PIC-NEXT: brne.w 2, %s0, .LBB0_6
+; PIC-NEXT: # %bb.4:
+; PIC-NEXT: or %s0, 0, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB0_1:
+; PIC-NEXT: or %s0, 3, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB0_5:
+; PIC-NEXT: or %s0, 7, (0)1
+; PIC-NEXT: .LBB0_6:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+ switch i32 %0, label %4 [
+ i32 1, label %5
+ i32 2, label %2
+ i32 4, label %3
+ ]
+
+2: ; preds = %1
+ br label %5
+
+3: ; preds = %1
+ br label %5
+
+4: ; preds = %1
+ br label %5
+
+5: ; preds = %1, %4, %3, %2
+ %6 = phi i32 [ %0, %4 ], [ 7, %3 ], [ 0, %2 ], [ 3, %1 ]
+ ret i32 %6
+}
+
; Function Attrs: norecurse nounwind readnone
-define signext i32 @br_jt(i32 signext %0) {
-; CHECK-LABEL: br_jt:
+define signext i32 @br_jt4(i32 signext %0) {
+; CHECK-LABEL: br_jt4:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: adds.w.sx %s1, -1, %s0
; CHECK-NEXT: cmpu.w %s2, 3, %s1
-; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_5
+; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
-; CHECK-NEXT: sll %s0, %s0, 3
-; CHECK-NEXT: lea %s1, .LJTI0_0 at lo
+; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT: sll %s0, %s0, 2
+; CHECK-NEXT: lea %s1, .Lswitch.table.br_jt4 at lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s1, .Lswitch.table.br_jt4 at hi(, %s1)
+; CHECK-NEXT: ldl.sx %s0, (%s0, %s1)
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt4:
+; PIC: .LBB{{[0-9]+}}_5:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: adds.w.sx %s1, -1, %s0
+; PIC-NEXT: cmpu.w %s2, 3, %s1
+; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_ at pc_lo(-24)
+; PIC-NEXT: and %s15, %s15, (32)0
+; PIC-NEXT: sic %s16
+; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_ at pc_hi(%s16, %s15)
+; PIC-NEXT: brgt.w 0, %s2, .LBB1_2
+; PIC-NEXT: # %bb.1:
+; PIC-NEXT: adds.w.sx %s0, %s1, (0)1
+; PIC-NEXT: sll %s0, %s0, 2
+; PIC-NEXT: lea %s1, .Lswitch.table.br_jt4 at gotoff_lo
+; PIC-NEXT: and %s1, %s1, (32)0
+; PIC-NEXT: lea.sl %s1, .Lswitch.table.br_jt4 at gotoff_hi(%s1, %s15)
+; PIC-NEXT: ldl.sx %s0, (%s0, %s1)
+; PIC-NEXT: br.l.t .LBB1_3
+; PIC-NEXT: .LBB1_2:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: .LBB1_3:
+; PIC-NEXT: or %s11, 0, %s9
+ %2 = add i32 %0, -1
+ %3 = icmp ult i32 %2, 4
+ br i1 %3, label %4, label %8
+
+4: ; preds = %1
+ %5 = sext i32 %2 to i64
+ %6 = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.br_jt4, i64 0, i64 %5
+ %7 = load i32, i32* %6, align 4
+ ret i32 %7
+
+8: ; preds = %1
+ ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt7(i32 signext %0) {
+; CHECK-LABEL: br_jt7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s1, -1, %s0
+; CHECK-NEXT: cmpu.w %s2, 8, %s1
+; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_3
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: and %s2, %s1, (48)0
+; CHECK-NEXT: lea %s3, 463
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: srl %s2, %s3, %s2
+; CHECK-NEXT: and %s2, 1, %s2
+; CHECK-NEXT: brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT: sll %s0, %s0, 2
+; CHECK-NEXT: lea %s1, .Lswitch.table.br_jt7 at lo
; CHECK-NEXT: and %s1, %s1, (32)0
-; CHECK-NEXT: lea.sl %s1, .LJTI0_0 at hi(, %s1)
-; CHECK-NEXT: ld %s1, (%s1, %s0)
+; CHECK-NEXT: lea.sl %s1, .Lswitch.table.br_jt7 at hi(, %s1)
+; CHECK-NEXT: ldl.sx %s0, (%s0, %s1)
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt7:
+; PIC: .LBB{{[0-9]+}}_6:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: adds.w.sx %s1, -1, %s0
+; PIC-NEXT: cmpu.w %s2, 8, %s1
+; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_ at pc_lo(-24)
+; PIC-NEXT: and %s15, %s15, (32)0
+; PIC-NEXT: sic %s16
+; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_ at pc_hi(%s16, %s15)
+; PIC-NEXT: brgt.w 0, %s2, .LBB2_3
+; PIC-NEXT: # %bb.1:
+; PIC-NEXT: and %s2, %s1, (48)0
+; PIC-NEXT: lea %s3, 463
+; PIC-NEXT: and %s3, %s3, (32)0
+; PIC-NEXT: srl %s2, %s3, %s2
+; PIC-NEXT: and %s2, 1, %s2
+; PIC-NEXT: brne.w 0, %s2, .LBB2_2
+; PIC-NEXT: .LBB2_3:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: br.l.t .LBB2_4
+; PIC-NEXT: .LBB2_2:
+; PIC-NEXT: adds.w.sx %s0, %s1, (0)1
+; PIC-NEXT: sll %s0, %s0, 2
+; PIC-NEXT: lea %s1, .Lswitch.table.br_jt7 at gotoff_lo
+; PIC-NEXT: and %s1, %s1, (32)0
+; PIC-NEXT: lea.sl %s1, .Lswitch.table.br_jt7 at gotoff_hi(%s1, %s15)
+; PIC-NEXT: ldl.sx %s0, (%s0, %s1)
+; PIC-NEXT: .LBB2_4:
+; PIC-NEXT: or %s11, 0, %s9
+ %2 = add i32 %0, -1
+ %3 = icmp ult i32 %2, 9
+ br i1 %3, label %4, label %13
+
+4: ; preds = %1
+ %5 = trunc i32 %2 to i16
+ %6 = lshr i16 463, %5
+ %7 = and i16 %6, 1
+ %8 = icmp eq i16 %7, 0
+ br i1 %8, label %13, label %9
+
+9: ; preds = %4
+ %10 = sext i32 %2 to i64
+ %11 = getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.br_jt7, i64 0, i64 %10
+ %12 = load i32, i32* %11, align 4
+ ret i32 %12
+
+13: ; preds = %1, %4
+ ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt8(i32 signext %0) {
+; CHECK-LABEL: br_jt8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s1, -1, %s0
+; CHECK-NEXT: cmpu.w %s2, 8, %s1
+; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_3
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: and %s2, %s1, (48)0
+; CHECK-NEXT: lea %s3, 495
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: srl %s2, %s3, %s2
+; CHECK-NEXT: and %s2, 1, %s2
+; CHECK-NEXT: brne.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT: sll %s0, %s0, 2
+; CHECK-NEXT: lea %s1, .Lswitch.table.br_jt8 at lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s1, .Lswitch.table.br_jt8 at hi(, %s1)
+; CHECK-NEXT: ldl.sx %s0, (%s0, %s1)
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt8:
+; PIC: .LBB{{[0-9]+}}_6:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: adds.w.sx %s1, -1, %s0
+; PIC-NEXT: cmpu.w %s2, 8, %s1
+; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_ at pc_lo(-24)
+; PIC-NEXT: and %s15, %s15, (32)0
+; PIC-NEXT: sic %s16
+; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_ at pc_hi(%s16, %s15)
+; PIC-NEXT: brgt.w 0, %s2, .LBB3_3
+; PIC-NEXT: # %bb.1:
+; PIC-NEXT: and %s2, %s1, (48)0
+; PIC-NEXT: lea %s3, 495
+; PIC-NEXT: and %s3, %s3, (32)0
+; PIC-NEXT: srl %s2, %s3, %s2
+; PIC-NEXT: and %s2, 1, %s2
+; PIC-NEXT: brne.w 0, %s2, .LBB3_2
+; PIC-NEXT: .LBB3_3:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: br.l.t .LBB3_4
+; PIC-NEXT: .LBB3_2:
+; PIC-NEXT: adds.w.sx %s0, %s1, (0)1
+; PIC-NEXT: sll %s0, %s0, 2
+; PIC-NEXT: lea %s1, .Lswitch.table.br_jt8 at gotoff_lo
+; PIC-NEXT: and %s1, %s1, (32)0
+; PIC-NEXT: lea.sl %s1, .Lswitch.table.br_jt8 at gotoff_hi(%s1, %s15)
+; PIC-NEXT: ldl.sx %s0, (%s0, %s1)
+; PIC-NEXT: .LBB3_4:
+; PIC-NEXT: or %s11, 0, %s9
+ %2 = add i32 %0, -1
+ %3 = icmp ult i32 %2, 9
+ br i1 %3, label %4, label %13
+
+4: ; preds = %1
+ %5 = trunc i32 %2 to i16
+ %6 = lshr i16 495, %5
+ %7 = and i16 %6, 1
+ %8 = icmp eq i16 %7, 0
+ br i1 %8, label %13, label %9
+
+9: ; preds = %4
+ %10 = sext i32 %2 to i64
+ %11 = getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.br_jt8, i64 0, i64 %10
+ %12 = load i32, i32* %11, align 4
+ ret i32 %12
+
+13: ; preds = %1, %4
+ ret i32 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt3_m(i32 signext %0, i32 signext %1) {
+; CHECK-LABEL: br_jt3_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: breq.w 1, %s0, .LBB{{[0-9]+}}_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: breq.w 4, %s0, .LBB{{[0-9]+}}_5
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: brne.w 2, %s0, .LBB{{[0-9]+}}_6
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_1:
; CHECK-NEXT: or %s0, 3, (0)1
-; CHECK-NEXT: b.l.t (, %s1)
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_5:
+; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s0, 3, %s0
+; CHECK-NEXT: .LBB{{[0-9]+}}_6:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt3_m:
+; PIC: # %bb.0:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: breq.w 1, %s0, .LBB4_1
+; PIC-NEXT: # %bb.2:
+; PIC-NEXT: breq.w 4, %s0, .LBB4_5
+; PIC-NEXT: # %bb.3:
+; PIC-NEXT: brne.w 2, %s0, .LBB4_6
+; PIC-NEXT: # %bb.4:
+; PIC-NEXT: or %s0, 0, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB4_1:
+; PIC-NEXT: or %s0, 3, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB4_5:
+; PIC-NEXT: adds.w.sx %s0, %s1, (0)1
+; PIC-NEXT: adds.w.sx %s0, 3, %s0
+; PIC-NEXT: .LBB4_6:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+ switch i32 %0, label %6 [
+ i32 1, label %7
+ i32 2, label %3
+ i32 4, label %4
+ ]
+
+3: ; preds = %2
+ br label %7
+
+4: ; preds = %2
+ %5 = add nsw i32 %1, 3
+ br label %7
+
+6: ; preds = %2
+ br label %7
+
+7: ; preds = %2, %6, %4, %3
+ %8 = phi i32 [ %0, %6 ], [ %5, %4 ], [ 0, %3 ], [ 3, %2 ]
+ ret i32 %8
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt4_m(i32 signext %0, i32 signext %1) {
+; CHECK-LABEL: br_jt4_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, -1, %s0
+; CHECK-NEXT: cmpu.w %s3, 3, %s2
+; CHECK-NEXT: brgt.w 0, %s3, .LBB{{[0-9]+}}_5
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: adds.w.zx %s0, %s2, (0)1
+; CHECK-NEXT: sll %s0, %s0, 3
+; CHECK-NEXT: lea %s2, .LJTI5_0 at lo
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: lea.sl %s2, .LJTI5_0 at hi(, %s2)
+; CHECK-NEXT: ld %s2, (%s2, %s0)
+; CHECK-NEXT: or %s0, 3, (0)1
+; CHECK-NEXT: b.l.t (, %s2)
; CHECK-NEXT: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s0, 0, (0)1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
@@ -28,89 +375,344 @@ define signext i32 @br_jt(i32 signext %0) {
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
-; CHECK-NEXT: or %s0, 7, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s0, 3, %s0
; CHECK-NEXT: .LBB{{[0-9]+}}_5:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
;
-; PIC-LABEL: br_jt:
+; PIC-LABEL: br_jt4_m:
; PIC: # %bb.0:
-; PIC-NEXT: st %s9, (, %s11)
-; PIC-NEXT: st %s10, 8(, %s11)
-; PIC-NEXT: st %s15, 24(, %s11)
-; PIC-NEXT: st %s16, 32(, %s11)
-; PIC-NEXT: or %s9, 0, %s11
-; PIC-NEXT: lea %s13, -176
-; PIC-NEXT: and %s13, %s13, (32)0
-; PIC-NEXT: lea.sl %s11, -1(%s13, %s11)
-; PIC-NEXT: brge.l %s11, %s8, .LBB0_7
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: brlt.w 2, %s0, .LBB5_4
+; PIC-NEXT: # %bb.1:
+; PIC-NEXT: breq.w 1, %s0, .LBB5_8
+; PIC-NEXT: # %bb.2:
+; PIC-NEXT: brne.w 2, %s0, .LBB5_7
+; PIC-NEXT: # %bb.3:
+; PIC-NEXT: or %s0, 0, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB5_4:
+; PIC-NEXT: breq.w 3, %s0, .LBB5_9
+; PIC-NEXT: # %bb.5:
+; PIC-NEXT: brne.w 4, %s0, .LBB5_7
; PIC-NEXT: # %bb.6:
-; PIC-NEXT: ld %s61, 24(, %s14)
-; PIC-NEXT: or %s62, 0, %s0
-; PIC-NEXT: lea %s63, 315
-; PIC-NEXT: shm.l %s63, (%s61)
-; PIC-NEXT: shm.l %s8, 8(%s61)
-; PIC-NEXT: shm.l %s11, 16(%s61)
-; PIC-NEXT: monc
-; PIC-NEXT: or %s0, 0, %s62
-; PIC-NEXT: .LBB0_7:
+; PIC-NEXT: adds.w.sx %s0, %s1, (0)1
+; PIC-NEXT: adds.w.sx %s0, 3, %s0
+; PIC-NEXT: .LBB5_7:
; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
-; PIC-NEXT: adds.w.sx %s1, -1, %s0
-; PIC-NEXT: cmpu.w %s2, 3, %s1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB5_8:
+; PIC-NEXT: or %s0, 3, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB5_9:
+; PIC-NEXT: or %s0, 4, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+ switch i32 %0, label %7 [
+ i32 1, label %8
+ i32 2, label %3
+ i32 3, label %4
+ i32 4, label %5
+ ]
+
+3: ; preds = %2
+ br label %8
+
+4: ; preds = %2
+ br label %8
+
+5: ; preds = %2
+ %6 = add nsw i32 %1, 3
+ br label %8
+
+7: ; preds = %2
+ br label %8
+
+8: ; preds = %2, %7, %5, %4, %3
+ %9 = phi i32 [ %0, %7 ], [ %6, %5 ], [ 4, %4 ], [ 0, %3 ], [ 3, %2 ]
+ ret i32 %9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt7_m(i32 signext %0, i32 signext %1) {
+; CHECK-LABEL: br_jt7_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, -1, %s2
+; CHECK-NEXT: cmpu.w %s3, 8, %s0
+; CHECK-NEXT: brgt.w 0, %s3, .LBB{{[0-9]+}}_8
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: sll %s0, %s0, 3
+; CHECK-NEXT: lea %s3, .LJTI6_0 at lo
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: lea.sl %s3, .LJTI6_0 at hi(, %s3)
+; CHECK-NEXT: ld %s3, (%s3, %s0)
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: or %s0, 3, (0)1
+; CHECK-NEXT: b.l.t (, %s3)
+; CHECK-NEXT: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: or %s0, 4, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: adds.w.sx %s0, 3, %s1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_8:
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: .LBB{{[0-9]+}}_9:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_7:
+; CHECK-NEXT: or %s0, 11, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_6:
+; CHECK-NEXT: or %s0, 10, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_5:
+; CHECK-NEXT: adds.w.sx %s0, -2, %s1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt7_m:
+; PIC: # %bb.0:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: brge.w 3, %s0, .LBB6_1
+; PIC-NEXT: # %bb.6:
+; PIC-NEXT: brlt.w 7, %s0, .LBB6_10
+; PIC-NEXT: # %bb.7:
+; PIC-NEXT: adds.w.sx %s1, %s1, (0)1
+; PIC-NEXT: breq.w 4, %s0, .LBB6_14
+; PIC-NEXT: # %bb.8:
+; PIC-NEXT: brne.w 7, %s0, .LBB6_16
+; PIC-NEXT: # %bb.9:
+; PIC-NEXT: adds.w.sx %s0, -2, %s1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_1:
+; PIC-NEXT: breq.w 1, %s0, .LBB6_2
+; PIC-NEXT: # %bb.3:
+; PIC-NEXT: breq.w 2, %s0, .LBB6_13
+; PIC-NEXT: # %bb.4:
+; PIC-NEXT: brne.w 3, %s0, .LBB6_16
+; PIC-NEXT: # %bb.5:
+; PIC-NEXT: or %s0, 4, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_10:
+; PIC-NEXT: breq.w 8, %s0, .LBB6_15
+; PIC-NEXT: # %bb.11:
+; PIC-NEXT: brne.w 9, %s0, .LBB6_16
+; PIC-NEXT: # %bb.12:
+; PIC-NEXT: or %s0, 10, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_14:
+; PIC-NEXT: adds.w.sx %s0, 3, %s1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_2:
+; PIC-NEXT: or %s0, 3, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_15:
+; PIC-NEXT: or %s0, 11, (0)1
+; PIC-NEXT: .LBB6_16:
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+; PIC-NEXT: .LBB6_13:
+; PIC-NEXT: or %s0, 0, (0)1
+; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT: b.l.t (, %s10)
+ switch i32 %0, label %11 [
+ i32 1, label %12
+ i32 2, label %3
+ i32 3, label %4
+ i32 4, label %5
+ i32 7, label %7
+ i32 9, label %9
+ i32 8, label %10
+ ]
+
+3: ; preds = %2
+ br label %12
+
+4: ; preds = %2
+ br label %12
+
+5: ; preds = %2
+ %6 = add nsw i32 %1, 3
+ br label %12
+
+7: ; preds = %2
+ %8 = add nsw i32 %1, -2
+ br label %12
+
+9: ; preds = %2
+ br label %12
+
+10: ; preds = %2
+ br label %12
+
+11: ; preds = %2
+ br label %12
+
+12: ; preds = %2, %11, %10, %9, %7, %5, %4, %3
+ %13 = phi i32 [ %0, %11 ], [ 11, %10 ], [ 10, %9 ], [ %8, %7 ], [ %6, %5 ], [ 4, %4 ], [ 0, %3 ], [ 3, %2 ]
+ ret i32 %13
+}
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
+; CHECK-LABEL: br_jt8_m:
+; CHECK: # %bb.0:
+; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, -1, %s2
+; CHECK-NEXT: cmpu.w %s3, 8, %s0
+; CHECK-NEXT: brgt.w 0, %s3, .LBB{{[0-9]+}}_9
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: sll %s0, %s0, 3
+; CHECK-NEXT: lea %s3, .LJTI7_0 at lo
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: lea.sl %s3, .LJTI7_0 at hi(, %s3)
+; CHECK-NEXT: ld %s3, (%s3, %s0)
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: or %s0, 3, (0)1
+; CHECK-NEXT: b.l.t (, %s3)
+; CHECK-NEXT: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: or %s0, 4, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: adds.w.sx %s0, 3, %s1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_9:
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: .LBB{{[0-9]+}}_10:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_5:
+; CHECK-NEXT: adds.w.sx %s0, -5, %s1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_6:
+; CHECK-NEXT: adds.w.sx %s0, -2, %s1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_8:
+; CHECK-NEXT: or %s0, 11, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+; CHECK-NEXT: .LBB{{[0-9]+}}_7:
+; CHECK-NEXT: or %s0, 10, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: b.l.t (, %s10)
+;
+; PIC-LABEL: br_jt8_m:
+; PIC: .LBB{{[0-9]+}}_12:
+; PIC-NEXT: adds.w.sx %s2, %s0, (0)1
+; PIC-NEXT: adds.w.sx %s0, -1, %s2
+; PIC-NEXT: cmpu.w %s3, 8, %s0
; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_ at pc_lo(-24)
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_ at pc_hi(%s16, %s15)
-; PIC-NEXT: brgt.w 0, %s2, .LBB0_5
+; PIC-NEXT: brgt.w 0, %s3, .LBB7_9
; PIC-NEXT: # %bb.1:
-; PIC-NEXT: adds.w.zx %s0, %s1, (0)1
+; PIC-NEXT: adds.w.sx %s1, %s1, (0)1
+; PIC-NEXT: adds.w.zx %s0, %s0, (0)1
; PIC-NEXT: sll %s0, %s0, 2
-; PIC-NEXT: lea %s1, .LJTI0_0 at gotoff_lo
-; PIC-NEXT: and %s1, %s1, (32)0
-; PIC-NEXT: lea.sl %s1, .LJTI0_0 at gotoff_hi(%s1, %s15)
-; PIC-NEXT: ldl.sx %s0, (%s1, %s0)
-; PIC-NEXT: lea %s1, br_jt at gotoff_lo
-; PIC-NEXT: and %s1, %s1, (32)0
-; PIC-NEXT: lea.sl %s1, br_jt at gotoff_hi(%s1, %s15)
-; PIC-NEXT: adds.l %s1, %s0, %s1
+; PIC-NEXT: lea %s3, .LJTI7_0 at gotoff_lo
+; PIC-NEXT: and %s3, %s3, (32)0
+; PIC-NEXT: lea.sl %s3, .LJTI7_0 at gotoff_hi(%s3, %s15)
+; PIC-NEXT: ldl.sx %s0, (%s3, %s0)
+; PIC-NEXT: lea %s3, br_jt8_m at gotoff_lo
+; PIC-NEXT: and %s3, %s3, (32)0
+; PIC-NEXT: lea.sl %s3, br_jt8_m at gotoff_hi(%s3, %s15)
+; PIC-NEXT: adds.l %s3, %s0, %s3
; PIC-NEXT: or %s0, 3, (0)1
-; PIC-NEXT: b.l.t (, %s1)
-; PIC-NEXT: .LBB0_2:
+; PIC-NEXT: b.l.t (, %s3)
+; PIC-NEXT: .LBB7_2:
; PIC-NEXT: or %s0, 0, (0)1
-; PIC-NEXT: br.l.t .LBB0_5
-; PIC-NEXT: .LBB0_3:
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_3:
; PIC-NEXT: or %s0, 4, (0)1
-; PIC-NEXT: br.l.t .LBB0_5
-; PIC-NEXT: .LBB0_4:
-; PIC-NEXT: or %s0, 7, (0)1
-; PIC-NEXT: .LBB0_5:
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_4:
+; PIC-NEXT: adds.w.sx %s0, 3, %s1
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_9:
+; PIC-NEXT: or %s0, 0, %s2
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_5:
+; PIC-NEXT: adds.w.sx %s0, -5, %s1
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_6:
+; PIC-NEXT: adds.w.sx %s0, -2, %s1
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_8:
+; PIC-NEXT: or %s0, 11, (0)1
+; PIC-NEXT: br.l.t .LBB7_10
+; PIC-NEXT: .LBB7_7:
+; PIC-NEXT: or %s0, 10, (0)1
+; PIC-NEXT: .LBB7_10:
; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
; PIC-NEXT: or %s11, 0, %s9
-; PIC-NEXT: ld %s16, 32(, %s11)
-; PIC-NEXT: ld %s15, 24(, %s11)
-; PIC-NEXT: ld %s10, 8(, %s11)
-; PIC-NEXT: ld %s9, (, %s11)
-; PIC-NEXT: b.l.t (, %s10)
- switch i32 %0, label %5 [
- i32 1, label %6
- i32 2, label %2
- i32 3, label %3
- i32 4, label %4
+ switch i32 %0, label %13 [
+ i32 1, label %14
+ i32 2, label %3
+ i32 3, label %4
+ i32 4, label %5
+ i32 6, label %7
+ i32 7, label %9
+ i32 9, label %11
+ i32 8, label %12
]
-2: ; preds = %1
- br label %6
+3: ; preds = %2
+ br label %14
-3: ; preds = %1
- br label %6
+4: ; preds = %2
+ br label %14
-4: ; preds = %1
- br label %6
+5: ; preds = %2
+ %6 = add nsw i32 %1, 3
+ br label %14
-5: ; preds = %1
- br label %6
+7: ; preds = %2
+ %8 = add nsw i32 %1, -5
+ br label %14
-6: ; preds = %1, %5, %4, %3, %2
- %7 = phi i32 [ %0, %5 ], [ 7, %4 ], [ 4, %3 ], [ 0, %2 ], [ 3, %1 ]
- ret i32 %7
+9: ; preds = %2
+ %10 = add nsw i32 %1, -2
+ br label %14
+
+11: ; preds = %2
+ br label %14
+
+12: ; preds = %2
+ br label %14
+
+13: ; preds = %2
+ br label %14
+
+14: ; preds = %2, %13, %12, %11, %9, %7, %5, %4, %3
+ %15 = phi i32 [ %0, %13 ], [ 11, %12 ], [ 10, %11 ], [ %10, %9 ], [ %8, %7 ], [ %6, %5 ], [ 4, %4 ], [ 0, %3 ], [ 3, %2 ]
+ ret i32 %15
}
More information about the llvm-commits
mailing list