[llvm] 30305d7 - [TargetLowering][RISCV][Sparc] Don't emit zero check in CTTZTableLookup for CTTZ_ZERO_UNDEF.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 17 10:15:59 PDT 2022
Author: Craig Topper
Date: 2022-10-17T10:15:39-07:00
New Revision: 30305d794840a4920631a3d8137ee6d7f4023fe8
URL: https://github.com/llvm/llvm-project/commit/30305d794840a4920631a3d8137ee6d7f4023fe8
DIFF: https://github.com/llvm/llvm-project/commit/30305d794840a4920631a3d8137ee6d7f4023fe8.diff
LOG: [TargetLowering][RISCV][Sparc] Don't emit zero check in CTTZTableLookup for CTTZ_ZERO_UNDEF.
The code incorrectly checked for CTLZ_ZERO_UNDEF instead of
CTTZ_ZERO_UNDEF.
While I was there I flipped the condition into an early out.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D136010
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
llvm/test/CodeGen/SPARC/cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a3070fe31c47e..2aa8c04f43251 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8184,15 +8184,15 @@ SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
PtrInfo, MVT::i8);
- if (Node->getOpcode() != ISD::CTLZ_ZERO_UNDEF) {
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
- ExtLoad = DAG.getSelect(DL, VT, SrcIsZero,
- DAG.getConstant(BitWidth, DL, VT), ExtLoad);
- }
- return ExtLoad;
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return ExtLoad;
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, SrcIsZero,
+ DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 390e3d9b3f5d1..a7ab1e0b186a5 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -170,76 +170,57 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
define i32 @test_cttz_i32(i32 %a) nounwind {
; RV32I-LABEL: test_cttz_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz a0, .LBB2_4
+; RV32I-NEXT: beqz a0, .LBB2_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi a1, a1, 1329
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 32
-; RV32I-NEXT: beqz s0, .LBB2_3
-; RV32I-NEXT: # %bb.2: # %cond.false
-; RV32I-NEXT: srli a0, a1, 27
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lbu a0, 0(a0)
-; RV32I-NEXT: .LBB2_3: # %cond.false
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_cttz_i32:
; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: beqz a1, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sext.w s0, a0
-; RV64I-NEXT: beqz s0, .LBB2_3
-; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: beqz s0, .LBB2_4
-; RV64I-NEXT: # %bb.2: # %cond.false
-; RV64I-NEXT: srliw a0, a1, 27
+; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI2_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: j .LBB2_4
-; RV64I-NEXT: .LBB2_3:
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: .LBB2_4: # %cond.end
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i32:
; RV32M: # %bb.0:
-; RV32M-NEXT: beqz a0, .LBB2_4
+; RV32M-NEXT: beqz a0, .LBB2_2
; RV32M-NEXT: # %bb.1: # %cond.false
-; RV32M-NEXT: mv a1, a0
-; RV32M-NEXT: li a0, 32
-; RV32M-NEXT: beqz a1, .LBB2_3
-; RV32M-NEXT: # %bb.2: # %cond.false
-; RV32M-NEXT: neg a0, a1
-; RV32M-NEXT: and a0, a1, a0
+; RV32M-NEXT: neg a1, a0
+; RV32M-NEXT: and a0, a0, a1
; RV32M-NEXT: lui a1, 30667
; RV32M-NEXT: addi a1, a1, 1329
; RV32M-NEXT: mul a0, a0, a1
@@ -248,23 +229,18 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV32M-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV32M-NEXT: add a0, a1, a0
; RV32M-NEXT: lbu a0, 0(a0)
-; RV32M-NEXT: .LBB2_3: # %cond.end
; RV32M-NEXT: ret
-; RV32M-NEXT: .LBB2_4:
+; RV32M-NEXT: .LBB2_2:
; RV32M-NEXT: li a0, 32
; RV32M-NEXT: ret
;
; RV64M-LABEL: test_cttz_i32:
; RV64M: # %bb.0:
-; RV64M-NEXT: sext.w a2, a0
-; RV64M-NEXT: beqz a2, .LBB2_4
+; RV64M-NEXT: sext.w a1, a0
+; RV64M-NEXT: beqz a1, .LBB2_2
; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: mv a1, a0
-; RV64M-NEXT: li a0, 32
-; RV64M-NEXT: beqz a2, .LBB2_3
-; RV64M-NEXT: # %bb.2: # %cond.false
-; RV64M-NEXT: neg a0, a1
-; RV64M-NEXT: and a0, a1, a0
+; RV64M-NEXT: neg a1, a0
+; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
; RV64M-NEXT: addiw a1, a1, 1329
; RV64M-NEXT: mulw a0, a0, a1
@@ -273,9 +249,8 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64M-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV64M-NEXT: add a0, a1, a0
; RV64M-NEXT: lbu a0, 0(a0)
-; RV64M-NEXT: .LBB2_3: # %cond.end
; RV64M-NEXT: ret
-; RV64M-NEXT: .LBB2_4:
+; RV64M-NEXT: .LBB2_2:
; RV64M-NEXT: li a0, 32
; RV64M-NEXT: ret
;
@@ -302,8 +277,7 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a0, s0, a0
@@ -311,31 +285,29 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: addi s3, a1, 1329
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: lui a1, %hi(.LCPI3_0)
-; RV32I-NEXT: addi s5, a1, %lo(.LCPI3_0)
-; RV32I-NEXT: li s4, 32
-; RV32I-NEXT: li s2, 32
-; RV32I-NEXT: beqz s0, .LBB3_2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s2
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: li a1, 32
+; RV32I-NEXT: beqz s2, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s2, 0(a0)
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a1, 0(a0)
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: neg a0, s1
-; RV32I-NEXT: and a0, s1, a0
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: beqz s1, .LBB3_4
+; RV32I-NEXT: bnez s0, .LBB3_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s4, 0(a0)
+; RV32I-NEXT: addi a0, a1, 32
+; RV32I-NEXT: j .LBB3_5
; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: bnez s0, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: addi s2, s4, 32
-; RV32I-NEXT: .LBB3_6:
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: srli a0, s1, 27
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: .LBB3_5:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -343,99 +315,79 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_cttz_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB3_4
+; RV64I-NEXT: beqz a0, .LBB3_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, %hi(.LCPI3_0)
; RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1)
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 64
-; RV64I-NEXT: beqz s0, .LBB3_3
-; RV64I-NEXT: # %bb.2: # %cond.false
-; RV64I-NEXT: srli a0, a1, 58
+; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI3_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI3_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB3_3: # %cond.false
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB3_4:
+; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: li a0, 64
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i64:
; RV32M: # %bb.0:
; RV32M-NEXT: lui a2, 30667
-; RV32M-NEXT: addi a4, a2, 1329
-; RV32M-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32M-NEXT: addi a5, a2, %lo(.LCPI3_0)
-; RV32M-NEXT: li a3, 32
-; RV32M-NEXT: li a2, 32
-; RV32M-NEXT: bnez a0, .LBB3_5
+; RV32M-NEXT: addi a2, a2, 1329
+; RV32M-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0)
+; RV32M-NEXT: li a4, 32
+; RV32M-NEXT: beqz a1, .LBB3_2
; RV32M-NEXT: # %bb.1:
-; RV32M-NEXT: bnez a1, .LBB3_6
+; RV32M-NEXT: neg a4, a1
+; RV32M-NEXT: and a1, a1, a4
+; RV32M-NEXT: mul a1, a1, a2
+; RV32M-NEXT: srli a1, a1, 27
+; RV32M-NEXT: add a1, a3, a1
+; RV32M-NEXT: lbu a4, 0(a1)
; RV32M-NEXT: .LBB3_2:
; RV32M-NEXT: bnez a0, .LBB3_4
-; RV32M-NEXT: .LBB3_3:
-; RV32M-NEXT: addi a2, a3, 32
+; RV32M-NEXT: # %bb.3:
+; RV32M-NEXT: addi a0, a4, 32
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
; RV32M-NEXT: .LBB3_4:
-; RV32M-NEXT: mv a0, a2
+; RV32M-NEXT: neg a1, a0
+; RV32M-NEXT: and a0, a0, a1
+; RV32M-NEXT: mul a0, a0, a2
+; RV32M-NEXT: srli a0, a0, 27
+; RV32M-NEXT: add a0, a3, a0
+; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
-; RV32M-NEXT: .LBB3_5:
-; RV32M-NEXT: neg a2, a0
-; RV32M-NEXT: and a2, a0, a2
-; RV32M-NEXT: mul a2, a2, a4
-; RV32M-NEXT: srli a2, a2, 27
-; RV32M-NEXT: add a2, a5, a2
-; RV32M-NEXT: lbu a2, 0(a2)
-; RV32M-NEXT: beqz a1, .LBB3_2
-; RV32M-NEXT: .LBB3_6:
-; RV32M-NEXT: neg a3, a1
-; RV32M-NEXT: and a1, a1, a3
-; RV32M-NEXT: mul a1, a1, a4
-; RV32M-NEXT: srli a1, a1, 27
-; RV32M-NEXT: add a1, a5, a1
-; RV32M-NEXT: lbu a3, 0(a1)
-; RV32M-NEXT: beqz a0, .LBB3_3
-; RV32M-NEXT: j .LBB3_4
;
; RV64M-LABEL: test_cttz_i64:
; RV64M: # %bb.0:
-; RV64M-NEXT: beqz a0, .LBB3_4
+; RV64M-NEXT: beqz a0, .LBB3_2
; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: mv a1, a0
-; RV64M-NEXT: li a0, 64
-; RV64M-NEXT: beqz a1, .LBB3_3
-; RV64M-NEXT: # %bb.2: # %cond.false
-; RV64M-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64M-NEXT: ld a0, %lo(.LCPI3_0)(a0)
-; RV64M-NEXT: neg a2, a1
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: mul a0, a1, a0
+; RV64M-NEXT: lui a1, %hi(.LCPI3_0)
+; RV64M-NEXT: ld a1, %lo(.LCPI3_0)(a1)
+; RV64M-NEXT: neg a2, a0
+; RV64M-NEXT: and a0, a0, a2
+; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srli a0, a0, 58
; RV64M-NEXT: lui a1, %hi(.LCPI3_1)
; RV64M-NEXT: addi a1, a1, %lo(.LCPI3_1)
; RV64M-NEXT: add a0, a1, a0
; RV64M-NEXT: lbu a0, 0(a0)
-; RV64M-NEXT: .LBB3_3: # %cond.end
; RV64M-NEXT: ret
-; RV64M-NEXT: .LBB3_4:
+; RV64M-NEXT: .LBB3_2:
; RV64M-NEXT: li a0, 64
; RV64M-NEXT: ret
;
@@ -575,25 +527,17 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi a1, a1, 1329
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 32
-; RV32I-NEXT: beqz s0, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srli a0, a1, 27
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: lui a1, %hi(.LCPI6_0)
; RV32I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lbu a0, 0(a0)
-; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -601,33 +545,22 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sext.w s0, a0
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: beqz s0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: srliw a0, a1, 27
+; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i32_zero_undef:
; RV32M: # %bb.0:
-; RV32M-NEXT: li a1, 32
-; RV32M-NEXT: beqz a0, .LBB6_2
-; RV32M-NEXT: # %bb.1:
; RV32M-NEXT: neg a1, a0
; RV32M-NEXT: and a0, a0, a1
; RV32M-NEXT: lui a1, 30667
@@ -637,17 +570,11 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV32M-NEXT: lui a1, %hi(.LCPI6_0)
; RV32M-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV32M-NEXT: add a0, a1, a0
-; RV32M-NEXT: lbu a1, 0(a0)
-; RV32M-NEXT: .LBB6_2:
-; RV32M-NEXT: mv a0, a1
+; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: ret
;
; RV64M-LABEL: test_cttz_i32_zero_undef:
; RV64M: # %bb.0:
-; RV64M-NEXT: sext.w a2, a0
-; RV64M-NEXT: li a1, 32
-; RV64M-NEXT: beqz a2, .LBB6_2
-; RV64M-NEXT: # %bb.1:
; RV64M-NEXT: neg a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
@@ -657,9 +584,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64M-NEXT: lui a1, %hi(.LCPI6_0)
; RV64M-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64M-NEXT: add a0, a1, a0
-; RV64M-NEXT: lbu a1, 0(a0)
-; RV64M-NEXT: .LBB6_2:
-; RV64M-NEXT: mv a0, a1
+; RV64M-NEXT: lbu a0, 0(a0)
; RV64M-NEXT: ret
;
; RV32ZBB-LABEL: test_cttz_i32_zero_undef:
@@ -685,40 +610,33 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi s3, a1, 1329
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: lui a1, %hi(.LCPI7_0)
-; RV32I-NEXT: addi s5, a1, %lo(.LCPI7_0)
-; RV32I-NEXT: li s4, 32
-; RV32I-NEXT: li s2, 32
-; RV32I-NEXT: beqz s0, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s2, 0(a0)
-; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lui a0, %hi(.LCPI7_0)
+; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0)
; RV32I-NEXT: neg a0, s1
; RV32I-NEXT: and a0, s1, a0
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: beqz s1, .LBB7_4
-; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: bnez s2, .LBB7_2
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s4, 0(a0)
-; RV32I-NEXT: .LBB7_4:
-; RV32I-NEXT: bnez s0, .LBB7_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: addi s2, s4, 32
-; RV32I-NEXT: .LBB7_6:
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: srli a0, s0, 27
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: .LBB7_3:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -726,7 +644,6 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -734,70 +651,49 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1)
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 64
-; RV64I-NEXT: beqz s0, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: srli a0, a1, 58
+; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI7_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i64_zero_undef:
; RV32M: # %bb.0:
; RV32M-NEXT: lui a2, 30667
-; RV32M-NEXT: addi a4, a2, 1329
+; RV32M-NEXT: addi a3, a2, 1329
; RV32M-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32M-NEXT: addi a5, a2, %lo(.LCPI7_0)
-; RV32M-NEXT: li a3, 32
-; RV32M-NEXT: li a2, 32
-; RV32M-NEXT: bnez a0, .LBB7_5
+; RV32M-NEXT: addi a2, a2, %lo(.LCPI7_0)
+; RV32M-NEXT: bnez a0, .LBB7_2
; RV32M-NEXT: # %bb.1:
-; RV32M-NEXT: bnez a1, .LBB7_6
+; RV32M-NEXT: neg a0, a1
+; RV32M-NEXT: and a0, a1, a0
+; RV32M-NEXT: mul a0, a0, a3
+; RV32M-NEXT: srli a0, a0, 27
+; RV32M-NEXT: add a0, a2, a0
+; RV32M-NEXT: lbu a0, 0(a0)
+; RV32M-NEXT: addi a0, a0, 32
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
; RV32M-NEXT: .LBB7_2:
-; RV32M-NEXT: bnez a0, .LBB7_4
-; RV32M-NEXT: .LBB7_3:
-; RV32M-NEXT: addi a2, a3, 32
-; RV32M-NEXT: .LBB7_4:
-; RV32M-NEXT: mv a0, a2
+; RV32M-NEXT: neg a1, a0
+; RV32M-NEXT: and a0, a0, a1
+; RV32M-NEXT: mul a0, a0, a3
+; RV32M-NEXT: srli a0, a0, 27
+; RV32M-NEXT: add a0, a2, a0
+; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
-; RV32M-NEXT: .LBB7_5:
-; RV32M-NEXT: neg a2, a0
-; RV32M-NEXT: and a2, a0, a2
-; RV32M-NEXT: mul a2, a2, a4
-; RV32M-NEXT: srli a2, a2, 27
-; RV32M-NEXT: add a2, a5, a2
-; RV32M-NEXT: lbu a2, 0(a2)
-; RV32M-NEXT: beqz a1, .LBB7_2
-; RV32M-NEXT: .LBB7_6:
-; RV32M-NEXT: neg a3, a1
-; RV32M-NEXT: and a1, a1, a3
-; RV32M-NEXT: mul a1, a1, a4
-; RV32M-NEXT: srli a1, a1, 27
-; RV32M-NEXT: add a1, a5, a1
-; RV32M-NEXT: lbu a3, 0(a1)
-; RV32M-NEXT: beqz a0, .LBB7_3
-; RV32M-NEXT: j .LBB7_4
;
; RV64M-LABEL: test_cttz_i64_zero_undef:
; RV64M: # %bb.0:
-; RV64M-NEXT: li a1, 64
-; RV64M-NEXT: beqz a0, .LBB7_2
-; RV64M-NEXT: # %bb.1:
; RV64M-NEXT: lui a1, %hi(.LCPI7_0)
; RV64M-NEXT: ld a1, %lo(.LCPI7_0)(a1)
; RV64M-NEXT: neg a2, a0
@@ -807,9 +703,7 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV64M-NEXT: lui a1, %hi(.LCPI7_1)
; RV64M-NEXT: addi a1, a1, %lo(.LCPI7_1)
; RV64M-NEXT: add a0, a1, a0
-; RV64M-NEXT: lbu a1, 0(a0)
-; RV64M-NEXT: .LBB7_2:
-; RV64M-NEXT: mv a0, a1
+; RV64M-NEXT: lbu a0, 0(a0)
; RV64M-NEXT: ret
;
; RV32ZBB-LABEL: test_cttz_i64_zero_undef:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index ddae369e1a674..0d51cf761d742 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -171,32 +171,24 @@ declare i32 @llvm.cttz.i32(i32, i1)
define i32 @cttz_i32(i32 %a) nounwind {
; RV32I-LABEL: cttz_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz a0, .LBB2_4
+; RV32I-NEXT: beqz a0, .LBB2_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi a1, a1, 1329
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: li a0, 32
-; RV32I-NEXT: beqz s0, .LBB2_3
-; RV32I-NEXT: # %bb.2: # %cond.false
-; RV32I-NEXT: srli a0, a1, 27
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lbu a0, 0(a0)
-; RV32I-NEXT: .LBB2_3: # %cond.false
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
@@ -220,8 +212,7 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a0, s0, a0
@@ -229,31 +220,29 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: addi s3, a1, 1329
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: lui a1, %hi(.LCPI3_0)
-; RV32I-NEXT: addi s5, a1, %lo(.LCPI3_0)
-; RV32I-NEXT: li s4, 32
-; RV32I-NEXT: li s2, 32
-; RV32I-NEXT: beqz s0, .LBB3_2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s2
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: li a1, 32
+; RV32I-NEXT: beqz s2, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s2, 0(a0)
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a1, 0(a0)
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: neg a0, s1
-; RV32I-NEXT: and a0, s1, a0
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3@plt
-; RV32I-NEXT: beqz s1, .LBB3_4
+; RV32I-NEXT: bnez s0, .LBB3_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s5, a0
-; RV32I-NEXT: lbu s4, 0(a0)
+; RV32I-NEXT: addi a0, a1, 32
+; RV32I-NEXT: j .LBB3_5
; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: bnez s0, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: addi s2, s4, 32
-; RV32I-NEXT: .LBB3_6:
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: srli a0, s1, 27
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: .LBB3_5:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -261,7 +250,6 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 936e4be021088..d5dc0ec5d6ac7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -362,32 +362,24 @@ declare i32 @llvm.cttz.i32(i32, i1)
define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB6_4
+; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: beqz s0, .LBB6_3
-; RV64I-NEXT: # %bb.2: # %cond.false
-; RV64I-NEXT: srliw a0, a1, 27
+; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB6_3: # %cond.false
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB6_4:
+; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
@@ -404,25 +396,17 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: beqz s0, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: srliw a0, a1, 27
+; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
@@ -446,18 +430,14 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz s0, .LBB8_2
-; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a1, 0(a0)
-; RV64I-NEXT: .LBB8_2:
-; RV64I-NEXT: snez a0, s0
-; RV64I-NEXT: addi a0, a0, -1
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: snez a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -488,19 +468,15 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz s0, .LBB9_2
-; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a1, 0(a0)
-; RV64I-NEXT: .LBB9_2:
-; RV64I-NEXT: seqz a0, s0
-; RV64I-NEXT: addi a1, a1, 1
-; RV64I-NEXT: addi a0, a0, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: seqz a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -526,32 +502,24 @@ declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-LABEL: cttz_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB10_4
+; RV64I-NEXT: beqz a0, .LBB10_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT: call __muldi3@plt
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 64
-; RV64I-NEXT: beqz s0, .LBB10_3
-; RV64I-NEXT: # %bb.2: # %cond.false
-; RV64I-NEXT: srli a0, a1, 58
+; RV64I-NEXT: srli a0, a0, 58
; RV64I-NEXT: lui a1, %hi(.LCPI10_1)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: .LBB10_3: # %cond.false
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB10_4:
+; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: li a0, 64
; RV64I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/SPARC/cttz.ll b/llvm/test/CodeGen/SPARC/cttz.ll
index fad74b7f5c71a..f4a033a49813d 100644
--- a/llvm/test/CodeGen/SPARC/cttz.ll
+++ b/llvm/test/CodeGen/SPARC/cttz.ll
@@ -16,7 +16,6 @@ define i32 @f(i32 %x) {
; CHECK-NEXT: add %o2, %lo(.LCPI0_0), %o2
; CHECK-NEXT: ldub [%o2+%o1], %o1
; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: move %icc, 32, %o1
; CHECK-NEXT: move %icc, 0, %o1
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o1, %o0
@@ -39,22 +38,18 @@ define i64 @g(i64 %x) {
; CHECK-NEXT: sethi 122669, %o4
; CHECK-NEXT: or %o4, 305, %o4
; CHECK-NEXT: smul %o3, %o4, %o3
-; CHECK-NEXT: srl %o3, 27, %o3
; CHECK-NEXT: sethi %hi(.LCPI1_0), %o5
; CHECK-NEXT: add %o5, %lo(.LCPI1_0), %o5
-; CHECK-NEXT: ldub [%o5+%o3], %g2
-; CHECK-NEXT: sub %o2, %o0, %o3
-; CHECK-NEXT: and %o0, %o3, %o3
-; CHECK-NEXT: smul %o3, %o4, %o3
+; CHECK-NEXT: sub %o2, %o0, %g2
+; CHECK-NEXT: and %o0, %g2, %g2
+; CHECK-NEXT: smul %g2, %o4, %o4
+; CHECK-NEXT: srl %o4, 27, %o4
+; CHECK-NEXT: ldub [%o5+%o4], %o4
; CHECK-NEXT: srl %o3, 27, %o3
-; CHECK-NEXT: ldub [%o5+%o3], %o3
-; CHECK-NEXT: cmp %o1, 0
-; CHECK-NEXT: move %icc, 32, %g2
-; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: move %icc, 32, %o3
-; CHECK-NEXT: add %o3, 32, %o3
+; CHECK-NEXT: ldub [%o5+%o3], %o5
+; CHECK-NEXT: add %o4, 32, %o3
; CHECK-NEXT: cmp %o1, 0
-; CHECK-NEXT: movne %icc, %g2, %o3
+; CHECK-NEXT: movne %icc, %o5, %o3
; CHECK-NEXT: or %o1, %o0, %o0
; CHECK-NEXT: cmp %o0, 0
; CHECK-NEXT: move %icc, 0, %o3
More information about the llvm-commits
mailing list