[llvm] c8cf8bc - [RISCV] Add some 32-bit ctlz and cttz idiom tests to rv64zbb.ll. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 24 21:55:49 PDT 2021
Author: Craig Topper
Date: 2021-03-24T21:52:48-07:00
New Revision: c8cf8bc7ec3f8721953d4545812aaac5d21ee97a
URL: https://github.com/llvm/llvm-project/commit/c8cf8bc7ec3f8721953d4545812aaac5d21ee97a
DIFF: https://github.com/llvm/llvm-project/commit/c8cf8bc7ec3f8721953d4545812aaac5d21ee97a.diff
LOG: [RISCV] Add some 32-bit ctlz and cttz idiom tests to rv64zbb.ll. NFC
These tests implement various idioms built on ctlz/cttz, such as Log2,
Log2_Ceil, findFirstSet, etc.

Some of them demonstrate that we fail to select clzw because the idiom
breaks the isel pattern we rely on, (add (ctlz (and X, 0xffffffff)), -32):
some of the idioms cause the constant on the add to be different.
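
For reference, here is one of the new idioms in plain IR (log2_i32, copied
from the diff below). The RV64IBB check lines end in a subtract from 63
rather than an add of -32, which suggests the legalizer's -32 adjustment is
folded into the outer 31 - ctlz subtract before isel, so the clzw pattern
above can no longer match:

; Sketch of the log2_i32 idiom (same IR as in the test diff below).
declare i32 @llvm.ctlz.i32(i32, i1)

define signext i32 @log2_i32(i32 signext %a) nounwind {
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)   ; 32-bit count leading zeros
  %2 = sub i32 31, %1                              ; Log2(x) = 31 - ctlz(x)
  ret i32 %2
}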
Added:
Modified:
llvm/test/CodeGen/RISCV/rv64zbb.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index b0f0fb8a5f35c..a173235d280c8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -94,6 +94,396 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
ret i32 %1
}
+define signext i32 @log2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: beqz a0, .LBB1_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: addi a0, a0, -32
+; RV64I-NEXT: j .LBB1_3
+; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: addi a0, zero, 32
+; RV64I-NEXT: .LBB1_3: # %cond.end
+; RV64I-NEXT: addi a1, zero, 31
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: log2_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: zext.w a0, a0
+; RV64IB-NEXT: clz a0, a0
+; RV64IB-NEXT: addi a1, zero, 63
+; RV64IB-NEXT: sub a0, a1, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: log2_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: slli a0, a0, 32
+; RV64IBB-NEXT: srli a0, a0, 32
+; RV64IBB-NEXT: clz a0, a0
+; RV64IBB-NEXT: addi a1, zero, 63
+; RV64IBB-NEXT: sub a0, a1, a0
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ %2 = sub i32 31, %1
+ ret i32 %2
+}
+
+define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: log2_ceil_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addiw a2, a0, -1
+; RV64I-NEXT: addi s0, zero, 32
+; RV64I-NEXT: addi a1, zero, 32
+; RV64I-NEXT: beqz a2, .LBB2_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: addi a1, a0, -32
+; RV64I-NEXT: .LBB2_2: # %cond.end
+; RV64I-NEXT: sub a0, s0, a1
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: log2_ceil_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addi a0, a0, -1
+; RV64IB-NEXT: zext.w a0, a0
+; RV64IB-NEXT: clz a0, a0
+; RV64IB-NEXT: addi a1, zero, 64
+; RV64IB-NEXT: sub a0, a1, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: log2_ceil_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addi a0, a0, -1
+; RV64IBB-NEXT: slli a0, a0, 32
+; RV64IBB-NEXT: srli a0, a0, 32
+; RV64IBB-NEXT: clz a0, a0
+; RV64IBB-NEXT: addi a1, zero, 64
+; RV64IBB-NEXT: sub a0, a1, a0
+; RV64IBB-NEXT: ret
+ %1 = sub i32 %a, 1
+ %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ %3 = sub i32 32, %2
+ ret i32 %3
+}
+
+define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findLastSet_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srliw a1, s0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: addi a0, zero, -1
+; RV64I-NEXT: beqz s0, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: srli a0, a1, 56
+; RV64I-NEXT: addi a0, a0, -32
+; RV64I-NEXT: xori a0, a0, 31
+; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: findLastSet_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: clzw a1, a0
+; RV64IB-NEXT: xori a1, a1, 31
+; RV64IB-NEXT: addi a2, zero, -1
+; RV64IB-NEXT: cmov a0, a0, a1, a2
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: findLastSet_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: mv a1, a0
+; RV64IBB-NEXT: addi a0, zero, -1
+; RV64IBB-NEXT: beqz a1, .LBB3_2
+; RV64IBB-NEXT: # %bb.1:
+; RV64IBB-NEXT: clzw a0, a1
+; RV64IBB-NEXT: xori a0, a0, 31
+; RV64IBB-NEXT: .LBB3_2:
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+ %2 = xor i32 31, %1
+ %3 = icmp eq i32 %a, 0
+ %4 = select i1 %3, i32 -1, i32 %2
+ ret i32 %4
+}
+
+define i32 @ctlz_lshr_i32(i32 signext %a) {
+; RV64I-LABEL: ctlz_lshr_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: .cfi_def_cfa_offset 16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset ra, -8
+; RV64I-NEXT: srliw a0, a0, 1
+; RV64I-NEXT: beqz a0, .LBB4_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: addi a0, a0, -32
+; RV64I-NEXT: j .LBB4_3
+; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: addi a0, zero, 32
+; RV64I-NEXT: .LBB4_3: # %cond.end
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ctlz_lshr_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: srliw a0, a0, 1
+; RV64IB-NEXT: clz a0, a0
+; RV64IB-NEXT: addi a0, a0, -32
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ctlz_lshr_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: srliw a0, a0, 1
+; RV64IBB-NEXT: clz a0, a0
+; RV64IBB-NEXT: addi a0, a0, -32
+; RV64IBB-NEXT: ret
+ %1 = lshr i32 %a, 1
+ %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
+ ret i32 %2
+}
+
declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
@@ -101,7 +491,7 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: beqz a0, .LBB1_2
+; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
@@ -158,10 +548,10 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: addi a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: j .LBB1_3
-; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: j .LBB5_3
+; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: addi a0, zero, 64
-; RV64I-NEXT: .LBB1_3: # %cond.end
+; RV64I-NEXT: .LBB5_3: # %cond.end
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -186,7 +576,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: not a0, a0
@@ -233,10 +623,10 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addi a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: j .LBB2_3
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: j .LBB6_3
+; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: addi a0, zero, 32
-; RV64I-NEXT: .LBB2_3: # %cond.end
+; RV64I-NEXT: .LBB6_3: # %cond.end
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -254,6 +644,244 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
ret i32 %1
}
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: cttz_zero_undef_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: ctz a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: cttz_zero_undef_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: ctz a0, a0
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: not a1, s0
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: addi a0, zero, -1
+; RV64I-NEXT: beqz s0, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: srli a0, a1, 56
+; RV64I-NEXT: .LBB8_2:
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: findFirstSet_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: ctz a1, a0
+; RV64IB-NEXT: addi a2, zero, -1
+; RV64IB-NEXT: cmov a0, a0, a1, a2
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: findFirstSet_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: mv a1, a0
+; RV64IBB-NEXT: addi a0, zero, -1
+; RV64IBB-NEXT: beqz a1, .LBB8_2
+; RV64IBB-NEXT: # %bb.1:
+; RV64IBB-NEXT: ctz a0, a1
+; RV64IBB-NEXT: .LBB8_2:
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ %2 = icmp eq i32 %a, 0
+ %3 = select i1 %2, i32 -1, i32 %1
+ ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: not a1, s0
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: mv a0, zero
+; RV64I-NEXT: beqz s0, .LBB9_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: srli a0, a1, 56
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: .LBB9_2:
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ffs_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: ctz a1, a0
+; RV64IB-NEXT: addi a1, a1, 1
+; RV64IB-NEXT: cmov a0, a0, a1, zero
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ffs_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: mv a1, a0
+; RV64IBB-NEXT: mv a0, zero
+; RV64IBB-NEXT: beqz a1, .LBB9_2
+; RV64IBB-NEXT: # %bb.1:
+; RV64IBB-NEXT: ctz a0, a1
+; RV64IBB-NEXT: addi a0, a0, 1
+; RV64IBB-NEXT: .LBB9_2:
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ %2 = add i32 %1, 1
+ %3 = icmp eq i32 %a, 0
+ %4 = select i1 %3, i32 0, i32 %2
+ ret i32 %4
+}
+
declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_i64(i64 %a) nounwind {
@@ -261,7 +889,7 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: beqz a0, .LBB3_2
+; RV64I-NEXT: beqz a0, .LBB10_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi a1, a0, -1
; RV64I-NEXT: not a0, a0
@@ -308,10 +936,10 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-NEXT: addi a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: j .LBB3_3
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: j .LBB10_3
+; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: addi a0, zero, 64
-; RV64I-NEXT: .LBB3_3: # %cond.end
+; RV64I-NEXT: .LBB10_3: # %cond.end
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -544,10 +1172,10 @@ define i64 @sexth_i64(i64 %a) nounwind {
define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: min_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a0, a1, .LBB10_2
+; RV64I-NEXT: blt a0, a1, .LBB17_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB10_2:
+; RV64I-NEXT: .LBB17_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: min_i32:
@@ -567,10 +1195,10 @@ define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
define i64 @min_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: min_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a0, a1, .LBB11_2
+; RV64I-NEXT: blt a0, a1, .LBB18_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB11_2:
+; RV64I-NEXT: .LBB18_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: min_i64:
@@ -590,10 +1218,10 @@ define i64 @min_i64(i64 %a, i64 %b) nounwind {
define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: max_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a1, a0, .LBB12_2
+; RV64I-NEXT: blt a1, a0, .LBB19_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB12_2:
+; RV64I-NEXT: .LBB19_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: max_i32:
@@ -613,10 +1241,10 @@ define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
define i64 @max_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: max_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a1, a0, .LBB13_2
+; RV64I-NEXT: blt a1, a0, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB13_2:
+; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: max_i64:
@@ -636,10 +1264,10 @@ define i64 @max_i64(i64 %a, i64 %b) nounwind {
define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: minu_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a0, a1, .LBB14_2
+; RV64I-NEXT: bltu a0, a1, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB14_2:
+; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: minu_i32:
@@ -659,10 +1287,10 @@ define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
define i64 @minu_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: minu_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a0, a1, .LBB15_2
+; RV64I-NEXT: bltu a0, a1, .LBB22_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB15_2:
+; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: minu_i64:
@@ -682,10 +1310,10 @@ define i64 @minu_i64(i64 %a, i64 %b) nounwind {
define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: maxu_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a1, a0, .LBB16_2
+; RV64I-NEXT: bltu a1, a0, .LBB23_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB16_2:
+; RV64I-NEXT: .LBB23_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: maxu_i32:
@@ -705,10 +1333,10 @@ define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: maxu_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a1, a0, .LBB17_2
+; RV64I-NEXT: bltu a1, a0, .LBB24_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB17_2:
+; RV64I-NEXT: .LBB24_2:
; RV64I-NEXT: ret
;
; RV64IB-LABEL: maxu_i64: