[llvm] 4c92e31 - [RISCV] Add tests for __builtin_parity idiom.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 27 12:38:04 PDT 2021
Author: Craig Topper
Date: 2021-06-27T12:37:29-07:00
New Revision: 4c92e31dd0f1bd152eda883af20ff7fbcaa14945
URL: https://github.com/llvm/llvm-project/commit/4c92e31dd0f1bd152eda883af20ff7fbcaa14945
DIFF: https://github.com/llvm/llvm-project/commit/4c92e31dd0f1bd152eda883af20ff7fbcaa14945.diff
LOG: [RISCV] Add tests for __builtin_parity idiom.
We use (and (ctpop X), 1) to represent parity.
The generated code for i32 parity on RV64 has more instructions than
necessary which I hope to improve in a followup patch.
Also add missing test for i64 ctpop.
Added:
Modified:
llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index 9896ca1d1020d..1921e236837ad 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -13,6 +13,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
define i16 @test_bswap_i16(i16 %a) nounwind {
; RV32I-LABEL: test_bswap_i16:
@@ -1169,3 +1170,190 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
}
+
+define i64 @test_ctpop_i64(i64 %a) nounwind {
+; RV32I-LABEL: test_ctpop_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s3, a2, 1365
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s0, a1, 819
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s4, a1, -241
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s1, a1, 257
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli s5, a0, 24
+; RV32I-NEXT: srli a0, s2, 1
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: sub a0, s2, a0
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_ctpop_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %1
+}
+
+define i32 @test_parity_i32(i32 %a) {
+; RV32I-LABEL: test_parity_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_parity_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: srliw a0, a0, 16
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ %2 = and i32 %1, 1
+ ret i32 %2
+}
+
+define i64 @test_parity_i64(i64 %a) {
+; RV32I-LABEL: test_parity_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_parity_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %1 = call i64 @llvm.ctpop.i64(i64 %a)
+ %2 = and i64 %1, 1
+ ret i64 %2
+}
More information about the llvm-commits
mailing list