[llvm] [NVPTX] Legalize ctpop and ctlz in operation legalization (PR #130668)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 10:05:30 PDT 2025
================
@@ -1,175 +1,281 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
-; CHECK-LABEL: test_fabsf(
define float @test_fabsf(float %f) {
-; CHECK: abs.f32
+; CHECK-LABEL: test_fabsf(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [test_fabsf_param_0];
+; CHECK-NEXT: abs.f32 %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT: ret;
%x = call float @llvm.fabs.f32(float %f)
ret float %x
}
-; CHECK-LABEL: test_fabs(
define double @test_fabs(double %d) {
-; CHECK: abs.f64
+; CHECK-LABEL: test_fabs(
+; CHECK: {
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f64 %fd1, [test_fabs_param_0];
+; CHECK-NEXT: abs.f64 %fd2, %fd1;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
+; CHECK-NEXT: ret;
%x = call double @llvm.fabs.f64(double %d)
ret double %x
}
-; CHECK-LABEL: test_nvvm_sqrt(
define float @test_nvvm_sqrt(float %a) {
-; CHECK: sqrt.rn.f32
+; CHECK-LABEL: test_nvvm_sqrt(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [test_nvvm_sqrt_param_0];
+; CHECK-NEXT: sqrt.rn.f32 %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT: ret;
%val = call float @llvm.nvvm.sqrt.f(float %a)
ret float %val
}
-; CHECK-LABEL: test_llvm_sqrt(
define float @test_llvm_sqrt(float %a) {
-; CHECK: sqrt.rn.f32
+; CHECK-LABEL: test_llvm_sqrt(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [test_llvm_sqrt_param_0];
+; CHECK-NEXT: sqrt.rn.f32 %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT: ret;
%val = call float @llvm.sqrt.f32(float %a)
ret float %val
}
-; CHECK-LABEL: test_bitreverse32(
define i32 @test_bitreverse32(i32 %a) {
-; CHECK: brev.b32
+; CHECK-LABEL: test_bitreverse32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [test_bitreverse32_param_0];
+; CHECK-NEXT: brev.b32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%val = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %val
}
-; CHECK-LABEL: test_bitreverse64(
define i64 @test_bitreverse64(i64 %a) {
-; CHECK: brev.b64
+; CHECK-LABEL: test_bitreverse64(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_bitreverse64_param_0];
+; CHECK-NEXT: brev.b64 %rd2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%val = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %val
}
-; CHECK-LABEL: test_popc32(
define i32 @test_popc32(i32 %a) {
-; CHECK: popc.b32
+; CHECK-LABEL: test_popc32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [test_popc32_param_0];
+; CHECK-NEXT: popc.b32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%val = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %val
}
-; CHECK-LABEL: test_popc64
define i64 @test_popc64(i64 %a) {
-; CHECK: popc.b64
-; CHECK: cvt.u64.u32
+; CHECK-LABEL: test_popc64(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_popc64_param_0];
+; CHECK-NEXT: popc.b64 %r1, %rd1;
+; CHECK-NEXT: cvt.u64.u32 %rd2, %r1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%val = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %val
}
; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
; if this function returns an i32, there's no need to do any type conversions
; in the ptx.
-; CHECK-LABEL: test_popc64_trunc
define i32 @test_popc64_trunc(i64 %a) {
-; CHECK: popc.b64
-; CHECK-NOT: cvt.
+; CHECK-LABEL: test_popc64_trunc(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_popc64_trunc_param_0];
+; CHECK-NEXT: popc.b64 %r1, %rd1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%val = call i64 @llvm.ctpop.i64(i64 %a)
%trunc = trunc i64 %val to i32
ret i32 %trunc
}
; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
; then converting back to i16.
-; CHECK-LABEL: test_popc16
define void @test_popc16(i16 %a, ptr %b) {
-; CHECK: cvt.u32.u16
-; CHECK: popc.b32
-; CHECK: cvt.u16.u32
----------------
AlexMaclean wrote:
> If you change the test function to return i16, and it does clearly show that we're storing only 16 bits, then we can remove the pointer and collapse this test checks to just one variant.
It looks like the return type just gets promoted and the 32-bit value gets stored:
```
// .globl test_popc16 // -- Begin function test_popc16
.visible .func (.param .b32 func_retval0) test_popc16(
.param .b32 test_popc16_param_0,
.param .b32 test_popc16_param_1
) // @test_popc16
{
.reg .b32 %r<3>;
// %bb.0:
ld.param.u16 %r1, [test_popc16_param_0];
popc.b32 %r2, %r1;
st.param.b32 [func_retval0], %r2;
ret;
// -- End function
}
```
https://github.com/llvm/llvm-project/pull/130668
More information about the llvm-commits mailing list