[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 10:07:09 PDT 2025
================
@@ -320,33 +272,38 @@ define float @reduce_fmul_float_reassoc_nonpow2(<7 x float> %in) {
; CHECK-NEXT: ld.param.b32 %r7, [reduce_fmul_float_reassoc_nonpow2_param_0+24];
; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [reduce_fmul_float_reassoc_nonpow2_param_0+16];
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_reassoc_nonpow2_param_0];
-; CHECK-NEXT: mul.rn.f32 %r8, %r3, %r7;
-; CHECK-NEXT: mul.rn.f32 %r9, %r1, %r5;
-; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r8;
-; CHECK-NEXT: mul.rn.f32 %r11, %r2, %r6;
-; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r4;
-; CHECK-NEXT: mul.rn.f32 %r13, %r10, %r12;
+; CHECK-NEXT: mul.rn.f32 %r8, %r5, %r6;
+; CHECK-NEXT: mul.rn.f32 %r9, %r8, %r7;
+; CHECK-NEXT: mul.rn.f32 %r10, %r3, %r4;
+; CHECK-NEXT: mul.rn.f32 %r11, %r1, %r2;
+; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r10;
+; CHECK-NEXT: mul.rn.f32 %r13, %r12, %r9;
; CHECK-NEXT: st.param.b32 [func_retval0], %r13;
; CHECK-NEXT: ret;
%res = call reassoc float @llvm.vector.reduce.fmul(float 1.0, <7 x float> %in)
ret float %res
}
-; Check straight line reduction.
define half @reduce_fmax_half(<8 x half> %in) {
; CHECK-LABEL: reduce_fmax_half(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
-; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-NEXT: .reg .b16 %rs<16>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_half_param_0];
-; CHECK-NEXT: max.f16x2 %r5, %r2, %r4;
-; CHECK-NEXT: max.f16x2 %r6, %r1, %r3;
-; CHECK-NEXT: max.f16x2 %r7, %r6, %r5;
-; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r7;
-; CHECK-NEXT: max.f16 %rs3, %rs1, %rs2;
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r4;
+; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r3;
+; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r2;
+; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r1;
+; CHECK-NEXT: max.f16 %rs9, %rs7, %rs8;
+; CHECK-NEXT: max.f16 %rs10, %rs9, %rs5;
+; CHECK-NEXT: max.f16 %rs11, %rs10, %rs6;
+; CHECK-NEXT: max.f16 %rs12, %rs11, %rs3;
+; CHECK-NEXT: max.f16 %rs13, %rs12, %rs4;
+; CHECK-NEXT: max.f16 %rs14, %rs13, %rs1;
+; CHECK-NEXT: max.f16 %rs15, %rs14, %rs2;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs15;
----------------
AlexMaclean wrote:
I'm still a bit confused. Are you saying that the code we emit currently (prior to your change) is incorrect? If so, where is this incorrect expansion coming from? I'd like to confirm our understanding of the associativity of these operations is correct.
https://github.com/llvm/llvm-project/pull/136253
More information about the llvm-commits mailing list