[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 01:10:34 PDT 2025
================
@@ -43,45 +42,22 @@ define half @reduce_fadd_half(<8 x half> %in) {
}
define half @reduce_fadd_half_reassoc(<8 x half> %in) {
-; CHECK-SM80-LABEL: reduce_fadd_half_reassoc(
-; CHECK-SM80: {
-; CHECK-SM80-NEXT: .reg .b16 %rs<6>;
-; CHECK-SM80-NEXT: .reg .b32 %r<10>;
-; CHECK-SM80-EMPTY:
-; CHECK-SM80-NEXT: // %bb.0:
-; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_half_reassoc_param_0];
-; CHECK-SM80-NEXT: add.rn.f16x2 %r5, %r2, %r4;
-; CHECK-SM80-NEXT: add.rn.f16x2 %r6, %r1, %r3;
-; CHECK-SM80-NEXT: add.rn.f16x2 %r7, %r6, %r5;
-; CHECK-SM80-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r7; }
-; CHECK-SM80-NEXT: // implicit-def: %rs2
-; CHECK-SM80-NEXT: mov.b32 %r8, {%rs1, %rs2};
-; CHECK-SM80-NEXT: add.rn.f16x2 %r9, %r7, %r8;
-; CHECK-SM80-NEXT: { .reg .b16 tmp; mov.b32 {%rs3, tmp}, %r9; }
-; CHECK-SM80-NEXT: mov.b16 %rs4, 0x0000;
-; CHECK-SM80-NEXT: add.rn.f16 %rs5, %rs3, %rs4;
-; CHECK-SM80-NEXT: st.param.b16 [func_retval0], %rs5;
-; CHECK-SM80-NEXT: ret;
-;
-; CHECK-SM100-LABEL: reduce_fadd_half_reassoc(
-; CHECK-SM100: {
-; CHECK-SM100-NEXT: .reg .b16 %rs<6>;
-; CHECK-SM100-NEXT: .reg .b32 %r<10>;
-; CHECK-SM100-EMPTY:
-; CHECK-SM100-NEXT: // %bb.0:
-; CHECK-SM100-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_half_reassoc_param_0];
-; CHECK-SM100-NEXT: add.rn.f16x2 %r5, %r2, %r4;
-; CHECK-SM100-NEXT: add.rn.f16x2 %r6, %r1, %r3;
-; CHECK-SM100-NEXT: add.rn.f16x2 %r7, %r6, %r5;
-; CHECK-SM100-NEXT: mov.b32 {_, %rs1}, %r7;
-; CHECK-SM100-NEXT: // implicit-def: %rs2
-; CHECK-SM100-NEXT: mov.b32 %r8, {%rs1, %rs2};
-; CHECK-SM100-NEXT: add.rn.f16x2 %r9, %r7, %r8;
-; CHECK-SM100-NEXT: mov.b32 {%rs3, _}, %r9;
-; CHECK-SM100-NEXT: mov.b16 %rs4, 0x0000;
-; CHECK-SM100-NEXT: add.rn.f16 %rs5, %rs3, %rs4;
-; CHECK-SM100-NEXT: st.param.b16 [func_retval0], %rs5;
-; CHECK-SM100-NEXT: ret;
+; CHECK-LABEL: reduce_fadd_half_reassoc(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<6>;
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_half_reassoc_param_0];
+; CHECK-NEXT: add.rn.f16x2 %r5, %r2, %r4;
+; CHECK-NEXT: add.rn.f16x2 %r6, %r1, %r3;
+; CHECK-NEXT: add.rn.f16x2 %r7, %r6, %r5;
+; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r7;
+; CHECK-NEXT: add.rn.f16 %rs3, %rs1, %rs2;
+; CHECK-NEXT: mov.b16 %rs4, 0x0000;
+; CHECK-NEXT: add.rn.f16 %rs5, %rs3, %rs4;
----------------
Prince781 wrote:
Fixed.
https://github.com/llvm/llvm-project/pull/136253
More information about the llvm-commits
mailing list