[llvm] [NVPTX] Fix crash caused by ComputePTXValueVTs (PR #104524)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 14:15:03 PDT 2024


================
@@ -0,0 +1,523 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; XFAIL: *
+; FIXME: Fails on `byte2` because NVPTX wants to lower v2i8 to 2 i8s while 
+;   SelectionDAG wants to lower it to 1 v2i16. 
+
+target triple = "nvptx-nvidia-cuda"
+
+define <3 x i64> @long3() {
+; CHECK-LABEL: long3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u64 %rd1, 0;
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0+0], {%rd1, %rd1};
+; CHECK-NEXT:    st.param.b64 [func_retval0+16], %rd1;
+; CHECK-NEXT:    ret;
+  ret <3 x i64> zeroinitializer
+}
+
+define <2 x i64> @long2() {
+; CHECK-LABEL: long2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u64 %rd1, 0;
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0+0], {%rd1, %rd1};
+; CHECK-NEXT:    ret;
+  ret <2 x i64> zeroinitializer
+}
+
+define <1 x i64> @long1() {
+; CHECK-LABEL: long1(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u64 %rd1, 0;
+; CHECK-NEXT:    st.param.b64 [func_retval0+0], %rd1;
+; CHECK-NEXT:    ret;
+  ret <1 x i64> zeroinitializer
+}
+
+define <5 x i32> @int5() {
+; CHECK-LABEL: int5(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT:    st.param.b32 [func_retval0+16], %r1;
+; CHECK-NEXT:    ret;
+  ret <5 x i32> zeroinitializer
+}
+
+define <4 x i32> @int4() {
+; CHECK-LABEL: int4(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT:    ret;
+  ret <4 x i32> zeroinitializer
+}
+
+define <3 x i32> @int3() {
+; CHECK-LABEL: int3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v2.b32 [func_retval0+0], {%r1, %r1};
+; CHECK-NEXT:    st.param.b32 [func_retval0+8], %r1;
+; CHECK-NEXT:    ret;
+  ret <3 x i32> zeroinitializer
+}
+
+define <2 x i32> @int2() {
+; CHECK-LABEL: int2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v2.b32 [func_retval0+0], {%r1, %r1};
+; CHECK-NEXT:    ret;
+  ret <2 x i32> zeroinitializer
+}
+
+define <1 x i32> @int1() {
+; CHECK-LABEL: int1(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  ret <1 x i32> zeroinitializer
+}
+
+define <9 x i16> @short9() {
+; CHECK-LABEL: short9(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.b16 [func_retval0+16], %rs1;
+; CHECK-NEXT:    ret;
+  ret <9 x i16> zeroinitializer
+}
+
+define <8 x i16> @short8() {
+; CHECK-LABEL: short8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT:    ret;
+  ret <8 x i16> zeroinitializer
+}
+
+define <7 x i16> @short7() {
+; CHECK-LABEL: short7(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v2.b16 [func_retval0+8], {%rs1, %rs1};
+; CHECK-NEXT:    st.param.b16 [func_retval0+12], %rs1;
+; CHECK-NEXT:    ret;
+  ret <7 x i16> zeroinitializer
+}
+
+define <5 x i16> @short5() {
+; CHECK-LABEL: short5(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.b16 [func_retval0+8], %rs1;
+; CHECK-NEXT:    ret;
+  ret <5 x i16> zeroinitializer
+}
+
+define <4 x i16> @short4() {
+; CHECK-LABEL: short4(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v2.b32 [func_retval0+0], {%r1, %r1};
+; CHECK-NEXT:    ret;
+  ret <4 x i16> zeroinitializer
+}
+
+define <3 x i16> @short3() {
+; CHECK-LABEL: short3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v2.b16 [func_retval0+0], {%rs1, %rs1};
+; CHECK-NEXT:    st.param.b16 [func_retval0+4], %rs1;
+; CHECK-NEXT:    ret;
+  ret <3 x i16> zeroinitializer
+}
+
+define <2 x i16> @short2() {
+; CHECK-LABEL: short2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  ret <2 x i16> zeroinitializer
+}
+
+define <1 x i16> @short1() {
+; CHECK-LABEL: short1(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.b16 [func_retval0+0], %rs1;
+; CHECK-NEXT:    ret;
+  ret <1 x i16> zeroinitializer
+}
+
+define <17 x i8> @byte17() {
+; CHECK-LABEL: byte17(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.b8 [func_retval0+16], %rs1;
+; CHECK-NEXT:    ret;
+  ret <17 x i8> zeroinitializer
+}
+
+define <16 x i8> @byte16() {
+; CHECK-LABEL: byte16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1};
+; CHECK-NEXT:    ret;
+  ret <16 x i8> zeroinitializer
+}
+
+define <15 x i8> @byte15() {
+; CHECK-LABEL: byte15(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u16 %rs1, 0;
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1};
+; CHECK-NEXT:    st.param.v2.b8 [func_retval0+12], {%rs1, %rs1};
----------------
Artem-B wrote:

Another optimization opportunity would be to lower first 8 bytes in one instruction.

https://github.com/llvm/llvm-project/pull/104524


More information about the llvm-commits mailing list