[llvm] [NVPTX] Add intrinsics for the szext instruction (PR #139126)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 16:37:43 PDT 2025
================
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - < %s -mcpu=sm_70 -mattr=+ptx76 | FileCheck %s
+
+target triple = "nvptx-unknown-cuda"
+
+; Two register operands passed to @llvm.nvvm.zext.wrap: the CHECK lines expect
+; both parameters to be loaded and a single szext.wrap.u32 to produce the result.
+define i32 @szext_wrap_u32(i32 %a, i32 %b) {
+; CHECK-LABEL: szext_wrap_u32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [szext_wrap_u32_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [szext_wrap_u32_param_1];
+; CHECK-NEXT: szext.wrap.u32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %c = call i32 @llvm.nvvm.zext.wrap(i32 %a, i32 %b)
+ ret i32 %c
+}
+
+; Two register operands passed to @llvm.nvvm.zext.clamp: the CHECK lines expect
+; both parameters to be loaded and a single szext.clamp.u32 to produce the result.
+define i32 @szext_clamp_u32(i32 %a, i32 %b) {
+; CHECK-LABEL: szext_clamp_u32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [szext_clamp_u32_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [szext_clamp_u32_param_1];
+; CHECK-NEXT: szext.clamp.u32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %c = call i32 @llvm.nvvm.zext.clamp(i32 %a, i32 %b)
+ ret i32 %c
+}
+
+; Two register operands passed to @llvm.nvvm.sext.wrap: the CHECK lines expect
+; both parameters to be loaded and a single szext.wrap.s32 to produce the result.
+define i32 @szext_wrap_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: szext_wrap_s32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [szext_wrap_s32_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [szext_wrap_s32_param_1];
+; CHECK-NEXT: szext.wrap.s32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %c = call i32 @llvm.nvvm.sext.wrap(i32 %a, i32 %b)
+ ret i32 %c
+}
+
+; Two register operands passed to @llvm.nvvm.sext.clamp: the CHECK lines expect
+; both parameters to be loaded and a single szext.clamp.s32 to produce the result.
+define i32 @szext_clamp_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: szext_clamp_s32(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [szext_clamp_s32_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [szext_clamp_s32_param_1];
+; CHECK-NEXT: szext.clamp.s32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %c = call i32 @llvm.nvvm.sext.clamp(i32 %a, i32 %b)
+ ret i32 %c
+}
+
+define i32 @szext_clamp_s32_ii() {
+; CHECK-LABEL: szext_clamp_s32_ii(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b32 %r1, 3;
+; CHECK-NEXT: szext.clamp.s32 %r2, %r1, 4;
----------------
AlexMaclean wrote:
I think that, in general, the case where all operands of a pure function are immediates is one we don't add patterns for. These cases should be pretty rare, and they should be handled by constant folding. Adding code to ISel for them will likely hurt compile time on the margin without any clear benefit.
https://github.com/llvm/llvm-project/pull/139126
More information about the llvm-commits
mailing list