[libclc] libclc: clspv: create gen_convert.cl for clspv (PR #66902)

Thu Mar 14 03:26:15 PDT 2024

https://github.com/rjodinchr updated https://github.com/llvm/llvm-project/pull/66902

>From 5614f89c90cf865c88fbcf95d707e34dfeb18a19 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin at chromium.org>
Date: Tue, 18 Jul 2023 09:30:09 +0200
Subject: [PATCH] libclc: clspv: update gen_convert.cl for clspv

Add a clspv switch (--clspv) to gen_convert.py.
This is needed as Vulkan SPIR-V does not respect the assumptions
needed to have the generic convert.cl compliant on many platforms.

The switch is needed because of the conversion of TYPE_MAX and
TYPE_MIN. The behaviour varies depending on the platform, but most
of them simply do not convert these two values correctly.

Because of that, we also need to avoid having explicit functions for
simple conversions, because they allow LLVM to optimise the code, thus
removing some of the added checks that are in fact needed.

I did not use Python's argparse, to avoid adding a dependency on it.
---
 libclc/CMakeLists.txt             | 15 +++++-
 libclc/generic/lib/gen_convert.py | 77 +++++++++++++++++++++++++------
 2 files changed, 75 insertions(+), 17 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index fa1d8e4adbcc4f..18f77940e76669 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -174,6 +174,12 @@ add_custom_command(
 	DEPENDS ${script_loc} )
 add_custom_target( "generate_convert.cl" DEPENDS convert.cl )
 
+add_custom_command(
+	OUTPUT clspv-convert.cl
+	COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
+	DEPENDS ${script_loc} )
+add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
+
 enable_testing()
 
 foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
@@ -218,11 +224,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 	# Add the generated convert.cl here to prevent adding
 	# the one listed in SOURCES
 	if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" )
-		set( rel_files convert.cl )
-		set( objects convert.cl )
 		if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND
 		    NOT ${ARCH} STREQUAL "clspv64" )
+			set( rel_files convert.cl )
+			set( objects convert.cl )
 			list( APPEND rel_files generic/lib/subnormal_use_default.ll )
+		elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64")
+			set( rel_files clspv-convert.cl )
+			set( objects clspv-convert.cl )
 		endif()
 	else()
 		set( rel_files )
@@ -286,6 +295,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 		# multiple invocations
 		add_dependencies( builtins.link.${arch_suffix}
 			generate_convert.cl )
+		add_dependencies( builtins.link.${arch_suffix}
+			clspv-generate_convert.cl )
 		# CMake will turn this include into absolute path
 		target_include_directories( builtins.link.${arch_suffix} PRIVATE
 			"generic/include" )
diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py
index 612a9184f4b271..afdb589536c969 100644
--- a/libclc/generic/lib/gen_convert.py
+++ b/libclc/generic/lib/gen_convert.py
@@ -2,6 +2,7 @@
 #
 # Copyright (c) 2013 Victor Oliveira <victormatheus at gmail.com>
 # Copyright (c) 2013 Jesse Towner <jessetowner at lavabit.com>
+# Copyright (c) 2024 Romaric Jodin <rjodin at chromium.org>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +27,12 @@
 #
 # convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
 
+import sys
+
+clspv = False
+if len(sys.argv) == 2 and sys.argv[1] == '--clspv':
+    clspv = True
+
 types = [
     "char",
     "uchar",
@@ -250,14 +257,19 @@ def generate_default_conversion(src, dst, mode):
     if close_conditional:
         print("#endif")
 
-
-for src in types:
-    for dst in types:
-        generate_default_conversion(src, dst, "")
+# Do not generate default conversions for clspv as they are handled natively
+if not clspv:
+    for src in types:
+        for dst in types:
+            generate_default_conversion(src, dst, "")
 
 for src in int_types:
     for dst in int_types:
         for mode in rounding_modes:
+            # Do not generate "_rte" conversions for clspv as they are handled
+            # natively
+            if clspv and mode == "_rte":
+                continue
             generate_default_conversion(src, dst, mode)
 
 #
@@ -307,8 +319,8 @@ def generate_saturated_conversion(src, dst, size):
         # Conversion from float to int
         print(
             """  {DST}{N} y = convert_{DST}{N}(x);
-  y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
-  y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
+  y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
+  y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
   return y;""".format(
                 SRC=src,
                 DST=dst,
@@ -432,7 +444,10 @@ def generate_float_conversion(src, dst, size, mode, sat):
         print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
     else:
         print("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
-        print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
+        if clspv:
+            print("  {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC=src, N=size))
+        else:
+            print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
         if mode == "_rtz":
             if src in int_types:
                 print(
@@ -448,11 +463,25 @@ def generate_float_conversion(src, dst, size, mode, sat):
             else:
                 print("  {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
                 print("  {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
-            print(
-                "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
-                    DST=dst, N=size, BOOL=bool_type[dst]
+            if clspv:
+                print("  {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(BOOL=bool_type[dst], N=size))
+                if sizeof_type[src] >= 4 and src in int_types:
+                    print(
+                        "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
+                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
+                        )
+                    )
+                print(
+                    "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
+                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
+                    )
+                )
+            else:
+                print(
+                    "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
+                        DST=dst, N=size, BOOL=bool_type[dst]
+                    )
                 )
-            )
         if mode == "_rtp":
             print(
                 "  return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
@@ -460,11 +489,25 @@ def generate_float_conversion(src, dst, size, mode, sat):
                 )
             )
         if mode == "_rtn":
-            print(
-                "  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
-                    DST=dst, N=size, BOOL=bool_type[dst]
+            if clspv:
+                print("  {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(BOOL=bool_type[dst], N=size))
+                if sizeof_type[src] >= 4 and src in int_types:
+                    print(
+                        "  c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
+                            BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
+                        )
+                    )
+                print(
+                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
+                        DST=dst, N=size, BOOL=bool_type[dst], SRC=src
+                    )
+                )
+            else:
+                print(
+                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
+                        DST=dst, N=size, BOOL=bool_type[dst]
+                    )
                 )
-            )
 
     # Footer
     print("}")
@@ -484,4 +527,8 @@ def generate_float_conversion(src, dst, size, mode, sat):
     for dst in float_types:
         for size in vector_sizes:
             for mode in rounding_modes:
+                # Do not generate "_rte" conversions for clspv as they are
+                # handled natively
+                if clspv and mode == "_rte":
+                    continue
                 generate_float_conversion(src, dst, size, mode, "")