[Libclc-dev] [PATCH 2/3] integer/add_sat: Use clang builtin instead of llvm asm
Jan Vesely via Libclc-dev
libclc-dev at lists.llvm.org
Sun Oct 1 14:13:35 PDT 2017
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
CTS integer_add_sat still passes
generic/lib/SOURCES | 2 -
generic/lib/integer/add_sat.cl | 36 ++++++++++++----
generic/lib/integer/add_sat_if.ll | 55 ------------------------
generic/lib/integer/add_sat_impl.ll | 83 -------------------------------------
ptx/lib/OVERRIDES | 1 -
ptx/lib/SOURCES | 3 +-
ptx/lib/integer/add_sat.ll | 55 ------------------------
7 files changed, 29 insertions(+), 206 deletions(-)
delete mode 100644 generic/lib/integer/add_sat_if.ll
delete mode 100644 generic/lib/integer/add_sat_impl.ll
delete mode 100644 ptx/lib/integer/add_sat.ll
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index aa7f4aa..c3cc350 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -64,8 +64,6 @@ geometric/normalize.cl
integer/abs.cl
integer/abs_diff.cl
integer/add_sat.cl
-integer/add_sat_if.ll
-integer/add_sat_impl.ll
integer/clz.cl
integer/hadd.cl
integer/mad24.cl
diff --git a/generic/lib/integer/add_sat.cl b/generic/lib/integer/add_sat.cl
index d4df66d..252dce9 100644
--- a/generic/lib/integer/add_sat.cl
+++ b/generic/lib/integer/add_sat.cl
@@ -12,35 +12,55 @@ _CLC_DECL long __clc_add_sat_s64(long, long);
_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong);
_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
- return __clc_add_sat_s8(x, y);
+ short r = x + y;
+ return convert_char_sat(r);
}
_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
- return __clc_add_sat_u8(x, y);
+ ushort r = x + y;
+ return convert_uchar_sat(r);
}
_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
- return __clc_add_sat_s16(x, y);
+ int r = x + y;
+ return convert_short_sat(r);
}
_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
- return __clc_add_sat_u16(x, y);
+ uint r = x + y;
+ return convert_ushort_sat(r);
}
_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
- return __clc_add_sat_s32(x, y);
+ int r;
+ if (__builtin_sadd_overflow(x, y, &r))
+ // Overflow can only occur if both operands are positive or both are
+ // negative, thus we only need to check the sign of one operand
+ return x > 0 ? INT_MAX : INT_MIN;
+ return r;
}
_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
- return __clc_add_sat_u32(x, y);
+ uint r;
+ if (__builtin_uadd_overflow(x, y, &r))
+ return UINT_MAX;
+ return r;
}
_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
- return __clc_add_sat_s64(x, y);
+ long r;
+ if (__builtin_saddl_overflow(x, y, &r))
+ // Overflow can only occur if both operands are positive or both are
+ // negative, thus we only need to check the sign of one operand
+ return x > 0 ? LONG_MAX : LONG_MIN;
+ return r;
}
_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
- return __clc_add_sat_u64(x, y);
+ ulong r;
+ if (__builtin_uaddl_overflow(x, y, &r))
+ return ULONG_MAX;
+ return r;
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
diff --git a/generic/lib/integer/add_sat_if.ll b/generic/lib/integer/add_sat_if.ll
deleted file mode 100644
index bcbe4c0..0000000
--- a/generic/lib/integer/add_sat_if.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
-
-define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
- ret i8 %call
-}
-
-declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
-
-define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
- ret i8 %call
-}
-
-declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
-
-define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
- ret i16 %call
-}
-
-declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
-
-define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
- ret i16 %call
-}
-
-declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
-
-define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
- ret i32 %call
-}
-
-declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
-
-define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
- ret i32 %call
-}
-
-declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
-
-define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
- ret i64 %call
-}
-
-declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
-
-define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
- ret i64 %call
-}
diff --git a/generic/lib/integer/add_sat_impl.ll b/generic/lib/integer/add_sat_impl.ll
deleted file mode 100644
index c150ecb..0000000
--- a/generic/lib/integer/add_sat_impl.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
-declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
-
-define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
- %res = extractvalue {i8, i1} %call, 0
- %over = extractvalue {i8, i1} %call, 1
- %x.msb = ashr i8 %x, 7
- %x.limit = xor i8 %x.msb, 127
- %sat = select i1 %over, i8 %x.limit, i8 %res
- ret i8 %sat
-}
-
-define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
- %res = extractvalue {i8, i1} %call, 0
- %over = extractvalue {i8, i1} %call, 1
- %sat = select i1 %over, i8 -1, i8 %res
- ret i8 %sat
-}
-
-declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
-declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
-
-define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
- %res = extractvalue {i16, i1} %call, 0
- %over = extractvalue {i16, i1} %call, 1
- %x.msb = ashr i16 %x, 15
- %x.limit = xor i16 %x.msb, 32767
- %sat = select i1 %over, i16 %x.limit, i16 %res
- ret i16 %sat
-}
-
-define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
- %res = extractvalue {i16, i1} %call, 0
- %over = extractvalue {i16, i1} %call, 1
- %sat = select i1 %over, i16 -1, i16 %res
- ret i16 %sat
-}
-
-declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
-
-define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
- %res = extractvalue {i32, i1} %call, 0
- %over = extractvalue {i32, i1} %call, 1
- %x.msb = ashr i32 %x, 31
- %x.limit = xor i32 %x.msb, 2147483647
- %sat = select i1 %over, i32 %x.limit, i32 %res
- ret i32 %sat
-}
-
-define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
- %res = extractvalue {i32, i1} %call, 0
- %over = extractvalue {i32, i1} %call, 1
- %sat = select i1 %over, i32 -1, i32 %res
- ret i32 %sat
-}
-
-declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
-declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
-
-define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
- %res = extractvalue {i64, i1} %call, 0
- %over = extractvalue {i64, i1} %call, 1
- %x.msb = ashr i64 %x, 63
- %x.limit = xor i64 %x.msb, 9223372036854775807
- %sat = select i1 %over, i64 %x.limit, i64 %res
- ret i64 %sat
-}
-
-define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
- %res = extractvalue {i64, i1} %call, 0
- %over = extractvalue {i64, i1} %call, 1
- %sat = select i1 %over, i64 -1, i64 %res
- ret i64 %sat
-}
diff --git a/ptx/lib/OVERRIDES b/ptx/lib/OVERRIDES
index 475162c..565dcca 100644
--- a/ptx/lib/OVERRIDES
+++ b/ptx/lib/OVERRIDES
@@ -1,2 +1 @@
-integer/add_sat_if.ll
integer/sub_sat_if.ll
diff --git a/ptx/lib/SOURCES b/ptx/lib/SOURCES
index fb6e17f..913e2ea 100644
--- a/ptx/lib/SOURCES
+++ b/ptx/lib/SOURCES
@@ -1,2 +1 @@
-integer/add_sat.ll
-integer/sub_sat.ll
\ No newline at end of file
+integer/sub_sat.ll
diff --git a/ptx/lib/integer/add_sat.ll b/ptx/lib/integer/add_sat.ll
deleted file mode 100644
index f887962..0000000
--- a/ptx/lib/integer/add_sat.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
-
-define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
- ret i8 %call
-}
-
-declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
-
-define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
- %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
- ret i8 %call
-}
-
-declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
-
-define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
- ret i16 %call
-}
-
-declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
-
-define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
- %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
- ret i16 %call
-}
-
-declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
-
-define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
- ret i32 %call
-}
-
-declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
-
-define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
- %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
- ret i32 %call
-}
-
-declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
-
-define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
- ret i64 %call
-}
-
-declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
-
-define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
- %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
- ret i64 %call
-}
--
2.13.6
More information about the Libclc-dev
mailing list