[llvm] ff30394 - [PhaseOrdering] add tests for x86 abs/max using SSE intrinsics (PR34047); NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 3 06:14:24 PDT 2021
Author: Sanjay Patel
Date: 2021-11-03T09:13:23-04:00
New Revision: ff30394de86bcb52d32ca3cd2a55375386122257
URL: https://github.com/llvm/llvm-project/commit/ff30394de86bcb52d32ca3cd2a55375386122257
DIFF: https://github.com/llvm/llvm-project/commit/ff30394de86bcb52d32ca3cd2a55375386122257.diff
LOG: [PhaseOrdering] add tests for x86 abs/max using SSE intrinsics (PR34047); NFC
D113035
Added:
llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
new file mode 100644
index 0000000000000..a658b19898896
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O2 -S -mattr=sse < %s | FileCheck %s
+
+; This file should represent the nearly raw (mem2reg was run to make it more direct)
+; IR for code written using x86 SSE intrinsics to compute integer abs/max functions.
+;
+; https://llvm.org/PR34047
+
+define available_externally <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) { ; per-i32-lane select: %d where %a > %b (signed), otherwise %c
+ %call = call <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %a, <2 x i64> %b) ; lane mask: all-ones where %a > %b, zero elsewhere
+ %call1 = call <2 x i64> @_mm_andnot_si128(<2 x i64> %call, <2 x i64> %c) ; ~mask & %c: keeps %c in lanes where the compare was false
+ %call2 = call <2 x i64> @_mm_and_si128(<2 x i64> %call, <2 x i64> %d) ; mask & %d: keeps %d in lanes where the compare was true
+ %call3 = call <2 x i64> @_mm_or_si128(<2 x i64> %call1, <2 x i64> %call2) ; the two masked values are bit-disjoint, so OR merges them into the selected result
+ ret <2 x i64> %call3
+}
+
+define internal <2 x i64> @_mm_set1_epi32(i32 %__i) { ; builds a vector whose four i32 lanes all hold %__i
+ %call = call <2 x i64> @_mm_set_epi32(i32 %__i, i32 %__i, i32 %__i, i32 %__i) ; same scalar passed for every lane argument
+ ret <2 x i64> %call
+}
+
+define internal <2 x i64> @_mm_sub_epi32(<2 x i64> %__a, <2 x i64> %__b) { ; lane-wise 32-bit subtraction: %__a - %__b
+ %t0 = bitcast <2 x i64> %__a to <4 x i32> ; reinterpret the same 128 bits as four i32 lanes
+ %t1 = bitcast <2 x i64> %__b to <4 x i32>
+ %sub = sub <4 x i32> %t0, %t1 ; no nsw/nuw flags, so each lane wraps on overflow
+ %t2 = bitcast <4 x i32> %sub to <2 x i64> ; back to the <2 x i64> type used in the signature
+ ret <2 x i64> %t2
+}
+
+define internal <2 x i64> @_mm_setzero_si128() { ; returns the all-zero 128-bit vector
+ ret <2 x i64> zeroinitializer
+}
+
+define internal <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %__a, <2 x i64> %__b) { ; signed per-i32-lane %__a > %__b, returned as a lane mask
+ %t0 = bitcast <2 x i64> %__a to <4 x i32> ; reinterpret the same 128 bits as four i32 lanes
+ %t1 = bitcast <2 x i64> %__b to <4 x i32>
+ %cmp = icmp sgt <4 x i32> %t0, %t1 ; one i1 per lane, signed greater-than
+ %sext = sext <4 x i1> %cmp to <4 x i32> ; true -> -1 (all bits set), false -> 0
+ %t2 = bitcast <4 x i32> %sext to <2 x i64> ; back to the <2 x i64> type used in the signature
+ ret <2 x i64> %t2
+}
+
+define internal <2 x i64> @_mm_or_si128(<2 x i64> %__a, <2 x i64> %__b) { ; bitwise OR of the two 128-bit vectors
+ %or = or <2 x i64> %__a, %__b
+ ret <2 x i64> %or
+}
+
+define internal <2 x i64> @_mm_andnot_si128(<2 x i64> %__a, <2 x i64> %__b) { ; computes (~%__a) & %__b; note it is the FIRST operand that is inverted
+ %neg = xor <2 x i64> %__a, <i64 -1, i64 -1> ; xor with all-ones flips every bit of %__a
+ %and = and <2 x i64> %neg, %__b
+ ret <2 x i64> %and
+}
+
+define internal <2 x i64> @_mm_and_si128(<2 x i64> %__a, <2 x i64> %__b) { ; bitwise AND of the two 128-bit vectors
+ %and = and <2 x i64> %__a, %__b
+ ret <2 x i64> %and
+}
+
+define internal <2 x i64> @_mm_set_epi32(i32 %__i3, i32 %__i2, i32 %__i1, i32 %__i0) { ; packs four scalars into a vector; parameters are listed highest lane first
+ %vecinit = insertelement <4 x i32> undef, i32 %__i0, i32 0 ; starts from undef; every lane is overwritten by the four inserts
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 %__i1, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %__i2, i32 2
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %__i3, i32 3 ; the first parameter ends up in lane 3
+ %t0 = bitcast <4 x i32> %vecinit3 to <2 x i64> ; convert to the <2 x i64> return type
+ ret <2 x i64> %t0
+}
+
+define <2 x i64> @abs_v4i32(<2 x i64> %x) { ; per-i32-lane absolute value written as compare + mask-select; assertions below record the -O2 result
+; CHECK-LABEL: @abs_v4i32(
+; CHECK-NEXT: [[T1_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[T1_I]]
+; CHECK-NEXT: [[T1_I_LOBIT:%.*]] = ashr <4 x i32> [[T1_I]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[T1_I_LOBIT]] to <2 x i64>
+; CHECK-NEXT: [[T2_I_I:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[AND_I_I1:%.*]] = and <4 x i32> [[T1_I_LOBIT]], [[SUB_I]]
+; CHECK-NEXT: [[AND_I_I:%.*]] = bitcast <4 x i32> [[AND_I_I1]] to <2 x i64>
+; CHECK-NEXT: [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]]
+; CHECK-NEXT: [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]]
+; CHECK-NEXT: ret <2 x i64> [[OR_I_I]]
+;
+ %call = call <2 x i64> @_mm_set1_epi32(i32 -1) ; all four lanes = -1; "x > -1" is the non-negative test
+ %call1 = call <2 x i64> @_mm_setzero_si128() ; zero vector, used as the minuend for negation
+ %call2 = call <2 x i64> @_mm_sub_epi32(<2 x i64> %call1, <2 x i64> %x) ; 0 - %x per lane (wrapping negation)
+ %call3 = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %call, <2 x i64> %call2, <2 x i64> %x) ; lanes with %x > -1 keep %x, all other lanes take the negated value
+ ret <2 x i64> %call3
+}
+
+define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) { ; per-i32-lane signed maximum written as compare + mask-select; assertions below record the -O2 result
+; CHECK-LABEL: @max_v4i32(
+; CHECK-NEXT: [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT: [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]]
+; CHECK-NEXT: [[SEXT_I_I:%.*]] = sext <4 x i1> [[CMP_I_I]] to <4 x i32>
+; CHECK-NEXT: [[T2_I_I:%.*]] = bitcast <4 x i32> [[SEXT_I_I]] to <2 x i64>
+; CHECK-NEXT: [[NEG_I_I:%.*]] = xor <2 x i64> [[T2_I_I]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[AND_I_I:%.*]] = and <2 x i64> [[NEG_I_I]], [[Y]]
+; CHECK-NEXT: [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]]
+; CHECK-NEXT: [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]]
+; CHECK-NEXT: ret <2 x i64> [[OR_I_I]]
+;
+ %call = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %y, <2 x i64> %y, <2 x i64> %x) ; lanes with %x > %y take %x, all other lanes take %y
+ ret <2 x i64> %call
+}
More information about the llvm-commits mailing list