[llvm] ff30394 - [PhaseOrdering] add tests for x86 abs/max using SSE intrinsics (PR34047); NFC

Wed Nov 3 06:14:24 PDT 2021

Author: Sanjay Patel
Date: 2021-11-03T09:13:23-04:00
New Revision: ff30394de86bcb52d32ca3cd2a55375386122257

URL: https://github.com/llvm/llvm-project/commit/ff30394de86bcb52d32ca3cd2a55375386122257
DIFF: https://github.com/llvm/llvm-project/commit/ff30394de86bcb52d32ca3cd2a55375386122257.diff

LOG: [PhaseOrdering] add tests for x86 abs/max using SSE intrinsics (PR34047); NFC

D113035

Added: 
    llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
new file mode 100644
index 0000000000000..a658b19898896

--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O2 -S -mattr=sse < %s | FileCheck %s
+
+; This file should represent the nearly raw (mem2reg was run to make it more direct)
+; IR for code written using x86 SSE intrinsics to compute integer abs/max functions.
+;
+; https://llvm.org/PR34047
+
+define available_externally <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) { ; per i32 lane: (a > b, signed) ? d : c, built from mask logic
+  %call = call <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %a, <2 x i64> %b) ; lane mask: all-ones where a > b, zero elsewhere
+  %call1 = call <2 x i64> @_mm_andnot_si128(<2 x i64> %call, <2 x i64> %c) ; (~mask) & c: keeps c where the compare was false
+  %call2 = call <2 x i64> @_mm_and_si128(<2 x i64> %call, <2 x i64> %d) ; mask & d: keeps d where the compare was true
+  %call3 = call <2 x i64> @_mm_or_si128(<2 x i64> %call1, <2 x i64> %call2) ; combine the two masked operands
+  ret <2 x i64> %call3
+}
+
+define internal <2 x i64> @_mm_set1_epi32(i32 %__i) { ; builds a vector whose four i32 lanes all hold %__i
+  %call = call <2 x i64> @_mm_set_epi32(i32 %__i, i32 %__i, i32 %__i, i32 %__i) ; same scalar passed for every lane
+  ret <2 x i64> %call
+}
+
+define internal <2 x i64> @_mm_sub_epi32(<2 x i64> %__a, <2 x i64> %__b) { ; lane-wise i32 subtraction: __a - __b
+  %t0 = bitcast <2 x i64> %__a to <4 x i32> ; reinterpret the 128 bits as four i32 lanes
+  %t1 = bitcast <2 x i64> %__b to <4 x i32>
+  %sub = sub <4 x i32> %t0, %t1
+  %t2 = bitcast <4 x i32> %sub to <2 x i64> ; back to the <2 x i64> type used by every signature in this file
+  ret <2 x i64> %t2
+}
+
+define internal <2 x i64> @_mm_setzero_si128() { ; returns the all-zero vector
+  ret <2 x i64> zeroinitializer
+}
+
+define internal <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %__a, <2 x i64> %__b) { ; signed i32 lane compare producing an all-ones / all-zeros mask per lane
+  %t0 = bitcast <2 x i64> %__a to <4 x i32>
+  %t1 = bitcast <2 x i64> %__b to <4 x i32>
+  %cmp = icmp sgt <4 x i32> %t0, %t1 ; signed greater-than, one i1 per lane
+  %sext = sext <4 x i1> %cmp to <4 x i32> ; true -> -1 (all bits set), false -> 0
+  %t2 = bitcast <4 x i32> %sext to <2 x i64>
+  ret <2 x i64> %t2
+}
+
+define internal <2 x i64> @_mm_or_si128(<2 x i64> %__a, <2 x i64> %__b) { ; bitwise OR of the two vectors
+  %or = or <2 x i64> %__a, %__b
+  ret <2 x i64> %or
+}
+
+define internal <2 x i64> @_mm_andnot_si128(<2 x i64> %__a, <2 x i64> %__b) { ; computes (~__a) & __b; note the FIRST operand is the inverted one
+  %neg = xor <2 x i64> %__a, <i64 -1, i64 -1> ; xor with all-ones flips every bit of __a
+  %and = and <2 x i64> %neg, %__b
+  ret <2 x i64> %and
+}
+
+define internal <2 x i64> @_mm_and_si128(<2 x i64> %__a, <2 x i64> %__b) { ; bitwise AND of the two vectors
+  %and = and <2 x i64> %__a, %__b
+  ret <2 x i64> %and
+}
+
+define internal <2 x i64> @_mm_set_epi32(i32 %__i3, i32 %__i2, i32 %__i1, i32 %__i0) { ; packs four scalars into a vector; arguments are listed highest lane first
+  %vecinit = insertelement <4 x i32> undef, i32 %__i0, i32 0 ; start from undef; the LAST argument lands in lane 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %__i1, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %__i2, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %__i3, i32 3 ; all four lanes are now defined
+  %t0 = bitcast <4 x i32> %vecinit3 to <2 x i64>
+  ret <2 x i64> %t0
+}
+
+define <2 x i64> @abs_v4i32(<2 x i64> %x) { ; per i32 lane: (x > -1) ? x : (0 - x), written with the helper calls above
+; CHECK-LABEL: @abs_v4i32(
+; CHECK-NEXT:    [[T1_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[T1_I]]
+; CHECK-NEXT:    [[T1_I_LOBIT:%.*]] = ashr <4 x i32> [[T1_I]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[T1_I_LOBIT]] to <2 x i64>
+; CHECK-NEXT:    [[T2_I_I:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[AND_I_I1:%.*]] = and <4 x i32> [[T1_I_LOBIT]], [[SUB_I]]
+; CHECK-NEXT:    [[AND_I_I:%.*]] = bitcast <4 x i32> [[AND_I_I1]] to <2 x i64>
+; CHECK-NEXT:    [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]]
+; CHECK-NEXT:    [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]]
+; CHECK-NEXT:    ret <2 x i64> [[OR_I_I]]
+;
+  %call = call <2 x i64> @_mm_set1_epi32(i32 -1) ; -1 in every i32 lane: the threshold for the "is non-negative" compare
+  %call1 = call <2 x i64> @_mm_setzero_si128() ; zero vector, the minuend for the negation below
+  %call2 = call <2 x i64> @_mm_sub_epi32(<2 x i64> %call1, <2 x i64> %x) ; 0 - x per lane, i.e. the negated input
+  %call3 = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %call, <2 x i64> %call2, <2 x i64> %x) ; lanes with x > -1 keep x, the rest take 0 - x
+  ret <2 x i64> %call3
+}
+
+define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) { ; per i32 lane: (x > y, signed) ? x : y
+; CHECK-LABEL: @max_v4i32(
+; CHECK-NEXT:    [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]]
+; CHECK-NEXT:    [[SEXT_I_I:%.*]] = sext <4 x i1> [[CMP_I_I]] to <4 x i32>
+; CHECK-NEXT:    [[T2_I_I:%.*]] = bitcast <4 x i32> [[SEXT_I_I]] to <2 x i64>
+; CHECK-NEXT:    [[NEG_I_I:%.*]] = xor <2 x i64> [[T2_I_I]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[AND_I_I:%.*]] = and <2 x i64> [[NEG_I_I]], [[Y]]
+; CHECK-NEXT:    [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]]
+; CHECK-NEXT:    [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]]
+; CHECK-NEXT:    ret <2 x i64> [[OR_I_I]]
+;
+  %call = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %y, <2 x i64> %y, <2 x i64> %x) ; compare x > y; false lanes take y (3rd arg), true lanes take x (4th arg)
+  ret <2 x i64> %call
+}