[llvm] r272806 - [x86, SSE] remove the GCCBuiltins from the integer min/max intrinsics
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 15 10:17:27 PDT 2016
Author: spatel
Date: Wed Jun 15 12:17:27 2016
New Revision: 272806
URL: http://llvm.org/viewvc/llvm-project?rev=272806&view=rev
Log:
[x86, SSE] remove the GCCBuiltins from the integer min/max intrinsics
This allows us to emit native IR in Clang (next commit).
Also, update the intrinsic tests to show that codegen already knows how to handle
the IR that Clang will soon produce.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=272806&r1=272805&r2=272806&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Jun 15 12:17:27 2016
@@ -406,16 +406,16 @@ let TargetPrefix = "x86" in { // All in
def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
+ def int_x86_sse2_pmaxu_b :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
+ def int_x86_sse2_pmaxs_w :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
+ def int_x86_sse2_pminu_b :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
+ def int_x86_sse2_pmins_w :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
@@ -737,28 +737,28 @@ let TargetPrefix = "x86" in { // All in
// Vector compare, min, max
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
+ def int_x86_sse41_pmaxsb :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">,
+ def int_x86_sse41_pmaxsd :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">,
+ def int_x86_sse41_pmaxud :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">,
+ def int_x86_sse41_pmaxuw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">,
+ def int_x86_sse41_pminsb :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">,
+ def int_x86_sse41_pminsd :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">,
+ def int_x86_sse41_pminud :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">,
+ def int_x86_sse41_pminuw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
}
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=272806&r1=272805&r2=272806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Wed Jun 15 12:17:27 2016
@@ -1598,8 +1598,9 @@ define <2 x i64> @test_mm_max_epi16(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
- %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %arg0, <8 x i16> %arg1)
- %bc = bitcast <8 x i16> %res to <2 x i64>
+ %cmp = icmp sgt <8 x i16> %arg0, %arg1
+ %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
+ %bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
@@ -1616,8 +1617,9 @@ define <2 x i64> @test_mm_max_epu8(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
- %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %arg0, <16 x i8> %arg1)
- %bc = bitcast <16 x i8> %res to <2 x i64>
+ %cmp = icmp ugt <16 x i8> %arg0, %arg1
+ %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
+ %bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
@@ -1679,8 +1681,9 @@ define <2 x i64> @test_mm_min_epi16(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
- %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %arg0, <8 x i16> %arg1)
- %bc = bitcast <8 x i16> %res to <2 x i64>
+ %cmp = icmp slt <8 x i16> %arg0, %arg1
+ %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
+ %bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
@@ -1697,8 +1700,9 @@ define <2 x i64> @test_mm_min_epu8(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
- %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %arg0, <16 x i8> %arg1)
- %bc = bitcast <16 x i8> %res to <2 x i64>
+ %cmp = icmp ult <16 x i8> %arg0, %arg1
+ %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
+ %bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll?rev=272806&r1=272805&r2=272806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll Wed Jun 15 12:17:27 2016
@@ -602,11 +602,11 @@ define <2 x i64> @test_mm_max_epi8(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
- %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %arg0, <16 x i8> %arg1)
- %bc = bitcast <16 x i8> %res to <2 x i64>
+ %cmp = icmp sgt <16 x i8> %arg0, %arg1
+ %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
+ %bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epi32:
@@ -620,11 +620,11 @@ define <2 x i64> @test_mm_max_epi32(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
- %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %arg0, <4 x i32> %arg1)
- %bc = bitcast <4 x i32> %res to <2 x i64>
+ %cmp = icmp sgt <4 x i32> %arg0, %arg1
+ %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
+ %bc = bitcast <4 x i32> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epu16:
@@ -638,11 +638,11 @@ define <2 x i64> @test_mm_max_epu16(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
- %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %arg0, <8 x i16> %arg1)
- %bc = bitcast <8 x i16> %res to <2 x i64>
+ %cmp = icmp ugt <8 x i16> %arg0, %arg1
+ %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
+ %bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epu32:
@@ -656,11 +656,11 @@ define <2 x i64> @test_mm_max_epu32(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
- %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %arg0, <4 x i32> %arg1)
- %bc = bitcast <4 x i32> %res to <2 x i64>
+ %cmp = icmp ugt <4 x i32> %arg0, %arg1
+ %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
+ %bc = bitcast <4 x i32> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epi8:
@@ -674,11 +674,11 @@ define <2 x i64> @test_mm_min_epi8(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
- %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %arg0, <16 x i8> %arg1)
- %bc = bitcast <16 x i8> %res to <2 x i64>
+ %cmp = icmp slt <16 x i8> %arg0, %arg1
+ %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
+ %bc = bitcast <16 x i8> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epi32:
@@ -692,11 +692,11 @@ define <2 x i64> @test_mm_min_epi32(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
- %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %arg0, <4 x i32> %arg1)
- %bc = bitcast <4 x i32> %res to <2 x i64>
+ %cmp = icmp slt <4 x i32> %arg0, %arg1
+ %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
+ %bc = bitcast <4 x i32> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epu16:
@@ -710,11 +710,11 @@ define <2 x i64> @test_mm_min_epu16(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
- %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %arg0, <8 x i16> %arg1)
- %bc = bitcast <8 x i16> %res to <2 x i64>
+ %cmp = icmp ult <8 x i16> %arg0, %arg1
+ %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
+ %bc = bitcast <8 x i16> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epu32:
@@ -728,11 +728,11 @@ define <2 x i64> @test_mm_min_epu32(<2 x
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
- %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %arg0, <4 x i32> %arg1)
- %bc = bitcast <4 x i32> %res to <2 x i64>
+ %cmp = icmp ult <4 x i32> %arg0, %arg1
+ %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
+ %bc = bitcast <4 x i32> %sel to <2 x i64>
ret <2 x i64> %bc
}
-declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_minpos_epu16:
More information about the llvm-commits mailing list