[PATCH][AVX512] Hook up vector int_ctlz for AVX512
Cameron McInally
cameron.mcinally at nyu.edu
Fri Jun 13 06:13:14 PDT 2014
Hey guys,
Attached is a patch to hook up the int_ctlz intrinsic to the AVX512
VPLZCNT* instructions.
Tia,
Cameron
-------------- next part --------------
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 210892)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -356,6 +356,24 @@
ret <8 x i64> %res
}
+define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
+ ; CHECK-LABEL: test_ctlz_d
+ ; CHECK: vplzcntd
+ %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
+
+define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
+ ; CHECK-LABEL: test_ctlz_q
+ ; CHECK: vplzcntq
+ %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
+
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK: vblendmps
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td (revision 210892)
+++ lib/Target/X86/X86InstrAVX512.td (working copy)
@@ -4378,6 +4378,15 @@
(VPLZCNTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
+ (VPLZCNTDrm addr:$src)>;
+def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
+ (VPLZCNTDrr VR512:$src)>;
+def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
+ (VPLZCNTQrm addr:$src)>;
+def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
+ (VPLZCNTQrr VR512:$src)>;
+
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 210892)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -1439,6 +1439,11 @@
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
+ }
+
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
More information about the llvm-commits
mailing list