[PATCH][AVX512] Add intrinsics for Packed Absolute Value
Cameron McInally
cameron.mcinally at nyu.edu
Tue Dec 24 08:32:02 PST 2013
Hi Elena,
Attached is an updated patch to handle masked packed absolute value. I
modeled this after the existing AVX-512 conflict-detection patterns and
intrinsics.
Also, does GCC mention how they will handle zero-masking intrinsics
(zeroing, rather than merging, the masked-off elements)? It wasn't
obvious to me from looking at their documentation.
Thanks,
Cameron
On Tue, Dec 24, 2013 at 9:53 AM, Demikhovsky, Elena
<elena.demikhovsky at intel.com> wrote:
> Hi Cameron,
>
> I'm looking at GCC intrinsics.
> I see
> __builtin_ia32_pabsq512_mask and
> __builtin_ia32_pabsd512_mask.
>
> I think we should be aligned with GCC.
>
>
> - Elena
>
>
> -----Original Message-----
> From: Cameron McInally [mailto:cameron.mcinally at nyu.edu]
> Sent: Tuesday, December 24, 2013 16:45
> To: llvm-commits at cs.uiuc.edu
> Cc: Demikhovsky, Elena
> Subject: [PATCH][AVX512] Add intrinsics for Packed Absolute Value
>
> Hey guys,
>
> Attached is a patch to support AVX512 Packed Absolute Value intrinsics.
>
> Thanks,
> Cameron
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
-------------- next part --------------
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 197980)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -346,6 +346,20 @@
}
declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
+define <16 x i32> @test_x86_pabs_d(<16 x i32> %a0) {
+ ; CHECK: vpabsd
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a0, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_pabs_q(<8 x i64> %a0) {
+ ; CHECK: vpabsq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a0, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) nounwind readnone
+
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK: movw $-1, %ax
; CHECK: vpxor
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td (revision 197980)
+++ include/llvm/IR/IntrinsicsX86.td (working copy)
@@ -2747,6 +2747,14 @@
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
}
+// Absolute value ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx512_mask_pabs_d_512 : GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pabs_q_512 : GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq512">,
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td (revision 197980)
+++ lib/Target/X86/X86InstrAVX512.td (working copy)
@@ -3542,7 +3542,8 @@
def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
(VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
-multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
+multiclass avx512_vpabs<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
@@ -3551,13 +3552,35 @@
(ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
EVEX;
+
+ let Constraints = "$src1 = $dst" in {
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+ }
}
-defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
+defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, VK16WM, i512mem>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
+defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, VK8WM, i512mem>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
+def : Pat<(int_x86_avx512_mask_pabs_d_512 VR512:$src2, VR512:$src1,
+ GR16:$mask),
+ (VPABSDrrk VR512:$src1,
+ (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
+def : Pat<(int_x86_avx512_mask_pabs_q_512 VR512:$src2, VR512:$src1,
+ GR8:$mask),
+ (VPABSQrrk VR512:$src1,
+ (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
More information about the llvm-commits
mailing list