[llvm] r280465 - [AVX-512] Add more patterns for masked and broadcasted logical operations where the select or broadcast has a floating point type.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 1 22:29:13 PDT 2016
Author: ctopper
Date: Fri Sep 2 00:29:13 2016
New Revision: 280465
URL: http://llvm.org/viewvc/llvm-project?rev=280465&view=rev
Log:
[AVX-512] Add more patterns for masked and broadcasted logical operations where the select or broadcast has a floating point type.
These patterns are needed so that the masked floating-point logical operation intrinsics can be removed in favor of native IR.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-arith.ll
llvm/trunk/test/CodeGen/X86/vec_fabs.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Sep 2 00:29:13 2016
@@ -4238,215 +4238,72 @@ defm VXOR : avx512_fp_binop_p<0x57, "vx
SSE_ALU_ITINS_P, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
-let Predicates = [HasVLX] in {
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
- VR128X:$src2)>;
-
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (vselect VK4WM:$mask,
- (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
- VR128X:$src0)),
- (VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
- (bitconvert (v4i32 immAllZerosV)))),
- (VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v8f32 (vselect VK8WM:$mask,
- (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
- VR256X:$src0)),
- (VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
- def : Pat<(v4f64 (vselect VK4WM:$mask,
- (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
- (bitconvert (v8i32 immAllZerosV)))),
- (VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _, Predicate prd> {
+let Predicates = [prd] in {
+ // Masked register-register logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+ // Masked register-memory logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2)>;
+ // Register-broadcast logical operations.
+ def : Pat<(_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+}
}
-let Predicates = [HasAVX512] in {
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v16f32 (vselect VK16WM:$mask,
- (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
- VR512:$src0)),
- (VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
- def : Pat<(v8f64 (vselect VK8WM:$mask,
- (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
- (bitconvert (v16i32 immAllZerosV)))),
- (VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
+ defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
+defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
+defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
+defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
+defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
+
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Fri Sep 2 00:29:13 2016
@@ -985,20 +985,17 @@ define <8 x double> @test_maskz_broadcas
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: ## BB#0:
-; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
@@ -1051,20 +1048,17 @@ define <8 x float> @test_fxor_8f32(<8 x
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
-; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
@@ -1085,20 +1079,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: ## BB#0:
-; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=280465&r1=280464&r2=280465&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Fri Sep 2 00:29:13 2016
@@ -145,8 +145,7 @@ define <8 x double> @fabs_v8f64(<8 x dou
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -163,8 +162,7 @@ define <8 x double> @fabs_v8f64(<8 x dou
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -186,8 +184,7 @@ define <16 x float> @fabs_v16f32(<16 x f
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
@@ -204,8 +201,7 @@ define <16 x float> @fabs_v16f32(<16 x f
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
More information about the llvm-commits mailing list