[llvm] r293464 - [AVX-512] Remove duplicate CodeGenOnly patterns for scalar register broadcast. We can use COPY_TO_REGCLASS like AVX does.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 29 22:59:07 PST 2017
Author: ctopper
Date: Mon Jan 30 00:59:06 2017
New Revision: 293464
URL: http://llvm.org/viewvc/llvm-project?rev=293464&view=rev
Log:
[AVX-512] Remove duplicate CodeGenOnly patterns for scalar register broadcast. We can use COPY_TO_REGCLASS like AVX does.
This causes stack spill slots to be oversized sometimes, but the same should already be happening with AVX.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h
llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jan 30 00:59:06 2017
@@ -846,32 +846,20 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRM
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
-
- let isCodeGenOnly = 1 in {
- def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}",
- [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>,
- Requires<[HasAVX512]>, T8PD, EVEX;
-
- let Constraints = "$src0 = $dst" in
- def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.RC:$src0))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K;
-
- def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.ImmAllZerosV))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ;
- } // let isCodeGenOnly = 1 in
+ def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
+ DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
+ DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jan 30 00:59:06 2017
@@ -867,9 +867,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// AVX-512 foldable instructions
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
@@ -907,9 +905,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
@@ -942,7 +938,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
Modified: llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h Mon Jan 30 00:59:06 2017
@@ -296,7 +296,6 @@ static const X86EvexToVexCompressTableEn
{ X86::VANDPSZ128rr , X86::VANDPSrr },
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
- { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
{ X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
{ X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr },
{ X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm },
@@ -727,10 +726,8 @@ static const X86EvexToVexCompressTableEn
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
- { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
{ X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm },
Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll Mon Jan 30 00:59:06 2017
@@ -128,7 +128,7 @@ define <8 x double> @_sd8xdouble_mask(
; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3
; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
-; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: vmovapd %zmm1, %zmm0
; ALL-NEXT: retq
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
@@ -406,14 +406,14 @@ declare void @func_f32(float)
define <16 x float> @broadcast_ss_spill(float %x) {
; ALL-LABEL: broadcast_ss_spill:
; ALL: # BB#0:
-; ALL-NEXT: pushq %rax
+; ALL-NEXT: subq $24, %rsp
; ALL-NEXT: .Lcfi0:
-; ALL-NEXT: .cfi_def_cfa_offset 16
+; ALL-NEXT: .cfi_def_cfa_offset 32
; ALL-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; ALL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT: callq func_f32
-; ALL-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %zmm0 # 4-byte Folded Reload
-; ALL-NEXT: popq %rax
+; ALL-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
+; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
@@ -426,14 +426,14 @@ declare void @func_f64(double)
define <8 x double> @broadcast_sd_spill(double %x) {
; ALL-LABEL: broadcast_sd_spill:
; ALL: # BB#0:
-; ALL-NEXT: pushq %rax
+; ALL-NEXT: subq $24, %rsp
; ALL-NEXT: .Lcfi1:
-; ALL-NEXT: .cfi_def_cfa_offset 16
+; ALL-NEXT: .cfi_def_cfa_offset 32
; ALL-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT: callq func_f64
-; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 8-byte Folded Reload
-; ALL-NEXT: popq %rax
+; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
+; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: retq
%a = fadd double %x, %x
call void @func_f64(double %a)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll Mon Jan 30 00:59:06 2017
@@ -5,14 +5,14 @@ declare void @func_f32(float)
define <8 x float> @_256_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _256_broadcast_ss_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi0:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f32
-; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 # 4-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
@@ -24,14 +24,14 @@ define <8 x float> @_256_broadcast_ss_sp
define <4 x float> @_128_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _128_broadcast_ss_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi1:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f32
-; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd float %x, %x
call void @func_f32(float %a)
@@ -44,14 +44,14 @@ declare void @func_f64(double)
define <4 x double> @_256_broadcast_sd_spill(double %x) {
; CHECK-LABEL: _256_broadcast_sd_spill:
; CHECK: # BB#0:
-; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi2:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f64
-; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 8-byte Folded Reload
-; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
%a = fadd double %x, %x
call void @func_f64(double %a)
Modified: llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir?rev=293464&r1=293463&r2=293464&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir (original)
+++ llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir Mon Jan 30 00:59:06 2017
@@ -691,14 +691,10 @@ body: |
%ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
%ymm0 = VBROADCASTSDZ256r %xmm0
- ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
- %ymm0 = VBROADCASTSDZ256r_s %xmm0
; CHECK: %ymm0 = VBROADCASTSSYrm %rip, 1, _, %rax, _
%ymm0 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
%ymm0 = VBROADCASTSSZ256r %xmm0
- ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
- %ymm0 = VBROADCASTSSZ256r_s %xmm0
; CHECK: %ymm0 = VPBROADCASTBYrm %rip, 1, _, %rax, _
%ymm0 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
; CHECK: %ymm0 = VPBROADCASTBYrr %xmm0
@@ -1695,8 +1691,6 @@ body: |
%xmm0 = VBROADCASTSSZ128m %rip, _, _, _, _
; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
%xmm0 = VBROADCASTSSZ128r %xmm0
- ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
- %xmm0 = VBROADCASTSSZ128r_s %xmm0
; CHECK: %xmm0 = VPBROADCASTBrm %rip, _, _, _, _
%xmm0 = VPBROADCASTBZ128m %rip, _, _, _, _
; CHECK: %xmm0 = VPBROADCASTBrr %xmm0
@@ -2928,14 +2922,10 @@ body: |
%ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0
%ymm16 = VBROADCASTSDZ256r %xmm0
- ; CHECK: %ymm16 = VBROADCASTSDZ256r_s %xmm0
- %ymm16 = VBROADCASTSDZ256r_s %xmm0
; CHECK: %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
%ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VBROADCASTSSZ256r %xmm0
%ymm16 = VBROADCASTSSZ256r %xmm0
- ; CHECK: %ymm16 = VBROADCASTSSZ256r_s %xmm0
- %ymm16 = VBROADCASTSSZ256r_s %xmm0
; CHECK: %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
%ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
; CHECK: %ymm16 = VPBROADCASTBZ256r %xmm0
@@ -3932,8 +3922,6 @@ body: |
%xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _
; CHECK: %xmm16 = VBROADCASTSSZ128r %xmm16
%xmm16 = VBROADCASTSSZ128r %xmm16
- ; CHECK: %xmm16 = VBROADCASTSSZ128r_s %xmm16
- %xmm16 = VBROADCASTSSZ128r_s %xmm16
; CHECK: %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
%xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
; CHECK: %xmm16 = VPBROADCASTBZ128r %xmm16
More information about the llvm-commits
mailing list