[llvm] r282835 - [AVX-512] Always use the full 32 register vector classes for addRegisterClass regardless of whether AVX512/VLX is enabled or not.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 29 21:31:34 PDT 2016
Author: ctopper
Date: Thu Sep 29 23:31:33 2016
New Revision: 282835
URL: http://llvm.org/viewvc/llvm-project?rev=282835&view=rev
Log:
[AVX-512] Always use the full 32 register vector classes for addRegisterClass regardless of whether AVX512/VLX is enabled or not.
If AVX512 is disabled, the upper registers (XMM16-31 and YMM16-31) should already be marked reserved. Pattern predicates and register classes on the instructions should take care of most of the rest. Loads, stores, and physical register copies for XMM16-31 and YMM16-31 without VLX have already been handled.
I'm a little unclear on why this changed the register allocation in the SSE2 run of the sad.ll test, but the registers selected appear to be valid after the change.
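
For readers wondering why dropping the hasAVX512()/hasVLX() checks is safe: the commit relies on the target's reserved-register logic, not on the register class passed to addRegisterClass, to keep XMM16-31 and YMM16-31 away from the allocator. The sketch below approximates that mechanism in X86RegisterInfo::getReservedRegs; it is abridged, not verbatim code from the tree, and the pointer-arithmetic loop assumes XMM16-XMM31 are contiguous in the generated register enum.

// Abridged sketch of X86RegisterInfo::getReservedRegs (not verbatim).
// Assumes XMM0-XMM31 are contiguous in the generated register enum,
// which is an illustration-only shortcut.
#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  // ... stack pointer, frame pointer, and similar reservations elided ...

  // Without AVX-512 (or outside 64-bit mode) the upper sixteen vector
  // registers are not encodable, so reserve them along with every
  // overlapping YMM/ZMM alias. This is what allows addRegisterClass to
  // hand out the wider FR32X/VR128X/VR256X classes unconditionally.
  if (!Subtarget.is64Bit() || !Subtarget.hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n)
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
  }
  return Reserved;
}

With the upper registers reserved whenever AVX-512 is unavailable, registering the *X classes unconditionally cannot cause XMM16-31 to be allocated, so the ternary checks removed in the diff below were redundant.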
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sad.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=282835&r1=282834&r2=282835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Sep 29 23:31:33 2016
@@ -485,10 +485,8 @@ X86TargetLowering::X86TargetLowering(con
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
- : &X86::FR32RegClass);
- addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
- : &X86::FR64RegClass);
+ addRegisterClass(MVT::f32, &X86::FR32XRegClass);
+ addRegisterClass(MVT::f64, &X86::FR64XRegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
@@ -517,8 +515,7 @@ X86TargetLowering::X86TargetLowering(con
} else if (UseX87 && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
- : &X86::FR32RegClass);
+ addRegisterClass(MVT::f32, &X86::FR32XRegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
@@ -721,8 +718,7 @@ X86TargetLowering::X86TargetLowering(con
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
- addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v4f32, &X86::VR128XRegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
@@ -735,19 +731,14 @@ X86TargetLowering::X86TargetLowering(con
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
- addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v2f64, &X86::VR128XRegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
- addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v16i8, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v8i16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v4i32, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v2i64, &X86::VR128XRegClass);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
@@ -955,18 +946,12 @@ X86TargetLowering::X86TargetLowering(con
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
bool HasInt256 = Subtarget.hasInt256();
- addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
+ addRegisterClass(MVT::v32i8, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v16i16, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v8i32, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v8f32, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v4i64, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v4f64, &X86::VR256XRegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
Modified: llvm/trunk/test/CodeGen/X86/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sad.ll?rev=282835&r1=282834&r2=282835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sad.ll Thu Sep 29 23:31:33 2016
@@ -155,12 +155,12 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm13, %xmm13
; SSE2-NEXT: pxor %xmm15, %xmm15
-; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: pxor %xmm14, %xmm14
+; SSE2-NEXT: pxor %xmm6, %xmm6
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB1_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill
@@ -252,11 +252,9 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE2-NEXT: paddd %xmm3, %xmm4
; SSE2-NEXT: paddd %xmm6, %xmm0
-; SSE2-NEXT: paddd %xmm7, %xmm14
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
-; SSE2-NEXT: paddd %xmm5, %xmm3
-; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload
+; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
+; SSE2-NEXT: paddd %xmm7, %xmm6
+; SSE2-NEXT: paddd %xmm5, %xmm14
; SSE2-NEXT: paddd %xmm8, %xmm1
; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
; SSE2-NEXT: paddd %xmm2, %xmm3
@@ -266,9 +264,9 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: jne .LBB1_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm15, %xmm4
-; SSE2-NEXT: paddd %xmm14, %xmm1
+; SSE2-NEXT: paddd %xmm6, %xmm1
; SSE2-NEXT: paddd %xmm13, %xmm0
-; SSE2-NEXT: paddd %xmm5, %xmm2
+; SSE2-NEXT: paddd %xmm14, %xmm2
; SSE2-NEXT: paddd %xmm4, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: paddd %xmm0, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=282835&r1=282834&r2=282835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Thu Sep 29 23:31:33 2016
@@ -3350,69 +3350,69 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
;
; AVX512F-LABEL: cvt_16f32_to_16i16:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm2
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm14
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm3, %ymm3
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm4, %ymm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm5, %ymm5
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm7
+; AVX512F-NEXT: vextractf128 $1, %ymm7, %xmm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm8[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm6, %ymm6
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm8[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm9, %ymm9
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm8[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm10, %ymm10
+; AVX512F-NEXT: vcvtps2ph $4, %zmm8, %ymm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm11 = xmm7[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm11, %ymm11
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm12 = xmm7[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm12, %ymm12
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm13
+; AVX512F-NEXT: vmovd %xmm13, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm7
+; AVX512F-NEXT: vmovd %eax, %xmm2
+; AVX512F-NEXT: vmovd %xmm7, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm12, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm11, %eax
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm8, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm10, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm9, %eax
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm6
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
+; AVX512F-NEXT: vmovd %eax, %xmm6
+; AVX512F-NEXT: vmovd %xmm0, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm6, %xmm0
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm4
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm14, %eax
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm1
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;