[PATCH] D38300: [x86] Correct the implementation of isTruncateFree to be more accurate

Tue Sep 26 16:53:06 PDT 2017

craig.topper created this revision.

Currently we return true as long as the source type is larger than the dest type, but truncates are only "free" if we can use a subregister extract. This corrects the implementation to match that.

It looks like the EVT signature was also running the check on vectors, which was probably unintentional, so I've corrected that here. I think this may have exposed some missing cases in the cost model.

The avx512-mask-op.ll changed because we previously promoted the load to 32 bits under the assumption that truncating from i32 to i1 is free. This ultimately allowed the two ANDs to be CSEd by the DAG since they were then both i32. Now we have one in i32 and one in i8.


https://reviews.llvm.org/D38300

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/Analysis/CostModel/X86/trunc.ll
  test/CodeGen/X86/avx512-mask-op.ll


Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================

--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -1629,9 +1629,12 @@
 define void @f1(i32 %c) {
 ; CHECK-LABEL: f1:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    movzbl {{.*}}(%rip), %edi
-; CHECK-NEXT:    xorl $1, %edi
-; CHECK-NEXT:    movb %dil, {{.*}}(%rip)
+; CHECK-NEXT:    movb {{.*}}(%rip), %al
+; CHECK-NEXT:    xorb $1, %al
+; CHECK-NEXT:    movzbl %al, %edi
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    movb %al, {{.*}}(%rip)
+; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    jmp _f2 ## TAILCALL
 entry:
   %.b1 = load i1, i1* @f1.v, align 4
Index: test/Analysis/CostModel/X86/trunc.ll
===================================================================
--- test/Analysis/CostModel/X86/trunc.ll
+++ test/Analysis/CostModel/X86/trunc.ll
@@ -45,7 +45,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i16>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i16>
 
   ; SSE2: cost of 3 {{.*}} %V4i32 = trunc
@@ -88,7 +89,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i8>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i8>
 
   ; SSE: cost of 0 {{.*}} %V2i32 = trunc
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24777,12 +24777,14 @@
   return true;
 }
 
-bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
-  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+bool X86TargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
+  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
-  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
-  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
+  unsigned DstBits = DstTy->getPrimitiveSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
@@ -24808,12 +24810,14 @@
   return isInt<32>(Imm);
 }
 
-bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
-  if (!VT1.isInteger() || !VT2.isInteger())
+bool X86TargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
+  if (!SrcVT.isScalarInteger() || !DstVT.isScalarInteger())
     return false;
-  unsigned NumBits1 = VT1.getSizeInBits();
-  unsigned NumBits2 = VT2.getSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DstBits = DstVT.getSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38300.116738.patch
Type: text/x-patch
Size: 3299 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170926/2ff131c8/attachment.bin>