[PATCH] D23893: [DAGCombine] Don't fold a trunc if it feeds an anyext

Thu Aug 25 15:13:27 PDT 2016

mkuper created this revision.
mkuper added reviewers: RKSimon, arsenm.
mkuper added a subscriber: llvm-commits.

Legalization tends to create anyext(trunc) patterns. This should always be combined - into either a single trunc, a single ext, or nothing if the types match exactly. But if we happen to combine the trunc first, we may pull the trunc away from the anyext or make it implicit (e.g. the truncate(extract) -> extract(bitcast) fold). To prevent this, we can avoid doing the fold, similarly to how we already handle fpround(fpextend).

This is somewhat ugly, but the alternative is to try to undo this later when we combine the anyext, and that gets painful.

https://reviews.llvm.org/D23893

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/X86/2011-10-21-widen-cmp.ll
  test/CodeGen/X86/mem-intrin-base-reg.ll

Index: test/CodeGen/X86/mem-intrin-base-reg.ll
===================================================================

--- test/CodeGen/X86/mem-intrin-base-reg.ll
+++ test/CodeGen/X86/mem-intrin-base-reg.ll
@@ -8,15 +8,12 @@
 ; for when this is necessary. Typically, we chose ESI for the base register,
 ; which all of the X86 string instructions use.
 
-; The pattern of vector icmp and extractelement is used in these tests because
-; it forces creation of an aligned stack temporary. Perhaps such temporaries
-; shouldn't be aligned.
-
 declare void @escape_vla_and_icmp(i8*, i1 zeroext)
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
 
 define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+  %foo = alloca <4 x i32>, align 16
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
   br i1 %cond, label %spill_vectors, label %no_vectors
 
@@ -42,6 +39,7 @@
 ; CHECK: rep;movsl
 
 define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+  %foo = alloca <4 x i32>, align 16
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
   br i1 %cond, label %spill_vectors, label %no_vectors
 
@@ -70,6 +68,7 @@
 ; stosd doesn't clobber esi, so we can use it.
 
 define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) {
+  %foo = alloca <4 x i32>, align 16
   call void @llvm.memset.p0i8.i32(i8* %a, i8 42, i32 128, i32 4, i1 false)
   br i1 %cond, label %spill_vectors, label %no_vectors
 
Index: test/CodeGen/X86/2011-10-21-widen-cmp.ll
===================================================================
--- test/CodeGen/X86/2011-10-21-widen-cmp.ll
+++ test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -10,10 +10,7 @@
 ; CHECK-NEXT:    cmpordps %xmm0, %xmm0
 ; CHECK-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; CHECK-NEXT:    psllq $32, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    psrad $31, %xmm0
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; CHECK-NEXT:    pslld $31, %xmm0
 ; CHECK-NEXT:    blendvps %xmm0, %xmm0
 ; CHECK-NEXT:    movlps %xmm0, (%rax)
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7135,6 +7135,10 @@
     return N0.getOperand(0);
   }
 
+  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+    return SDValue();
+
   // Fold extract-and-trunc into a narrow extract. For example:
   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
   //   i32 y = TRUNCATE(i64 x)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23893.69270.patch
Type: text/x-patch
Size: 3013 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160825/19363dd3/attachment.bin>