[llvm] r271548 - Only attempt to detect AVG if SSE2 is available

Dimitry Andric via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 2 10:30:49 PDT 2016


Author: dim
Date: Thu Jun  2 12:30:49 2016
New Revision: 271548

URL: http://llvm.org/viewvc/llvm-project?rev=271548&view=rev
Log:
Only attempt to detect AVG if SSE2 is available

Summary:
In PR27973 Sanjay Patel reported an assertion failure when a certain
loop was optimized, for a target without SSE2 support.  It turned out
this was because of the AVG pattern detection introduced in rL253952.

Prevent the assertion failure by bailing out early in
`detectAVGPattern()`, if the target does not support SSE2.

Also add a minimized test case.

Reviewers: congh, eli.friedman, spatel

Subscribers: emaste, llvm-commits

Differential Revision: http://reviews.llvm.org/D20905

Added:
    llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=271548&r1=271547&r2=271548&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jun  2 12:30:49 2016
@@ -27793,6 +27793,8 @@ static SDValue detectAVGPattern(SDValue
   if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
     return SDValue();
 
+  if (!Subtarget.hasSSE2())
+    return SDValue();
   if (Subtarget.hasAVX512()) {
     if (VT.getSizeInBits() > 512)
       return SDValue();

Added: llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll?rev=271548&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll (added)
+++ llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll Thu Jun  2 12:30:49 2016
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s
+
+define <16 x i8> @PR27973() {
+; CHECK-LABEL: PR27973:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movb $0, 15(%rdi)
+; CHECK-NEXT:    movb $0, 14(%rdi)
+; CHECK-NEXT:    movb $0, 13(%rdi)
+; CHECK-NEXT:    movb $0, 12(%rdi)
+; CHECK-NEXT:    movb $0, 11(%rdi)
+; CHECK-NEXT:    movb $0, 10(%rdi)
+; CHECK-NEXT:    movb $0, 9(%rdi)
+; CHECK-NEXT:    movb $0, 8(%rdi)
+; CHECK-NEXT:    movb $0, 7(%rdi)
+; CHECK-NEXT:    movb $0, 6(%rdi)
+; CHECK-NEXT:    movb $0, 5(%rdi)
+; CHECK-NEXT:    movb $0, 4(%rdi)
+; CHECK-NEXT:    movb $0, 3(%rdi)
+; CHECK-NEXT:    movb $0, 2(%rdi)
+; CHECK-NEXT:    movb $0, 1(%rdi)
+; CHECK-NEXT:    movb $0, (%rdi)
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    retq
+;
+  %t0 = zext <16 x i8> zeroinitializer to <16 x i32>
+  %t1 = add nuw nsw <16 x i32> %t0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %t2 = lshr <16 x i32> %t1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %t3 = trunc <16 x i32> %t2 to <16 x i8>
+  ret <16 x i8> %t3
+}




More information about the llvm-commits mailing list