[PATCH] D49080: [PowerPC] Don't run BV DAG Combine before legalization if it assumes legal types

Mon Jul 9 07:59:07 PDT 2018

nemanjai created this revision.
nemanjai added reviewers: hfinkel, tstellar, echristo.
Herald added a subscriber: kbarton.

When combining a BUILD_VECTOR whose operands are sign-extensions of vector element extracts, the code assumes that the input types are legal. Because of that, this combine has to be disabled prior to legalization.
After legalization the DAG can look slightly different (for example, a SIGN_EXTEND_INREG fed by an ANY_EXTEND rather than a plain SIGN_EXTEND), so the matching code is updated to account for that.

This is a fix for https://bugs.llvm.org/show_bug.cgi?id=38087


Repository:
  rL LLVM

https://reviews.llvm.org/D49080

Files:
  lib/Target/PowerPC/PPCISelLowering.cpp
  test/CodeGen/PowerPC/pr38087.ll


Index: test/CodeGen/PowerPC/pr38087.ll
===================================================================

--- test/CodeGen/PowerPC/pr38087.ll
+++ test/CodeGen/PowerPC/pr38087.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names < %s | \
+; RUN:   FileCheck %s
+; Function Attrs: nounwind readnone speculatable
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+
+; Function Attrs: nounwind readnone speculatable
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
+
+define void @draw_llvm_vs_variant0() {
+; CHECK-LABEL: draw_llvm_vs_variant0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ldx r3, 0, r3
+; CHECK-NEXT:    mtvsrd f0, r3
+; CHECK-NEXT:    xxswapd v2, vs0
+; CHECK-NEXT:    vmrglh v2, v2, v2
+; CHECK-NEXT:    vextsh2w v2, v2
+; CHECK-NEXT:    xvcvsxwsp vs13, v2
+; CHECK-NEXT:    xxspltw vs0, vs13, 2
+; CHECK-NEXT:    xvmaddasp vs0, vs0, vs0
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %.size = load i32, i32* undef
+  %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %.size, i32 7)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %1, i32 0)
+  %3 = extractvalue { i32, i1 } %2, 0
+  %4 = select i1 false, i32 0, i32 %3
+  %5 = xor i1 false, true
+  %6 = sext i1 %5 to i32
+  %7 = load <4 x i16>, <4 x i16>* undef, align 2
+  %8 = extractelement <4 x i16> %7, i32 0
+  %9 = sext i16 %8 to i32
+  %10 = insertelement <4 x i32> undef, i32 %9, i32 0
+  %11 = extractelement <4 x i16> %7, i32 1
+  %12 = sext i16 %11 to i32
+  %13 = insertelement <4 x i32> %10, i32 %12, i32 1
+  %14 = extractelement <4 x i16> %7, i32 2
+  %15 = sext i16 %14 to i32
+  %16 = insertelement <4 x i32> %13, i32 %15, i32 2
+  %17 = extractelement <4 x i16> %7, i32 3
+  %18 = sext i16 %17 to i32
+  %19 = insertelement <4 x i32> %16, i32 %18, i32 3
+  %20 = sitofp <4 x i32> %19 to <4 x float>
+  %21 = insertelement <4 x i32> undef, i32 %6, i32 0
+  %22 = shufflevector <4 x i32> %21, <4 x i32> undef, <4 x i32> zeroinitializer
+  %23 = bitcast <4 x float> %20 to <4 x i32>
+  %24 = and <4 x i32> %23, %22
+  %25 = bitcast <4 x i32> %24 to <4 x float>
+  %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %27 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %26)
+  store <4 x float> %27, <4 x float>* undef
+  ret void
+}
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11862,10 +11862,15 @@
   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
     if (!Op)
       return false;
-    if (Op.getOpcode() != ISD::SIGN_EXTEND)
+    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
+        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
       return false;
 
+    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
+    // of the right width.
     SDValue Extract = Op.getOperand(0);
+    if (Extract.getOpcode() == ISD::ANY_EXTEND)
+      Extract = Extract.getOperand(0);
     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
       return false;
 
@@ -11953,8 +11958,10 @@
     return Reduced;
 
   // If we're building a vector out of extended elements from another vector
-  // we have P9 vector integer extend instructions.
-  if (Subtarget.hasP9Altivec()) {
+  // we have P9 vector integer extend instructions. The code assumes legal
+  // input types (i.e. it can't handle things like v4i16) so do not run before
+  // legalization.
+  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
     Reduced = combineBVOfVecSExt(N, DAG);
     if (Reduced)
       return Reduced;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D49080.154606.patch
Type: text/x-patch
Size: 3952 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180709/dbce3ac4/attachment.bin>