[llvm] r347997 - [SLP]PR39774: Update references of the replaced external instructions.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 30 07:14:20 PST 2018


Author: abataev
Date: Fri Nov 30 07:14:20 2018
New Revision: 347997

URL: http://llvm.org/viewvc/llvm-project?rev=347997&view=rev
Log:
[SLP]PR39774: Update references of the replaced external instructions.

Summary:
An additional fix for PR39774. We need to update the references to the
ReductionRoot instruction when it is replaced during the vectorization
phase, to avoid a compiler crash on reduction vectorization.

Reviewers: RKSimon, spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D55017

Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=347997&r1=347996&r2=347997&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Nov 30 07:14:20 2018
@@ -3643,6 +3643,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebug
       auto &Locs = ExternallyUsedValues[Scalar];
       ExternallyUsedValues.insert({Ex, Locs});
       ExternallyUsedValues.erase(Scalar);
+      // Required to update internally referenced instructions.
+      Scalar->replaceAllUsesWith(Ex);
       continue;
     }
 

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll?rev=347997&r1=347996&r2=347997&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_1.ll Fri Nov 30 07:14:20 2018
@@ -19,12 +19,12 @@ define void @mainTest(i32* %ptr) #0  {
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 1, undef
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], undef
-; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], undef
 ; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
-; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], undef
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll?rev=347997&r1=347996&r2=347997&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35628_2.ll Fri Nov 30 07:14:20 2018
@@ -23,7 +23,7 @@ define void @test() #0 {
 ; CHECK-NEXT:    [[SUM1:%.*]] = add i64 undef, undef
 ; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], undef
 ; CHECK-NEXT:    [[ZSUM:%.*]] = add i64 [[SUM2]], 0
-; CHECK-NEXT:    [[JOIN:%.*]] = add i64 undef, [[ZSUM]]
+; CHECK-NEXT:    [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=347997&r1=347996&r2=347997&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll Fri Nov 30 07:14:20 2018
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefix=FORCE_REDUCTION
 
 define void @Test(i32) {
 ; CHECK-LABEL: @Test(
@@ -54,6 +55,94 @@ define void @Test(i32) {
 ; CHECK-NEXT:    [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
+; FORCE_REDUCTION-LABEL: @Test(
+; FORCE_REDUCTION-NEXT:  entry:
+; FORCE_REDUCTION-NEXT:    br label [[LOOP:%.*]]
+; FORCE_REDUCTION:       loop:
+; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
+; FORCE_REDUCTION-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
+; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
+; FORCE_REDUCTION-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]]
+; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]]
+; FORCE_REDUCTION-NEXT:    [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
+; FORCE_REDUCTION-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT:    [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; FORCE_REDUCTION-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; FORCE_REDUCTION-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]]
+; FORCE_REDUCTION-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
+; FORCE_REDUCTION-NEXT:    br label [[LOOP]]
+;
 entry:
   br label %loop
 

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll?rev=347997&r1=347996&r2=347997&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-list.ll Fri Nov 30 07:14:20 2018
@@ -1679,7 +1679,7 @@ define i32 @wobble(i32 %arg, i32 %bar) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
 ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
-; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], undef
+; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
 ; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
 ;
 ; THRESHOLD-LABEL: @wobble(
@@ -1707,7 +1707,7 @@ define i32 @wobble(i32 %arg, i32 %bar) {
 ; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
 ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
 ; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
-; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], undef
+; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
 ; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
 ;
   bb:




More information about the llvm-commits mailing list