[llvm] f01b9aa - [MachineScheduler] Enable AA in PostRA Machine scheduler

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 5 04:48:43 PST 2019


Author: David Green
Date: 2019-11-05T11:58:50Z
New Revision: f01b9aa89e8cd5d3cca0e13835302f69c1f879d2

URL: https://github.com/llvm/llvm-project/commit/f01b9aa89e8cd5d3cca0e13835302f69c1f879d2
DIFF: https://github.com/llvm/llvm-project/commit/f01b9aa89e8cd5d3cca0e13835302f69c1f879d2.diff

LOG: [MachineScheduler] Enable AA in PostRA Machine scheduler

This adds AA to Post-RA Machine Scheduling, allowing the pass more
freedom when handling memory operations.

My understanding is that this was simply never done, not that it is
inherently incorrect to do so. The older PostRA List scheduler already
makes use of AA; the MI PostRA Scheduler was just never taught to use
it.

Differential Revision: https://reviews.llvm.org/D69814

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineScheduler.cpp
    llvm/test/CodeGen/AArch64/merge-store-dependency.ll
    llvm/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index caebb9534390..df88a879db7f 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -238,6 +238,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
+  AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<TargetPassConfig>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -412,6 +413,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   MLI = &getAnalysis<MachineLoopInfo>();
   PassConfig = &getAnalysis<TargetPassConfig>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   if (VerifyScheduling)
     MF->verify(this, "Before post machine scheduling.");

diff  --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 4c561f337dcf..5613db1e5214 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -17,15 +17,15 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
 ; A53-NEXT:    movi v0.2d, #0000000000000000
 ; A53-NEXT:    mov x8, x0
 ; A53-NEXT:    mov x19, x8
-; A53-NEXT:    mov w9, #256
 ; A53-NEXT:    mov w0, w1
-; A53-NEXT:    str q0, [x8]
+; A53-NEXT:    mov w9, #256
 ; A53-NEXT:    str q0, [x19, #16]!
-; A53-NEXT:    strh w9, [x8, #24]
 ; A53-NEXT:    str w1, [x19]
 ; A53-NEXT:    mov w1, #4
 ; A53-NEXT:    stp x2, x3, [x8, #32]
 ; A53-NEXT:    mov x2, x8
+; A53-NEXT:    str q0, [x8]
+; A53-NEXT:    strh w9, [x8, #24]
 ; A53-NEXT:    str wzr, [x8, #20]
 ; A53-NEXT:    bl fcntl
 ; A53-NEXT:    adrp x9, gv0

diff  --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index 9a5bacda86aa..2731ffd07125 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -484,8 +484,8 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) loc
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
 ; CHECK-NEXT:    li r3, 4
-; CHECK-NEXT:    stfiwx f0, 0, r5
 ; CHECK-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-NEXT:    stfiwx f0, 0, r5
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_consecutive_i32:
@@ -501,8 +501,8 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) loc
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
 ; CHECK-P9-NEXT:    li r3, 4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r5
 ; CHECK-P9-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-P9-NEXT:    stfiwx f0, 0, r5
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: test_consecutive_i32:
@@ -590,8 +590,8 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
 ; CHECK-BE-NEXT:    li r4, 20
 ; CHECK-BE-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs0, 2
-; CHECK-BE-NEXT:    stxvw4x vs0, 0, r5
 ; CHECK-BE-NEXT:    stfiwx f1, r5, r4
+; CHECK-BE-NEXT:    stxvw4x vs0, 0, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test_stores_exceed_vec_size:
@@ -599,13 +599,13 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI16_0@toc@l
 ; CHECK-P9-NEXT:    lxvx vs35, 0, r3
-; CHECK-P9-NEXT:    li r3, 16
-; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-P9-NEXT:    stxv vs35, 0(r5)
+; CHECK-P9-NEXT:    li r3, 16
 ; CHECK-P9-NEXT:    stfiwx f0, r5, r3
 ; CHECK-P9-NEXT:    li r3, 20
 ; CHECK-P9-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs35, 0(r5)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size:
@@ -613,10 +613,10 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
 ; CHECK-P9-BE-NEXT:    xxspltw vs0, vs34, 0
 ; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs0, 2
 ; CHECK-P9-BE-NEXT:    li r3, 16
-; CHECK-P9-BE-NEXT:    stxv vs0, 0(r5)
-; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
 ; CHECK-P9-BE-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-P9-BE-NEXT:    li r3, 20
+; CHECK-P9-BE-NEXT:    stxv vs0, 0(r5)
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
 ; CHECK-P9-BE-NEXT:    stfiwx f0, r5, r3
 ; CHECK-P9-BE-NEXT:    blr
 entry:
@@ -930,8 +930,8 @@ define void @test_elements_from_two_vec(<4 x i32> %a, <4 x i32> %b, i32* nocaptu
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
 ; CHECK-BE-NEXT:    li r3, 4
-; CHECK-BE-NEXT:    stfiwx f0, r7, r3
 ; CHECK-BE-NEXT:    stxsiwx vs35, 0, r7
+; CHECK-BE-NEXT:    stfiwx f0, r7, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test_elements_from_two_vec:
@@ -977,19 +977,19 @@ define dso_local void @test_elements_from_three_vec(<4 x float> %a, <4 x float>
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs35, vs35, 1
 ; CHECK-BE-NEXT:    li r3, 4
 ; CHECK-BE-NEXT:    li r4, 8
+; CHECK-BE-NEXT:    stxsiwx vs36, r9, r4
 ; CHECK-BE-NEXT:    stfiwx f1, r9, r3
 ; CHECK-BE-NEXT:    stfiwx f0, 0, r9
-; CHECK-BE-NEXT:    stxsiwx vs36, r9, r4
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test_elements_from_three_vec:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
 ; CHECK-P9-NEXT:    li r3, 4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r9
-; CHECK-P9-NEXT:    xxsldwi vs0, vs36, vs36, 1
 ; CHECK-P9-NEXT:    stxsiwx vs35, r9, r3
 ; CHECK-P9-NEXT:    li r3, 8
+; CHECK-P9-NEXT:    stfiwx f0, 0, r9
+; CHECK-P9-NEXT:    xxsldwi vs0, vs36, vs36, 1
 ; CHECK-P9-NEXT:    stfiwx f0, r9, r3
 ; CHECK-P9-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
index 8c21b85d0ce0..6e782c2b02a1 100644
--- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
@@ -353,10 +353,10 @@ define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-NEXT:    addi r3, r1, 40
 ; CHECK-NEXT:    lxvx v3, 0, r3
 ; CHECK-NEXT:    xsaddqp v2, v3, v2
-; CHECK-NEXT:    addi [[REG2:r[0-9]+]], r1, 72
-; CHECK-NEXT:    std [[REG2]], -8(r1)
 ; CHECK-NEXT:    lxv v3, 16(r3)
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
+; CHECK-NEXT:    addi [[REG2:r[0-9]+]], r1, 72
+; CHECK-NEXT:    std [[REG2]], -8(r1)
 ; CHECK-NEXT:    blr
 entry:
   %ap = alloca i8*, align 8

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
index 6b945d468806..cf4a6d636207 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
@@ -328,27 +328,27 @@ define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x flo
 ; CHECK-P9-NEXT:    lxv vs2, 48(r4)
 ; CHECK-P9-NEXT:    xxswapd vs8, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT:    stxv vs5, 32(r3)
-; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs3
 ; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
 ; CHECK-P9-NEXT:    xxmrghd vs7, vs8, vs7
 ; CHECK-P9-NEXT:    xscvspdpn f8, vs2
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    stxv vs6, 64(r3)
-; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    stxv vs3, 80(r3)
-; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr
@@ -738,27 +738,27 @@ define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <1
 ; CHECK-P9-NEXT:    lxv vs2, 48(r4)
 ; CHECK-P9-NEXT:    xxswapd vs8, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT:    stxv vs5, 32(r3)
-; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs3
 ; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
 ; CHECK-P9-NEXT:    xxmrghd vs7, vs8, vs7
 ; CHECK-P9-NEXT:    xscvspdpn f8, vs2
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    stxv vs6, 64(r3)
-; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    stxv vs3, 80(r3)
-; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index f03d0696f217..883cf7e51709 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -712,24 +712,24 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result,
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
 ; CHECK-P9-NEXT:    vperm v2, v4, v4, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    xvcvsxddp vs4, v2
 ; CHECK-P9-NEXT:    vperm v2, v4, v4, v5
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs5, v2
 ; CHECK-P9-NEXT:    vperm v2, v4, v4, v0
 ; CHECK-P9-NEXT:    stxv vs4, 64(r3)
-; CHECK-P9-NEXT:    stxv vs5, 80(r3)
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs6, v2
 ; CHECK-P9-NEXT:    vperm v2, v4, v4, v1
-; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 08e6f70bbe6d..66e85e9f81b1 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -239,18 +239,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i
 ; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v0
 ; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    stxv v1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v5
 ; CHECK-P9-NEXT:    lxv v4, 80(r4)
 ; CHECK-P9-NEXT:    vpkudum v0, v0, v6
+; CHECK-P9-NEXT:    stxv v0, 16(r3)
 ; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
-; CHECK-P9-NEXT:    lxv v3, 96(r4)
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
 ; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v3
 ; CHECK-P9-NEXT:    lxv v2, 112(r4)
-; CHECK-P9-NEXT:    stxv v0, 16(r3)
-; CHECK-P9-NEXT:    stxv v1, 0(r3)
 ; CHECK-P9-NEXT:    vpkudum v4, v4, v5
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
 ; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
@@ -275,18 +275,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i
 ; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v0
 ; CHECK-BE-NEXT:    lxv v5, 80(r4)
+; CHECK-BE-NEXT:    stxv v1, 0(r3)
 ; CHECK-BE-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v5
 ; CHECK-BE-NEXT:    lxv v4, 64(r4)
 ; CHECK-BE-NEXT:    vpkudum v0, v0, v6
+; CHECK-BE-NEXT:    stxv v0, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
-; CHECK-BE-NEXT:    lxv v3, 112(r4)
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-BE-NEXT:    lxv v3, 112(r4)
 ; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v3
 ; CHECK-BE-NEXT:    lxv v2, 96(r4)
-; CHECK-BE-NEXT:    stxv v0, 16(r3)
-; CHECK-BE-NEXT:    stxv v1, 0(r3)
 ; CHECK-BE-NEXT:    vpkudum v4, v4, v5
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
 ; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3
@@ -532,18 +532,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result,
 ; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v0
 ; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    stxv v1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v5
 ; CHECK-P9-NEXT:    lxv v4, 80(r4)
 ; CHECK-P9-NEXT:    vpkudum v0, v0, v6
+; CHECK-P9-NEXT:    stxv v0, 16(r3)
 ; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
-; CHECK-P9-NEXT:    lxv v3, 96(r4)
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
 ; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v3
 ; CHECK-P9-NEXT:    lxv v2, 112(r4)
-; CHECK-P9-NEXT:    stxv v0, 16(r3)
-; CHECK-P9-NEXT:    stxv v1, 0(r3)
 ; CHECK-P9-NEXT:    vpkudum v4, v4, v5
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
 ; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
@@ -568,18 +568,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result,
 ; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v0
 ; CHECK-BE-NEXT:    lxv v5, 80(r4)
+; CHECK-BE-NEXT:    stxv v1, 0(r3)
 ; CHECK-BE-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v5
 ; CHECK-BE-NEXT:    lxv v4, 64(r4)
 ; CHECK-BE-NEXT:    vpkudum v0, v0, v6
+; CHECK-BE-NEXT:    stxv v0, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
-; CHECK-BE-NEXT:    lxv v3, 112(r4)
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-BE-NEXT:    lxv v3, 112(r4)
 ; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v3
 ; CHECK-BE-NEXT:    lxv v2, 96(r4)
-; CHECK-BE-NEXT:    stxv v0, 16(r3)
-; CHECK-BE-NEXT:    stxv v1, 0(r3)
 ; CHECK-BE-NEXT:    vpkudum v4, v4, v5
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
 ; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3


        


More information about the llvm-commits mailing list