[llvm] [PowerPC] Add `SDNPMemOperand` to some nodes (PR #115580)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 20:41:34 PST 2024
https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/115580
Nodes created with `getMemIntrinsicNode` have memory operands. In order
for these operands to be propagated to machine instructions, the nodes
should have the `SDNPMemOperand` property.
Similar to 3c8c385a.
From a19ea4eb0c6d61503e51f103db5ac2eb2e0ec332 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 9 Nov 2024 07:35:59 +0300
Subject: [PATCH 1/2] Autogenerate check lines for one test
---
llvm/test/CodeGen/PowerPC/swaps-le-1.ll | 336 ++++++++++++++++++------
1 file changed, 256 insertions(+), 80 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
index e2a61d7060ff2a..9c7f1343d7292f 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
@@ -1,9 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \
-; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN: -check-prefix=CHECK-P8 %s
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
-; RUN: -check-prefix=NOOPTSWAP %s
+; RUN: -check-prefix=NOOPTSWAP-P8 %s
; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr < %s | FileCheck \
@@ -11,7 +14,7 @@
; RUN: llc -O3 -mcpu=pwr9 -disable-ppc-vsx-swap-removal -mattr=-power9-vector \
; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
-; RUN: | FileCheck -check-prefix=NOOPTSWAP %s
+; RUN: | FileCheck -check-prefix=NOOPTSWAP-P9 %s
; LH: 2016-11-17
; Updated align attritue from 16 to 8 to keep swap instructions tests.
@@ -41,6 +44,256 @@
@ca = common global [4096 x i32] zeroinitializer, align 8
define void @foo() {
+; CHECK-P8-LABEL: foo:
+; CHECK-P8: # %bb.0: # %entry
+; CHECK-P8-NEXT: li 3, 256
+; CHECK-P8-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-P8-NEXT: addis 4, 2, .LC0@toc@ha
+; CHECK-P8-NEXT: addis 5, 2, .LC1@toc@ha
+; CHECK-P8-NEXT: addis 6, 2, .LC2@toc@ha
+; CHECK-P8-NEXT: addis 7, 2, .LC3@toc@ha
+; CHECK-P8-NEXT: li 8, 16
+; CHECK-P8-NEXT: mtctr 3
+; CHECK-P8-NEXT: ld 4, .LC0@toc@l(4)
+; CHECK-P8-NEXT: ld 5, .LC1@toc@l(5)
+; CHECK-P8-NEXT: ld 6, .LC2@toc@l(6)
+; CHECK-P8-NEXT: ld 7, .LC3@toc@l(7)
+; CHECK-P8-NEXT: li 3, 0
+; CHECK-P8-NEXT: li 9, 32
+; CHECK-P8-NEXT: li 10, 48
+; CHECK-P8-NEXT: .p2align 4
+; CHECK-P8-NEXT: .LBB0_1: # %vector.body
+; CHECK-P8-NEXT: #
+; CHECK-P8-NEXT: lxvd2x 34, 4, 3
+; CHECK-P8-NEXT: lxvd2x 35, 5, 3
+; CHECK-P8-NEXT: add 11, 4, 3
+; CHECK-P8-NEXT: add 12, 5, 3
+; CHECK-P8-NEXT: add 30, 6, 3
+; CHECK-P8-NEXT: add 29, 7, 3
+; CHECK-P8-NEXT: vadduwm 2, 3, 2
+; CHECK-P8-NEXT: lxvd2x 35, 6, 3
+; CHECK-P8-NEXT: vmuluwm 2, 2, 3
+; CHECK-P8-NEXT: stxvd2x 34, 7, 3
+; CHECK-P8-NEXT: addi 3, 3, 64
+; CHECK-P8-NEXT: lxvd2x 34, 11, 8
+; CHECK-P8-NEXT: lxvd2x 35, 12, 8
+; CHECK-P8-NEXT: vadduwm 2, 3, 2
+; CHECK-P8-NEXT: lxvd2x 35, 30, 8
+; CHECK-P8-NEXT: vmuluwm 2, 2, 3
+; CHECK-P8-NEXT: stxvd2x 34, 29, 8
+; CHECK-P8-NEXT: lxvd2x 34, 11, 9
+; CHECK-P8-NEXT: lxvd2x 35, 12, 9
+; CHECK-P8-NEXT: vadduwm 2, 3, 2
+; CHECK-P8-NEXT: lxvd2x 35, 30, 9
+; CHECK-P8-NEXT: vmuluwm 2, 2, 3
+; CHECK-P8-NEXT: stxvd2x 34, 29, 9
+; CHECK-P8-NEXT: lxvd2x 34, 11, 10
+; CHECK-P8-NEXT: lxvd2x 35, 12, 10
+; CHECK-P8-NEXT: vadduwm 2, 3, 2
+; CHECK-P8-NEXT: lxvd2x 35, 30, 10
+; CHECK-P8-NEXT: vmuluwm 2, 2, 3
+; CHECK-P8-NEXT: stxvd2x 34, 29, 10
+; CHECK-P8-NEXT: bdnz .LBB0_1
+; CHECK-P8-NEXT: # %bb.2: # %for.end
+; CHECK-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-P8-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-P8-NEXT: blr
+;
+; NOOPTSWAP-P8-LABEL: foo:
+; NOOPTSWAP-P8: # %bb.0: # %entry
+; NOOPTSWAP-P8-NEXT: li 3, 256
+; NOOPTSWAP-P8-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; NOOPTSWAP-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; NOOPTSWAP-P8-NEXT: addis 4, 2, .LC0@toc@ha
+; NOOPTSWAP-P8-NEXT: addis 5, 2, .LC1@toc@ha
+; NOOPTSWAP-P8-NEXT: addis 6, 2, .LC2@toc@ha
+; NOOPTSWAP-P8-NEXT: addis 7, 2, .LC3@toc@ha
+; NOOPTSWAP-P8-NEXT: li 8, 16
+; NOOPTSWAP-P8-NEXT: mtctr 3
+; NOOPTSWAP-P8-NEXT: ld 4, .LC0@toc@l(4)
+; NOOPTSWAP-P8-NEXT: ld 5, .LC1@toc@l(5)
+; NOOPTSWAP-P8-NEXT: ld 6, .LC2@toc@l(6)
+; NOOPTSWAP-P8-NEXT: ld 7, .LC3@toc@l(7)
+; NOOPTSWAP-P8-NEXT: li 3, 0
+; NOOPTSWAP-P8-NEXT: li 9, 32
+; NOOPTSWAP-P8-NEXT: li 10, 48
+; NOOPTSWAP-P8-NEXT: .p2align 4
+; NOOPTSWAP-P8-NEXT: .LBB0_1: # %vector.body
+; NOOPTSWAP-P8-NEXT: #
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 4, 3
+; NOOPTSWAP-P8-NEXT: add 11, 4, 3
+; NOOPTSWAP-P8-NEXT: add 12, 5, 3
+; NOOPTSWAP-P8-NEXT: add 30, 6, 3
+; NOOPTSWAP-P8-NEXT: add 29, 7, 3
+; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 5, 3
+; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 6, 3
+; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P8-NEXT: stxvd2x 0, 7, 3
+; NOOPTSWAP-P8-NEXT: addi 3, 3, 64
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 8
+; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 8
+; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 8
+; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 8
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 9
+; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 9
+; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 9
+; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 9
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 10
+; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 10
+; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 10
+; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 10
+; NOOPTSWAP-P8-NEXT: bdnz .LBB0_1
+; NOOPTSWAP-P8-NEXT: # %bb.2: # %for.end
+; NOOPTSWAP-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; NOOPTSWAP-P8-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; NOOPTSWAP-P8-NEXT: blr
+;
+; CHECK-P9-LABEL: foo:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: li 6, 256
+; CHECK-P9-NEXT: addis 3, 2, .LC0@toc@ha
+; CHECK-P9-NEXT: addis 4, 2, .LC1@toc@ha
+; CHECK-P9-NEXT: addis 5, 2, .LC2@toc@ha
+; CHECK-P9-NEXT: mtctr 6
+; CHECK-P9-NEXT: addis 6, 2, .LC3@toc@ha
+; CHECK-P9-NEXT: ld 3, .LC0@toc@l(3)
+; CHECK-P9-NEXT: ld 4, .LC1@toc@l(4)
+; CHECK-P9-NEXT: ld 5, .LC2@toc@l(5)
+; CHECK-P9-NEXT: ld 6, .LC3@toc@l(6)
+; CHECK-P9-NEXT: addi 3, 3, 32
+; CHECK-P9-NEXT: addi 4, 4, 32
+; CHECK-P9-NEXT: addi 5, 5, 32
+; CHECK-P9-NEXT: addi 6, 6, 32
+; CHECK-P9-NEXT: .p2align 4
+; CHECK-P9-NEXT: .LBB0_1: # %vector.body
+; CHECK-P9-NEXT: #
+; CHECK-P9-NEXT: lxv 2, -32(6)
+; CHECK-P9-NEXT: lxv 3, -32(5)
+; CHECK-P9-NEXT: lxv 4, -16(5)
+; CHECK-P9-NEXT: vadduwm 2, 3, 2
+; CHECK-P9-NEXT: lxv 3, -32(4)
+; CHECK-P9-NEXT: vmuluwm 2, 2, 3
+; CHECK-P9-NEXT: lxv 3, -16(6)
+; CHECK-P9-NEXT: vadduwm 3, 4, 3
+; CHECK-P9-NEXT: lxv 4, 0(5)
+; CHECK-P9-NEXT: stxv 2, -32(3)
+; CHECK-P9-NEXT: lxv 2, -16(4)
+; CHECK-P9-NEXT: vmuluwm 2, 3, 2
+; CHECK-P9-NEXT: lxv 3, 0(6)
+; CHECK-P9-NEXT: vadduwm 3, 4, 3
+; CHECK-P9-NEXT: lxv 4, 16(5)
+; CHECK-P9-NEXT: addi 5, 5, 64
+; CHECK-P9-NEXT: stxv 2, -16(3)
+; CHECK-P9-NEXT: lxv 2, 0(4)
+; CHECK-P9-NEXT: vmuluwm 2, 3, 2
+; CHECK-P9-NEXT: lxv 3, 16(6)
+; CHECK-P9-NEXT: addi 6, 6, 64
+; CHECK-P9-NEXT: vadduwm 3, 4, 3
+; CHECK-P9-NEXT: stxv 2, 0(3)
+; CHECK-P9-NEXT: lxv 2, 16(4)
+; CHECK-P9-NEXT: addi 4, 4, 64
+; CHECK-P9-NEXT: vmuluwm 2, 3, 2
+; CHECK-P9-NEXT: stxv 2, 16(3)
+; CHECK-P9-NEXT: addi 3, 3, 64
+; CHECK-P9-NEXT: bdnz .LBB0_1
+; CHECK-P9-NEXT: # %bb.2: # %for.end
+; CHECK-P9-NEXT: blr
+;
+; NOOPTSWAP-P9-LABEL: foo:
+; NOOPTSWAP-P9: # %bb.0: # %entry
+; NOOPTSWAP-P9-NEXT: addis 4, 2, .LC0@toc@ha
+; NOOPTSWAP-P9-NEXT: addis 5, 2, .LC1@toc@ha
+; NOOPTSWAP-P9-NEXT: addis 6, 2, .LC2@toc@ha
+; NOOPTSWAP-P9-NEXT: addis 7, 2, .LC3@toc@ha
+; NOOPTSWAP-P9-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; NOOPTSWAP-P9-NEXT: ld 4, .LC0@toc@l(4)
+; NOOPTSWAP-P9-NEXT: li 3, 256
+; NOOPTSWAP-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; NOOPTSWAP-P9-NEXT: ld 5, .LC1@toc@l(5)
+; NOOPTSWAP-P9-NEXT: ld 6, .LC2@toc@l(6)
+; NOOPTSWAP-P9-NEXT: ld 7, .LC3@toc@l(7)
+; NOOPTSWAP-P9-NEXT: mtctr 3
+; NOOPTSWAP-P9-NEXT: li 3, 0
+; NOOPTSWAP-P9-NEXT: li 8, 16
+; NOOPTSWAP-P9-NEXT: li 9, 32
+; NOOPTSWAP-P9-NEXT: li 10, 48
+; NOOPTSWAP-P9-NEXT: .p2align 4
+; NOOPTSWAP-P9-NEXT: .LBB0_1: # %vector.body
+; NOOPTSWAP-P9-NEXT: #
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 4, 3
+; NOOPTSWAP-P9-NEXT: add 11, 4, 3
+; NOOPTSWAP-P9-NEXT: add 12, 5, 3
+; NOOPTSWAP-P9-NEXT: add 30, 6, 3
+; NOOPTSWAP-P9-NEXT: add 29, 7, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 5, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 6, 3
+; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 7, 3
+; NOOPTSWAP-P9-NEXT: addi 3, 3, 64
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 8
+; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 8
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 8
+; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 8
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 9
+; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 9
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 9
+; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 9
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 10
+; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 10
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 10
+; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 10
+; NOOPTSWAP-P9-NEXT: bdnz .LBB0_1
+; NOOPTSWAP-P9-NEXT: # %bb.2: # %for.end
+; NOOPTSWAP-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; NOOPTSWAP-P9-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; NOOPTSWAP-P9-NEXT: blr
entry:
br label %vector.body
@@ -96,80 +349,3 @@ vector.body:
for.end:
ret void
}
-
-; CHECK-LABEL: @foo
-; CHECK-NOT: xxpermdi
-; CHECK-NOT: xxswapd
-; CHECK-P9-NOT: xxpermdi
-
-; CHECK: lxvd2x
-; CHECK: lxvd2x
-; CHECK-DAG: lxvd2x
-; CHECK-DAG: vadduwm
-; CHECK: vmuluwm
-; CHECK: stxvd2x
-
-; CHECK: lxvd2x
-; CHECK: lxvd2x
-; CHECK-DAG: lxvd2x
-; CHECK-DAG: vadduwm
-; CHECK: vmuluwm
-; CHECK: stxvd2x
-
-; CHECK: lxvd2x
-; CHECK: lxvd2x
-; CHECK-DAG: lxvd2x
-; CHECK-DAG: vadduwm
-; CHECK: vmuluwm
-; CHECK: stxvd2x
-
-; CHECK: lxvd2x
-; CHECK: lxvd2x
-; CHECK-DAG: lxvd2x
-; CHECK-DAG: vadduwm
-; CHECK: vmuluwm
-; CHECK: stxvd2x
-
-; NOOPTSWAP-LABEL: @foo
-
-; NOOPTSWAP: lxvd2x
-; NOOPTSWAP-DAG: lxvd2x
-; NOOPTSWAP-DAG: lxvd2x
-; NOOPTSWAP-DAG: xxswapd
-; NOOPTSWAP-DAG: xxswapd
-; NOOPTSWAP-DAG: xxswapd
-; NOOPTSWAP-DAG: vadduwm
-; NOOPTSWAP: vmuluwm
-; NOOPTSWAP: xxswapd
-; NOOPTSWAP-DAG: xxswapd
-; NOOPTSWAP-DAG: xxswapd
-; NOOPTSWAP-DAG: stxvd2x
-; NOOPTSWAP-DAG: stxvd2x
-; NOOPTSWAP: stxvd2x
-
-; CHECK-P9-LABEL: @foo
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: lxv
-; CHECK-P9-DAG: vadduwm
-; CHECK-P9-DAG: vadduwm
-; CHECK-P9-DAG: vadduwm
-; CHECK-P9-DAG: vadduwm
-; CHECK-P9-DAG: vmuluwm
-; CHECK-P9-DAG: vmuluwm
-; CHECK-P9-DAG: vmuluwm
-; CHECK-P9-DAG: vmuluwm
-; CHECK-P9-DAG: stxv
-; CHECK-P9-DAG: stxv
-; CHECK-P9-DAG: stxv
-; CHECK-P9-DAG: stxv
-
From 8acf305dbdc8e030541a953be9101323534751e4 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 9 Nov 2024 07:37:49 +0300
Subject: [PATCH 2/2] [PowerPC] Add `SDNPMemOperand` to some nodes
Nodes created with `getMemIntrinsicNode` have memory operands. In order
for these operands to be propagated to machine instructions, the nodes
should have the `SDNPMemOperand` property.
---
llvm/lib/Target/PowerPC/PPCInstrInfo.td | 10 +-
llvm/lib/Target/PowerPC/PPCInstrP10.td | 2 +-
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 4 +-
.../PowerPC/const-nonsplat-array-init.ll | 24 +--
.../CodeGen/PowerPC/const-splat-array-init.ll | 12 +-
.../test/CodeGen/PowerPC/extract-and-store.ll | 6 +-
llvm/test/CodeGen/PowerPC/f128-fma.ll | 16 +-
llvm/test/CodeGen/PowerPC/f128-passByValue.ll | 8 +-
llvm/test/CodeGen/PowerPC/pr45301.ll | 2 +-
llvm/test/CodeGen/PowerPC/pr47891.ll | 2 +-
llvm/test/CodeGen/PowerPC/pr59074.ll | 6 +-
llvm/test/CodeGen/PowerPC/swaps-le-1.ll | 194 +++++++++---------
llvm/test/CodeGen/PowerPC/vec-itofp.ll | 6 +-
.../PowerPC/vec_conv_fp32_to_i16_elts.ll | 4 +-
.../PowerPC/vec_conv_fp32_to_i64_elts.ll | 20 +-
.../PowerPC/vec_conv_fp64_to_i16_elts.ll | 4 +-
.../PowerPC/vec_conv_fp64_to_i32_elts.ll | 8 +-
.../PowerPC/vec_conv_fp_to_i_4byte_elts.ll | 4 +-
.../PowerPC/vec_conv_fp_to_i_8byte_elts.ll | 4 +-
.../PowerPC/vec_conv_i16_to_fp32_elts.ll | 4 +-
.../PowerPC/vec_conv_i16_to_fp64_elts.ll | 16 +-
.../PowerPC/vec_conv_i32_to_fp64_elts.ll | 8 +-
.../PowerPC/vec_conv_i64_to_fp32_elts.ll | 8 +-
.../PowerPC/vec_conv_i8_to_fp32_elts.ll | 8 +-
.../PowerPC/vec_conv_i8_to_fp64_elts.ll | 12 +-
.../PowerPC/vec_conv_i_to_fp_4byte_elts.ll | 4 +-
.../PowerPC/vec_conv_i_to_fp_8byte_elts.ll | 4 +-
...lar-shift-by-byte-multiple-legalization.ll | 122 +++++------
.../PowerPC/wide-scalar-shift-legalization.ll | 50 ++---
29 files changed, 283 insertions(+), 289 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b4a5e41c0107a3..4daa442d001eb1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -166,17 +166,17 @@ def PPCany_fcfidus : PatFrags<(ops node:$op),
def PPCstore_scal_int_from_vsr:
SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
// Extract FPSCR (not modeled at the DAG level).
@@ -376,7 +376,7 @@ def PPCatomicCmpSwap_16 :
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCStoreCond : SDNode<"PPCISD::STORE_COND", SDT_StoreCond,
[SDNPHasChain, SDNPMayStore,
SDNPMemOperand, SDNPOutGlue]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index c4b8597b1df9ff..2fe94f9462b26c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -105,7 +105,7 @@ def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
// PPC Specific DAG Nodes.
def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index fe9ab22c576349..8e400bc63b7851 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -90,11 +90,11 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
diff --git a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
index 18a61d071cca6c..0a701c22b4621c 100644
--- a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
+++ b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
@@ -55,9 +55,9 @@ define dso_local void @foo1_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1798
; P8-LE-NEXT: ori 4, 4, 1284
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 2312
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@@ -143,9 +143,9 @@ define dso_local void @foo2_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2826
; P8-LE-NEXT: ori 4, 4, 2312
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3340
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@@ -231,9 +231,9 @@ define dso_local void @foo3_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1543
; P8-LE-NEXT: ori 4, 4, 1029
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 2057
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@@ -313,9 +313,9 @@ define dso_local void @foo4_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2571
; P8-LE-NEXT: ori 4, 4, 2057
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3085
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@@ -389,8 +389,8 @@ define dso_local void @foo5_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1029
; P8-LE-NEXT: ori 4, 4, 1543
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo5_int_be_reuse4B:
@@ -455,8 +455,8 @@ define dso_local void @foo6_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 2057
; P8-LE-NEXT: ori 4, 4, 2571
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo6_int_le_reuse4B:
@@ -1221,8 +1221,8 @@ define dso_local void @foo15_int_noreuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 1029
; P8-LE-NEXT: ori 4, 4, 1544
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo15_int_noreuse4B:
@@ -1371,8 +1371,8 @@ define dso_local void @foo17_fp_be_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16673
; P8-LE-NEXT: ori 4, 4, 39322
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo17_fp_be_reuse4B:
@@ -1437,8 +1437,8 @@ define dso_local void @foo18_fp_le_reuse4B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16675
; P8-LE-NEXT: ori 4, 4, 13107
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo18_fp_le_reuse4B:
@@ -1504,8 +1504,8 @@ define dso_local void @foo19_fp_be_reuse8B(ptr nocapture noundef writeonly %a) l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 4105
; P8-LE-NEXT: rldic 4, 4, 50, 1
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo19_fp_be_reuse8B:
@@ -1649,8 +1649,8 @@ define dso_local void @foo21_fp_noreuse4B(ptr nocapture noundef writeonly %a) lo
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16268
; P8-LE-NEXT: ori 4, 4, 52430
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo21_fp_noreuse4B:
@@ -1716,8 +1716,8 @@ define dso_local void @foo22_fp_noreuse8B(ptr nocapture noundef writeonly %a) lo
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 21503
; P8-LE-NEXT: rotldi 4, 4, 52
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo22_fp_noreuse8B:
diff --git a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
index 4139a8fbcbb4f1..83acb4fac8a76a 100644
--- a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
+++ b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
@@ -45,8 +45,8 @@ define dso_local void @foo1(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: li 4, 3333
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo1:
@@ -109,8 +109,8 @@ define dso_local void @foo2(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 3333
; P8-LE-NEXT: ori 4, 4, 3333
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo2:
@@ -182,9 +182,9 @@ define dso_local void @foo3(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 3333
; P8-LE-NEXT: ori 4, 4, 3333
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
; P8-LE-NEXT: li 4, 3333
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: sth 4, 20(3)
; P8-LE-NEXT: blr
;
@@ -334,8 +334,8 @@ define dso_local void @foo5(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 5
; P8-LE-NEXT: ori 4, 4, 5653
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo5:
@@ -473,8 +473,8 @@ define dso_local void @foo7(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 508
; P8-LE-NEXT: ori 4, 4, 41045
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: std 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo7:
@@ -539,8 +539,8 @@ define dso_local void @foo8(ptr nocapture noundef writeonly %a) local_unnamed_ad
; P8-LE-NEXT: lxvd2x 0, 0, 4
; P8-LE-NEXT: lis 4, 16469
; P8-LE-NEXT: ori 4, 4, 7864
-; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: stw 4, 16(3)
+; P8-LE-NEXT: stxvd2x 0, 0, 3
; P8-LE-NEXT: blr
;
; P9-LE-LABEL: foo8:
diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index 8bf4013160d8e9..13839a7cd20760 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -574,13 +574,13 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, ptr nocapture %
; CHECK-NEXT: addi r3, r3, .LCPI16_0@toc@l
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: li r3, 16
+; CHECK-NEXT: stfiwx f1, r5, r3
+; CHECK-NEXT: li r3, 20
+; CHECK-NEXT: stxsiwx vs34, r5, r3
; CHECK-NEXT: xxswapd vs35, vs0
; CHECK-NEXT: vperm v3, v2, v2, v3
; CHECK-NEXT: xxswapd vs0, vs35
; CHECK-NEXT: stxvd2x vs0, 0, r5
-; CHECK-NEXT: stfiwx f1, r5, r3
-; CHECK-NEXT: li r3, 20
-; CHECK-NEXT: stxsiwx vs34, r5, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_stores_exceed_vec_size:
diff --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll
index d830727e78fbf1..d55697422c7eba 100644
--- a/llvm/test/CodeGen/PowerPC/f128-fma.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll
@@ -39,10 +39,10 @@ define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
; CHECK-P8-NEXT: vmr v3, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@@ -95,10 +95,10 @@ define void @qpFmadd_02(ptr nocapture readonly %a,
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@@ -214,8 +214,8 @@ define void @qpFnmadd(ptr nocapture readonly %a,
; CHECK-P8-NEXT: stb r4, 63(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)
@@ -331,10 +331,10 @@ define void @qpFmsub(ptr nocapture readonly %a,
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
@@ -451,8 +451,8 @@ define void @qpFnmsub(ptr nocapture readonly %a,
; CHECK-P8-NEXT: stb r4, 63(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
index 04a7d78d714cc5..1572cc082af3ea 100644
--- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
@@ -576,13 +576,13 @@ define void @mixParam_03(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
; CHECK-P8-NEXT: ld r4, 184(r1)
-; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r5
-; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: stxvd2x vs0, 0, r9
+; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: mr r3, r10
; CHECK-P8-NEXT: stxvd2x vs1, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r9
@@ -639,15 +639,15 @@ define fastcc void @mixParam_03f(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
-; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: mr r3, r5
-; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: stxvd2x vs1, 0, r7
+; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __floatsikf
diff --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll
index bb6252e572a1b9..40054ce73188d7 100644
--- a/llvm/test/CodeGen/PowerPC/pr45301.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45301.ll
@@ -23,9 +23,9 @@ define dso_local void @g(ptr %agg.result) local_unnamed_addr #0 {
; CHECK-NEXT: ld r7, 24(r5)
; CHECK-NEXT: std r7, 24(r3)
; CHECK-NEXT: ld r5, 32(r5)
-; CHECK-NEXT: std r5, 32(r3)
; CHECK-NEXT: stwbrx r4, 0, r3
; CHECK-NEXT: li r4, 20
+; CHECK-NEXT: std r5, 32(r3)
; CHECK-NEXT: stwbrx r6, r3, r4
; CHECK-NEXT: addi r1, r1, 112
; CHECK-NEXT: ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll
index 0949b814a13101..4e41b3ee121550 100644
--- a/llvm/test/CodeGen/PowerPC/pr47891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47891.ll
@@ -55,9 +55,9 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-NEXT: std r6, 56(r3)
; CHECK-NEXT: rotldi r6, r7, 1
; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: rldimi r6, r5, 1, 0
; CHECK-NEXT: std r6, 64(r3)
-; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
index d3ca1139b4fd11..6264b9f22876cc 100644
--- a/llvm/test/CodeGen/PowerPC/pr59074.ll
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -33,13 +33,13 @@ define void @pr59074(ptr %0) {
; LE32-NEXT: li 8, 12
; LE32-NEXT: xxswapd 0, 0
; LE32-NEXT: rlwimi 5, 6, 0, 30, 28
-; LE32-NEXT: addi 4, 4, -12
-; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
-; LE32-NEXT: stxvd2x 0, 0, 5
; LE32-NEXT: stw 7, 44(1)
+; LE32-NEXT: addi 4, 4, -12
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
+; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
+; LE32-NEXT: stxvd2x 0, 0, 5
; LE32-NEXT: clrlwi 4, 4, 27
; LE32-NEXT: lwzux 5, 9, 6
; LE32-NEXT: lwz 6, 8(9)
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
index 9c7f1343d7292f..f3e34101efa29a 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll
@@ -47,20 +47,19 @@ define void @foo() {
; CHECK-P8-LABEL: foo:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li 3, 256
-; CHECK-P8-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-P8-NEXT: addis 4, 2, .LC0@toc@ha
; CHECK-P8-NEXT: addis 5, 2, .LC1@toc@ha
; CHECK-P8-NEXT: addis 6, 2, .LC2@toc@ha
; CHECK-P8-NEXT: addis 7, 2, .LC3@toc@ha
; CHECK-P8-NEXT: li 8, 16
+; CHECK-P8-NEXT: li 9, 32
; CHECK-P8-NEXT: mtctr 3
; CHECK-P8-NEXT: ld 4, .LC0@toc@l(4)
; CHECK-P8-NEXT: ld 5, .LC1@toc@l(5)
; CHECK-P8-NEXT: ld 6, .LC2@toc@l(6)
; CHECK-P8-NEXT: ld 7, .LC3@toc@l(7)
; CHECK-P8-NEXT: li 3, 0
-; CHECK-P8-NEXT: li 9, 32
; CHECK-P8-NEXT: li 10, 48
; CHECK-P8-NEXT: .p2align 4
; CHECK-P8-NEXT: .LBB0_1: # %vector.body
@@ -69,107 +68,104 @@ define void @foo() {
; CHECK-P8-NEXT: lxvd2x 35, 5, 3
; CHECK-P8-NEXT: add 11, 4, 3
; CHECK-P8-NEXT: add 12, 5, 3
+; CHECK-P8-NEXT: lxvd2x 36, 6, 3
; CHECK-P8-NEXT: add 30, 6, 3
-; CHECK-P8-NEXT: add 29, 7, 3
+; CHECK-P8-NEXT: lxvd2x 37, 11, 8
+; CHECK-P8-NEXT: lxvd2x 32, 12, 10
; CHECK-P8-NEXT: vadduwm 2, 3, 2
-; CHECK-P8-NEXT: lxvd2x 35, 6, 3
-; CHECK-P8-NEXT: vmuluwm 2, 2, 3
+; CHECK-P8-NEXT: lxvd2x 35, 12, 8
+; CHECK-P8-NEXT: vmuluwm 2, 2, 4
+; CHECK-P8-NEXT: lxvd2x 36, 11, 9
+; CHECK-P8-NEXT: vadduwm 3, 3, 5
+; CHECK-P8-NEXT: lxvd2x 37, 12, 9
; CHECK-P8-NEXT: stxvd2x 34, 7, 3
+; CHECK-P8-NEXT: lxvd2x 34, 30, 10
+; CHECK-P8-NEXT: vadduwm 4, 5, 4
+; CHECK-P8-NEXT: lxvd2x 37, 11, 10
+; CHECK-P8-NEXT: add 11, 7, 3
; CHECK-P8-NEXT: addi 3, 3, 64
-; CHECK-P8-NEXT: lxvd2x 34, 11, 8
-; CHECK-P8-NEXT: lxvd2x 35, 12, 8
-; CHECK-P8-NEXT: vadduwm 2, 3, 2
-; CHECK-P8-NEXT: lxvd2x 35, 30, 8
-; CHECK-P8-NEXT: vmuluwm 2, 2, 3
-; CHECK-P8-NEXT: stxvd2x 34, 29, 8
-; CHECK-P8-NEXT: lxvd2x 34, 11, 9
-; CHECK-P8-NEXT: lxvd2x 35, 12, 9
-; CHECK-P8-NEXT: vadduwm 2, 3, 2
-; CHECK-P8-NEXT: lxvd2x 35, 30, 9
-; CHECK-P8-NEXT: vmuluwm 2, 2, 3
-; CHECK-P8-NEXT: stxvd2x 34, 29, 9
-; CHECK-P8-NEXT: lxvd2x 34, 11, 10
-; CHECK-P8-NEXT: lxvd2x 35, 12, 10
-; CHECK-P8-NEXT: vadduwm 2, 3, 2
-; CHECK-P8-NEXT: lxvd2x 35, 30, 10
-; CHECK-P8-NEXT: vmuluwm 2, 2, 3
-; CHECK-P8-NEXT: stxvd2x 34, 29, 10
+; CHECK-P8-NEXT: vadduwm 5, 0, 5
+; CHECK-P8-NEXT: lxvd2x 32, 30, 8
+; CHECK-P8-NEXT: vmuluwm 2, 5, 2
+; CHECK-P8-NEXT: vmuluwm 3, 3, 0
+; CHECK-P8-NEXT: lxvd2x 32, 30, 9
+; CHECK-P8-NEXT: stxvd2x 34, 11, 10
+; CHECK-P8-NEXT: vmuluwm 4, 4, 0
+; CHECK-P8-NEXT: stxvd2x 35, 11, 8
+; CHECK-P8-NEXT: stxvd2x 36, 11, 9
; CHECK-P8-NEXT: bdnz .LBB0_1
; CHECK-P8-NEXT: # %bb.2: # %for.end
; CHECK-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; CHECK-P8-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; CHECK-P8-NEXT: blr
;
; NOOPTSWAP-P8-LABEL: foo:
; NOOPTSWAP-P8: # %bb.0: # %entry
; NOOPTSWAP-P8-NEXT: li 3, 256
-; NOOPTSWAP-P8-NEXT: std 29, -24(1) # 8-byte Folded Spill
; NOOPTSWAP-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; NOOPTSWAP-P8-NEXT: addis 4, 2, .LC0@toc@ha
; NOOPTSWAP-P8-NEXT: addis 5, 2, .LC1@toc@ha
; NOOPTSWAP-P8-NEXT: addis 6, 2, .LC2@toc@ha
; NOOPTSWAP-P8-NEXT: addis 7, 2, .LC3@toc@ha
; NOOPTSWAP-P8-NEXT: li 8, 16
+; NOOPTSWAP-P8-NEXT: li 9, 32
; NOOPTSWAP-P8-NEXT: mtctr 3
; NOOPTSWAP-P8-NEXT: ld 4, .LC0@toc@l(4)
; NOOPTSWAP-P8-NEXT: ld 5, .LC1@toc@l(5)
; NOOPTSWAP-P8-NEXT: ld 6, .LC2@toc@l(6)
; NOOPTSWAP-P8-NEXT: ld 7, .LC3@toc@l(7)
; NOOPTSWAP-P8-NEXT: li 3, 0
-; NOOPTSWAP-P8-NEXT: li 9, 32
; NOOPTSWAP-P8-NEXT: li 10, 48
; NOOPTSWAP-P8-NEXT: .p2align 4
; NOOPTSWAP-P8-NEXT: .LBB0_1: # %vector.body
; NOOPTSWAP-P8-NEXT: #
; NOOPTSWAP-P8-NEXT: lxvd2x 0, 4, 3
+; NOOPTSWAP-P8-NEXT: lxvd2x 1, 5, 3
+; NOOPTSWAP-P8-NEXT: add 30, 6, 3
; NOOPTSWAP-P8-NEXT: add 11, 4, 3
; NOOPTSWAP-P8-NEXT: add 12, 5, 3
-; NOOPTSWAP-P8-NEXT: add 30, 6, 3
-; NOOPTSWAP-P8-NEXT: add 29, 7, 3
+; NOOPTSWAP-P8-NEXT: lxvd2x 2, 11, 8
+; NOOPTSWAP-P8-NEXT: lxvd2x 3, 12, 8
+; NOOPTSWAP-P8-NEXT: lxvd2x 4, 11, 9
+; NOOPTSWAP-P8-NEXT: lxvd2x 5, 12, 9
+; NOOPTSWAP-P8-NEXT: lxvd2x 6, 11, 10
+; NOOPTSWAP-P8-NEXT: add 11, 7, 3
+; NOOPTSWAP-P8-NEXT: lxvd2x 7, 12, 10
; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 5, 3
-; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
; NOOPTSWAP-P8-NEXT: lxvd2x 0, 6, 3
+; NOOPTSWAP-P8-NEXT: xxswapd 35, 1
+; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 8
; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
-; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 36, 2
+; NOOPTSWAP-P8-NEXT: xxswapd 32, 4
+; NOOPTSWAP-P8-NEXT: xxswapd 38, 6
+; NOOPTSWAP-P8-NEXT: xxswapd 37, 3
+; NOOPTSWAP-P8-NEXT: xxswapd 33, 5
+; NOOPTSWAP-P8-NEXT: xxswapd 39, 7
+; NOOPTSWAP-P8-NEXT: vadduwm 3, 5, 4
+; NOOPTSWAP-P8-NEXT: vadduwm 4, 1, 0
+; NOOPTSWAP-P8-NEXT: xxswapd 40, 0
+; NOOPTSWAP-P8-NEXT: xxswapd 41, 1
+; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 9
+; NOOPTSWAP-P8-NEXT: lxvd2x 1, 30, 10
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 8
+; NOOPTSWAP-P8-NEXT: vmuluwm 3, 3, 9
+; NOOPTSWAP-P8-NEXT: xxswapd 42, 0
+; NOOPTSWAP-P8-NEXT: xxswapd 43, 1
+; NOOPTSWAP-P8-NEXT: vmuluwm 4, 4, 10
; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
+; NOOPTSWAP-P8-NEXT: vadduwm 2, 7, 6
+; NOOPTSWAP-P8-NEXT: xxswapd 1, 35
+; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 11
; NOOPTSWAP-P8-NEXT: stxvd2x 0, 7, 3
; NOOPTSWAP-P8-NEXT: addi 3, 3, 64
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 8
-; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 8
-; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 8
-; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
-; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
-; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 8
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 9
-; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 9
-; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 9
-; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
-; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
-; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 9
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 11, 10
-; NOOPTSWAP-P8-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 12, 10
-; NOOPTSWAP-P8-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P8-NEXT: lxvd2x 0, 30, 10
-; NOOPTSWAP-P8-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P8-NEXT: xxswapd 36, 0
-; NOOPTSWAP-P8-NEXT: vmuluwm 2, 2, 4
-; NOOPTSWAP-P8-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P8-NEXT: stxvd2x 0, 29, 10
+; NOOPTSWAP-P8-NEXT: stxvd2x 1, 11, 8
+; NOOPTSWAP-P8-NEXT: xxswapd 2, 36
+; NOOPTSWAP-P8-NEXT: stxvd2x 2, 11, 9
+; NOOPTSWAP-P8-NEXT: xxswapd 3, 34
+; NOOPTSWAP-P8-NEXT: stxvd2x 3, 11, 10
; NOOPTSWAP-P8-NEXT: bdnz .LBB0_1
; NOOPTSWAP-P8-NEXT: # %bb.2: # %for.end
; NOOPTSWAP-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; NOOPTSWAP-P8-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; NOOPTSWAP-P8-NEXT: blr
;
; CHECK-P9-LABEL: foo:
@@ -229,10 +225,9 @@ define void @foo() {
; NOOPTSWAP-P9-NEXT: addis 5, 2, .LC1@toc@ha
; NOOPTSWAP-P9-NEXT: addis 6, 2, .LC2@toc@ha
; NOOPTSWAP-P9-NEXT: addis 7, 2, .LC3@toc@ha
-; NOOPTSWAP-P9-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; NOOPTSWAP-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill
; NOOPTSWAP-P9-NEXT: ld 4, .LC0@toc@l(4)
; NOOPTSWAP-P9-NEXT: li 3, 256
-; NOOPTSWAP-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill
; NOOPTSWAP-P9-NEXT: ld 5, .LC1@toc@l(5)
; NOOPTSWAP-P9-NEXT: ld 6, .LC2@toc@l(6)
; NOOPTSWAP-P9-NEXT: ld 7, .LC3@toc@l(7)
@@ -245,54 +240,53 @@ define void @foo() {
; NOOPTSWAP-P9-NEXT: .LBB0_1: # %vector.body
; NOOPTSWAP-P9-NEXT: #
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 4, 3
-; NOOPTSWAP-P9-NEXT: add 11, 4, 3
+; NOOPTSWAP-P9-NEXT: lxvd2x 1, 5, 3
+; NOOPTSWAP-P9-NEXT: lxvd2x 2, 6, 3
; NOOPTSWAP-P9-NEXT: add 12, 5, 3
+; NOOPTSWAP-P9-NEXT: add 11, 4, 3
; NOOPTSWAP-P9-NEXT: add 30, 6, 3
-; NOOPTSWAP-P9-NEXT: add 29, 7, 3
-; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 5, 3
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 6, 3
-; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
-; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P9-NEXT: stxvd2x 0, 7, 3
-; NOOPTSWAP-P9-NEXT: addi 3, 3, 64
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 8
+; NOOPTSWAP-P9-NEXT: lxvd2x 3, 11, 8
; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 1
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 8
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 8
+; NOOPTSWAP-P9-NEXT: xxswapd 36, 2
+; NOOPTSWAP-P9-NEXT: lxvd2x 1, 11, 9
; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
-; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 8
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 9
-; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: xxswapd 35, 3
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 4
+; NOOPTSWAP-P9-NEXT: xxswapd 36, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 9
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vadduwm 3, 4, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 36, 1
+; NOOPTSWAP-P9-NEXT: lxvd2x 1, 12, 10
+; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 10
+; NOOPTSWAP-P9-NEXT: add 11, 7, 3
+; NOOPTSWAP-P9-NEXT: vadduwm 4, 5, 4
+; NOOPTSWAP-P9-NEXT: xxswapd 32, 1
+; NOOPTSWAP-P9-NEXT: xxswapd 37, 0
+; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 8
+; NOOPTSWAP-P9-NEXT: vadduwm 5, 0, 5
+; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 9
-; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: vmuluwm 3, 3, 0
+; NOOPTSWAP-P9-NEXT: xxswapd 32, 0
; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 9
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 11, 10
-; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
-; NOOPTSWAP-P9-NEXT: lxvd2x 0, 12, 10
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 4, 4, 0
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 7, 3
+; NOOPTSWAP-P9-NEXT: addi 3, 3, 64
+; NOOPTSWAP-P9-NEXT: xxswapd 1, 35
+; NOOPTSWAP-P9-NEXT: stxvd2x 1, 11, 8
+; NOOPTSWAP-P9-NEXT: xxswapd 0, 36
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 9
; NOOPTSWAP-P9-NEXT: lxvd2x 0, 30, 10
-; NOOPTSWAP-P9-NEXT: vadduwm 2, 3, 2
-; NOOPTSWAP-P9-NEXT: xxswapd 35, 0
-; NOOPTSWAP-P9-NEXT: vmuluwm 2, 2, 3
+; NOOPTSWAP-P9-NEXT: xxswapd 34, 0
+; NOOPTSWAP-P9-NEXT: vmuluwm 2, 5, 2
; NOOPTSWAP-P9-NEXT: xxswapd 0, 34
-; NOOPTSWAP-P9-NEXT: stxvd2x 0, 29, 10
+; NOOPTSWAP-P9-NEXT: stxvd2x 0, 11, 10
; NOOPTSWAP-P9-NEXT: bdnz .LBB0_1
; NOOPTSWAP-P9-NEXT: # %bb.2: # %for.end
; NOOPTSWAP-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; NOOPTSWAP-P9-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; NOOPTSWAP-P9-NEXT: blr
entry:
br label %vector.body
diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index 37a1e46927b1e1..7535fdaf7cbe4d 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -45,11 +45,11 @@ define void @test8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8:
@@ -138,9 +138,9 @@ define void @test4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: vperm v2, v5, v2, v4
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4:
@@ -278,10 +278,10 @@ define void @stest8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: stest8:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index cc38f921b117bf..c7c2e826997be4 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -424,12 +424,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
; CHECK-P8-NEXT: xxmrglw vs0, v3, v0
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v4, v4, v8
; CHECK-P8-NEXT: xxmrglw vs3, v4, v7
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -1056,12 +1056,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
; CHECK-P8-NEXT: xxmrglw vs0, v3, v0
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v4, v4, v8
; CHECK-P8-NEXT: xxmrglw vs3, v4, v7
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
index e5d5e3a1eb6f26..9801788c9059d8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
@@ -52,9 +52,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x i64>) %agg.result, <4 x fl
; CHECK-P8-NEXT: xvcvspdp vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v3, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@@ -113,9 +113,9 @@ define void @test8elt(ptr noalias nocapture sret(<8 x i64>) %agg.result, ptr noc
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, v4
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, v3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -206,6 +206,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr n
; CHECK-P8-NEXT: xvcvdpuxds v2, vs4
; CHECK-P8-NEXT: xvcvdpuxds v3, vs5
; CHECK-P8-NEXT: xxswapd vs4, v1
+; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xxswapd vs1, v0
@@ -218,13 +219,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr n
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: xxswapd vs3, v2
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs2, v4
; CHECK-P8-NEXT: xxswapd vs5, v0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
@@ -357,9 +357,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x i64>) %agg.result,
; CHECK-P8-NEXT: xvcvspdp vs0, vs1
; CHECK-P8-NEXT: xvcvdpuxds v3, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@@ -418,9 +418,9 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x i64>) %agg.result,
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-NEXT: xxswapd vs3, v4
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, v3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@@ -511,6 +511,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result
; CHECK-P8-NEXT: xvcvdpuxds v2, vs4
; CHECK-P8-NEXT: xvcvdpuxds v3, vs5
; CHECK-P8-NEXT: xxswapd vs4, v1
+; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xxswapd vs1, v0
@@ -523,13 +524,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result
; CHECK-P8-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-NEXT: li r4, 64
; CHECK-P8-NEXT: xxswapd vs3, v2
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs2, v4
; CHECK-P8-NEXT: xxswapd vs5, v0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 00ca205e859725..9229310a2d1178 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -371,12 +371,12 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v7, v7, v8
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -918,12 +918,12 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: vmrghh v7, v7, v8
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index 1cd78ecd5cede4..6a767a8c2f9909 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -190,11 +190,11 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr n
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: vmrgew v4, v5, v4
; CHECK-P8-NEXT: xvcvdpuxws v5, vs1
+; CHECK-P8-NEXT: stxvd2x v4, r3, r6
+; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v3, v3, v0
; CHECK-P8-NEXT: xvcvdpuxws v0, vs4
; CHECK-P8-NEXT: stxvd2x v3, r3, r7
-; CHECK-P8-NEXT: stxvd2x v4, r3, r6
-; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v5, v0, v5
; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr
@@ -458,11 +458,11 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i32>) %agg.result
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: vmrgew v4, v5, v4
; CHECK-P8-NEXT: xvcvdpsxws v5, vs1
+; CHECK-P8-NEXT: stxvd2x v4, r3, r6
+; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v3, v3, v0
; CHECK-P8-NEXT: xvcvdpsxws v0, vs4
; CHECK-P8-NEXT: stxvd2x v3, r3, r7
-; CHECK-P8-NEXT: stxvd2x v4, r3, r6
-; CHECK-P8-NEXT: stxvd2x v2, r3, r5
; CHECK-P8-NEXT: vmrgew v5, v0, v5
; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
index 5b360f099e34de..fd9e3c2e8c60f1 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
@@ -68,8 +68,8 @@ define void @test8elt(ptr noalias nocapture sret(<8 x i32>) %agg.result, ptr noc
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvspuxws vs1, vs1
; CHECK-P8-NEXT: xvcvspuxws vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -215,8 +215,8 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x i32>) %agg.result,
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvspsxws vs1, vs1
; CHECK-P8-NEXT: xvcvspsxws vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
index b1e4cc1766cfb0..76bca644075e69 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
@@ -37,8 +37,8 @@ define void @test4elt(ptr noalias nocapture sret(<4 x i64>) %agg.result, ptr noc
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpuxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpuxds vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@@ -251,8 +251,8 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x i64>) %agg.result,
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvdpsxds vs1, vs1
; CHECK-P8-NEXT: xvcvdpsxds vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index f52a92596dec82..70e44fb383325b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -166,10 +166,10 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
@@ -397,9 +397,9 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
-; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index bfb8b72327f5a6..e6fa7182b61669 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -68,9 +68,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i64
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@@ -141,6 +141,7 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, <8 x
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vperm v3, v1, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v0, vs2
; CHECK-P8-NEXT: xvcvuxddp vs2, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
@@ -153,7 +154,6 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, <8 x
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -257,6 +257,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs7, v8
; CHECK-P8-NEXT: xvcvuxddp vs2, v4
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: xxswapd v7, vs0
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
@@ -277,11 +278,10 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -445,9 +445,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: vsrad v2, v2, v4
; CHECK-P8-NEXT: xvcvsxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@@ -539,10 +539,10 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@@ -684,6 +684,8 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs6
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs7, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
@@ -693,8 +695,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 32
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
index c001f2ff51bf6f..bacd8cdedd0990 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
@@ -185,18 +185,18 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
; CHECK-P8-NEXT: xvcvuxwdp vs0, v6
; CHECK-P8-NEXT: xvcvuxwdp vs1, v5
; CHECK-P8-NEXT: xxswapd vs5, vs6
+; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 64
+; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
-; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -445,18 +445,18 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: xvcvsxwdp vs0, v6
; CHECK-P8-NEXT: xvcvsxwdp vs1, v5
; CHECK-P8-NEXT: xxswapd vs5, vs6
+; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: xxswapd vs2, vs4
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs7
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 64
+; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs4, r3, r7
-; CHECK-P8-NEXT: stxvd2x vs5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 6a50e7a6e8e3b1..435b0ab3fea6cc 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -211,13 +211,13 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: xxswapd vs2, v2
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: xxsldwi v6, vs0, vs0, 3
; CHECK-P8-NEXT: xvcvuxdsp vs0, v7
; CHECK-P8-NEXT: xxsldwi v7, vs0, vs0, 3
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: vpkudum v5, v6, v7
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
@@ -503,13 +503,13 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: xxswapd vs2, v2
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: xxsldwi v6, vs0, vs0, 3
; CHECK-P8-NEXT: xvcvsxdsp vs0, v7
; CHECK-P8-NEXT: xxsldwi v7, vs0, vs0, 3
; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r7
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: vpkudum v5, v6, v7
; CHECK-P8-NEXT: xxswapd vs3, v5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 1ff1f6b7bc4e83..170f11f8439e59 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -122,9 +122,9 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, i64 %
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxwsp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -195,6 +195,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vperm v3, v1, v2, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v0, vs2
; CHECK-P8-NEXT: xvcvuxwsp vs2, v3
; CHECK-P8-NEXT: xxswapd vs1, vs1
@@ -207,7 +208,6 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -398,9 +398,9 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result
; CHECK-P8-NEXT: vsraw v2, v2, v3
; CHECK-P8-NEXT: xvcvsxwsp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@@ -480,6 +480,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: vslw v4, v4, v3
; CHECK-P8-NEXT: xxswapd v1, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v2, v2, v1
; CHECK-P8-NEXT: vslw v2, v2, v3
; CHECK-P8-NEXT: vsraw v2, v2, v3
@@ -494,7 +495,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index af3132f88f001a..e8358897fef2ae 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -68,9 +68,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i32
; CHECK-P8-NEXT: vperm v3, v5, v4, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@@ -142,6 +142,7 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, i64
; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: vperm v2, v1, v0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v5, vs2
; CHECK-P8-NEXT: xvcvuxddp vs2, v2
; CHECK-P8-NEXT: xxswapd vs1, vs1
@@ -154,7 +155,6 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, i64
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -256,6 +256,7 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, <1
; CHECK-P8-NEXT: xxswapd v1, vs2
; CHECK-P8-NEXT: vperm v1, v0, v2, v1
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v6, vs3
; CHECK-P8-NEXT: vperm v6, v0, v2, v6
; CHECK-P8-NEXT: xvcvuxddp vs6, v6
@@ -297,7 +298,6 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, <1
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
@@ -480,9 +480,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: vsrad v2, v2, v4
; CHECK-P8-NEXT: xvcvsxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
@@ -575,10 +575,10 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
@@ -733,6 +733,7 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 96
; CHECK-P8-NEXT: xxswapd vs4, vs5
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: stxvd2x vs7, r3, r4
@@ -745,7 +746,6 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
index e3aa898d6e516d..2900c0901a01b3 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
@@ -68,8 +68,8 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, ptr n
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1
; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
@@ -215,8 +215,8 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1
; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
index 5458c174ec88dc..6bf8f1a1e898b2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
@@ -37,8 +37,8 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, ptr
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvuxddp vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt:
@@ -251,8 +251,8 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: xvcvsxddp vs1, vs1
; CHECK-P8-NEXT: xvcvsxddp vs0, vs0
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test4elt_signed:
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index e236cd91aceb22..12976e838f3ca6 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -694,12 +694,12 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
-; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
+; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 0, 27, 28
+; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 26, 28
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: subfic 7, 3, 64
; LE-64BIT-NEXT: ld 8, 8(4)
@@ -868,12 +868,12 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
-; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
+; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 2, 27, 28
+; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: rlwinm 3, 3, 5, 26, 26
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: subfic 7, 3, 64
; LE-64BIT-NEXT: ld 8, 8(4)
@@ -1008,14 +1008,14 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: li 4, 32
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
+; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: lxvd2x 0, 7, 3
; LE-64BIT-NEXT: add 3, 7, 3
; LE-64BIT-NEXT: lxvd2x 1, 3, 6
-; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
+; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: blr
;
; BE-LABEL: lshr_32bytes_dwordOff:
@@ -1111,37 +1111,37 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
-; LE-64BIT-NEXT: xxlxor 2, 2, 2
-; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
-; LE-64BIT-NEXT: addi 8, 1, -32
+; LE-64BIT-NEXT: addi 7, 1, -64
+; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
-; LE-64BIT-NEXT: stxvd2x 2, 7, 6
-; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 0, 27, 28
; LE-64BIT-NEXT: rlwinm 4, 4, 3, 26, 28
; LE-64BIT-NEXT: neg 3, 3
-; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: li 6, 32
+; LE-64BIT-NEXT: stxvd2x 0, 7, 8
+; LE-64BIT-NEXT: xxlxor 0, 0, 0
+; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
-; LE-64BIT-NEXT: stxvd2x 1, 7, 6
-; LE-64BIT-NEXT: stxvd2x 2, 0, 7
-; LE-64BIT-NEXT: subfic 6, 4, 64
-; LE-64BIT-NEXT: ldux 3, 8, 3
-; LE-64BIT-NEXT: ld 7, 16(8)
-; LE-64BIT-NEXT: ld 9, 24(8)
-; LE-64BIT-NEXT: ld 8, 8(8)
-; LE-64BIT-NEXT: srd 10, 7, 6
+; LE-64BIT-NEXT: stxvd2x 0, 7, 6
+; LE-64BIT-NEXT: addi 6, 1, -32
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
+; LE-64BIT-NEXT: stxvd2x 1, 7, 8
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: ldux 3, 6, 3
+; LE-64BIT-NEXT: ld 8, 16(6)
+; LE-64BIT-NEXT: ld 9, 24(6)
+; LE-64BIT-NEXT: ld 6, 8(6)
+; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
-; LE-64BIT-NEXT: sld 7, 7, 4
-; LE-64BIT-NEXT: or 9, 9, 10
-; LE-64BIT-NEXT: srd 10, 8, 6
-; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
+; LE-64BIT-NEXT: or 9, 9, 10
+; LE-64BIT-NEXT: srd 10, 6, 7
+; LE-64BIT-NEXT: srd 7, 3, 7
+; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
-; LE-64BIT-NEXT: or 6, 8, 6
+; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
-; LE-64BIT-NEXT: or 3, 7, 10
+; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@@ -1285,37 +1285,37 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
-; LE-64BIT-NEXT: xxlxor 2, 2, 2
-; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
-; LE-64BIT-NEXT: addi 8, 1, -32
+; LE-64BIT-NEXT: addi 7, 1, -64
+; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
-; LE-64BIT-NEXT: stxvd2x 2, 7, 6
-; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 2, 27, 28
; LE-64BIT-NEXT: rlwinm 4, 4, 5, 26, 26
; LE-64BIT-NEXT: neg 3, 3
-; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: li 6, 32
+; LE-64BIT-NEXT: stxvd2x 0, 7, 8
+; LE-64BIT-NEXT: xxlxor 0, 0, 0
+; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
-; LE-64BIT-NEXT: stxvd2x 1, 7, 6
-; LE-64BIT-NEXT: stxvd2x 2, 0, 7
-; LE-64BIT-NEXT: subfic 6, 4, 64
-; LE-64BIT-NEXT: ldux 3, 8, 3
-; LE-64BIT-NEXT: ld 7, 16(8)
-; LE-64BIT-NEXT: ld 9, 24(8)
-; LE-64BIT-NEXT: ld 8, 8(8)
-; LE-64BIT-NEXT: srd 10, 7, 6
+; LE-64BIT-NEXT: stxvd2x 0, 7, 6
+; LE-64BIT-NEXT: addi 6, 1, -32
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
+; LE-64BIT-NEXT: stxvd2x 1, 7, 8
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: ldux 3, 6, 3
+; LE-64BIT-NEXT: ld 8, 16(6)
+; LE-64BIT-NEXT: ld 9, 24(6)
+; LE-64BIT-NEXT: ld 6, 8(6)
+; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
-; LE-64BIT-NEXT: sld 7, 7, 4
-; LE-64BIT-NEXT: or 9, 9, 10
-; LE-64BIT-NEXT: srd 10, 8, 6
-; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
+; LE-64BIT-NEXT: or 9, 9, 10
+; LE-64BIT-NEXT: srd 10, 6, 7
+; LE-64BIT-NEXT: srd 7, 3, 7
+; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
-; LE-64BIT-NEXT: or 6, 8, 6
+; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
-; LE-64BIT-NEXT: or 3, 7, 10
+; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@@ -1423,25 +1423,25 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
-; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: li 7, 48
+; LE-64BIT-NEXT: xxlxor 2, 2, 2
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: addi 4, 1, -64
; LE-64BIT-NEXT: stxvd2x 2, 4, 6
+; LE-64BIT-NEXT: stxvd2x 2, 0, 4
; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: stxvd2x 0, 4, 7
; LE-64BIT-NEXT: li 7, 32
; LE-64BIT-NEXT: neg 3, 3
; LE-64BIT-NEXT: stxvd2x 1, 4, 7
-; LE-64BIT-NEXT: stxvd2x 2, 0, 4
; LE-64BIT-NEXT: extsw 3, 3
; LE-64BIT-NEXT: addi 4, 1, -32
; LE-64BIT-NEXT: lxvd2x 0, 4, 3
; LE-64BIT-NEXT: add 3, 4, 3
; LE-64BIT-NEXT: lxvd2x 1, 3, 6
-; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
+; LE-64BIT-NEXT: stxvd2x 1, 5, 6
; LE-64BIT-NEXT: blr
;
; BE-LABEL: shl_32bytes_dwordOff:
@@ -1541,17 +1541,17 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 0, 27, 28
-; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
+; LE-64BIT-NEXT: rlwinm 3, 4, 3, 26, 28
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)
; LE-64BIT-NEXT: std 8, -32(1)
-; LE-64BIT-NEXT: rlwinm 3, 4, 3, 26, 28
+; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ldux 4, 9, 7
; LE-64BIT-NEXT: ld 7, 8(9)
-; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ld 8, 16(9)
; LE-64BIT-NEXT: ld 9, 24(9)
; LE-64BIT-NEXT: srd 4, 4, 3
@@ -1716,17 +1716,17 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 2, 27, 28
-; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
+; LE-64BIT-NEXT: rlwinm 3, 4, 5, 26, 26
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)
; LE-64BIT-NEXT: std 8, -32(1)
-; LE-64BIT-NEXT: rlwinm 3, 4, 5, 26, 26
+; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ldux 4, 9, 7
; LE-64BIT-NEXT: ld 7, 8(9)
-; LE-64BIT-NEXT: subfic 6, 3, 64
; LE-64BIT-NEXT: ld 8, 16(9)
; LE-64BIT-NEXT: ld 9, 24(9)
; LE-64BIT-NEXT: srd 4, 4, 3
@@ -1848,16 +1848,16 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_32bytes_dwordOff:
; LE-64BIT: # %bb.0:
-; LE-64BIT-NEXT: lxvd2x 0, 0, 3
; LE-64BIT-NEXT: ld 6, 16(3)
; LE-64BIT-NEXT: ld 7, 24(3)
+; LE-64BIT-NEXT: lxvd2x 0, 0, 3
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: addi 4, 1, -64
-; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
-; LE-64BIT-NEXT: stxvd2x 0, 0, 4
; LE-64BIT-NEXT: std 6, -48(1)
; LE-64BIT-NEXT: sradi 6, 7, 63
+; LE-64BIT-NEXT: rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT: std 7, -40(1)
+; LE-64BIT-NEXT: stxvd2x 0, 0, 4
; LE-64BIT-NEXT: std 6, -8(1)
; LE-64BIT-NEXT: std 6, -16(1)
; LE-64BIT-NEXT: std 6, -24(1)
@@ -1866,8 +1866,8 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; LE-64BIT-NEXT: add 3, 4, 3
; LE-64BIT-NEXT: li 4, 16
; LE-64BIT-NEXT: lxvd2x 1, 3, 4
-; LE-64BIT-NEXT: stxvd2x 1, 5, 4
; LE-64BIT-NEXT: stxvd2x 0, 0, 5
+; LE-64BIT-NEXT: stxvd2x 1, 5, 4
; LE-64BIT-NEXT: blr
;
; BE-LABEL: ashr_32bytes_dwordOff:
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index 1a8592b5e94957..e1731ddfd92875 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -435,12 +435,12 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
; LE-64BIT-NEXT: lwz 3, 0(4)
; LE-64BIT-NEXT: li 4, 48
-; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: stxvd2x 2, 7, 8
+; LE-64BIT-NEXT: stxvd2x 2, 7, 4
; LE-64BIT-NEXT: rlwinm 4, 3, 29, 27, 28
+; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: clrlwi 3, 3, 26
; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: stxvd2x 1, 0, 7
; LE-64BIT-NEXT: xori 8, 3, 63
; LE-64BIT-NEXT: ldux 6, 4, 7
; LE-64BIT-NEXT: ld 7, 16(4)
@@ -605,37 +605,37 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT: # %bb.0:
; LE-64BIT-NEXT: li 6, 16
; LE-64BIT-NEXT: lwz 4, 0(4)
-; LE-64BIT-NEXT: xxlxor 2, 2, 2
-; LE-64BIT-NEXT: addi 7, 1, -64
; LE-64BIT-NEXT: lxvd2x 1, 0, 3
-; LE-64BIT-NEXT: addi 8, 1, -32
+; LE-64BIT-NEXT: addi 7, 1, -64
+; LE-64BIT-NEXT: li 8, 48
; LE-64BIT-NEXT: lxvd2x 0, 3, 6
-; LE-64BIT-NEXT: stxvd2x 2, 7, 6
-; LE-64BIT-NEXT: li 6, 48
; LE-64BIT-NEXT: rlwinm 3, 4, 29, 27, 28
; LE-64BIT-NEXT: clrlwi 4, 4, 26
; LE-64BIT-NEXT: neg 3, 3
-; LE-64BIT-NEXT: stxvd2x 0, 7, 6
-; LE-64BIT-NEXT: li 6, 32
+; LE-64BIT-NEXT: stxvd2x 0, 7, 8
+; LE-64BIT-NEXT: xxlxor 0, 0, 0
+; LE-64BIT-NEXT: li 8, 32
; LE-64BIT-NEXT: extsw 3, 3
-; LE-64BIT-NEXT: stxvd2x 1, 7, 6
-; LE-64BIT-NEXT: stxvd2x 2, 0, 7
-; LE-64BIT-NEXT: subfic 6, 4, 64
-; LE-64BIT-NEXT: ldux 3, 8, 3
-; LE-64BIT-NEXT: ld 7, 16(8)
-; LE-64BIT-NEXT: ld 9, 24(8)
-; LE-64BIT-NEXT: ld 8, 8(8)
-; LE-64BIT-NEXT: srd 10, 7, 6
+; LE-64BIT-NEXT: stxvd2x 0, 7, 6
+; LE-64BIT-NEXT: addi 6, 1, -32
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
+; LE-64BIT-NEXT: stxvd2x 1, 7, 8
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: ldux 3, 6, 3
+; LE-64BIT-NEXT: ld 8, 16(6)
+; LE-64BIT-NEXT: ld 9, 24(6)
+; LE-64BIT-NEXT: ld 6, 8(6)
+; LE-64BIT-NEXT: srd 10, 8, 7
; LE-64BIT-NEXT: sld 9, 9, 4
-; LE-64BIT-NEXT: sld 7, 7, 4
-; LE-64BIT-NEXT: or 9, 9, 10
-; LE-64BIT-NEXT: srd 10, 8, 6
-; LE-64BIT-NEXT: srd 6, 3, 6
; LE-64BIT-NEXT: sld 8, 8, 4
+; LE-64BIT-NEXT: or 9, 9, 10
+; LE-64BIT-NEXT: srd 10, 6, 7
+; LE-64BIT-NEXT: srd 7, 3, 7
+; LE-64BIT-NEXT: sld 6, 6, 4
; LE-64BIT-NEXT: sld 3, 3, 4
-; LE-64BIT-NEXT: or 6, 8, 6
+; LE-64BIT-NEXT: or 6, 6, 7
; LE-64BIT-NEXT: std 3, 0(5)
-; LE-64BIT-NEXT: or 3, 7, 10
+; LE-64BIT-NEXT: or 3, 8, 10
; LE-64BIT-NEXT: std 9, 24(5)
; LE-64BIT-NEXT: std 6, 8(5)
; LE-64BIT-NEXT: std 3, 16(5)
@@ -782,10 +782,10 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-64BIT-NEXT: ld 3, 16(3)
; LE-64BIT-NEXT: sradi 8, 6, 63
; LE-64BIT-NEXT: rlwinm 9, 4, 29, 27, 28
-; LE-64BIT-NEXT: clrlwi 4, 4, 26
-; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 6, -40(1)
; LE-64BIT-NEXT: std 3, -48(1)
+; LE-64BIT-NEXT: clrlwi 4, 4, 26
+; LE-64BIT-NEXT: stxvd2x 0, 0, 7
; LE-64BIT-NEXT: std 8, -8(1)
; LE-64BIT-NEXT: std 8, -16(1)
; LE-64BIT-NEXT: std 8, -24(1)
More information about the llvm-commits
mailing list