[llvm] 7c5d5ed - [PowerPC] Generate tests for 16-byte atomic load/store. NFC.
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 9 01:41:16 PDT 2022
Author: Kai Luo
Date: 2022-04-09T16:36:57+08:00
New Revision: 7c5d5edec8abb7389b16f5a92107f2d8d4be94e1
URL: https://github.com/llvm/llvm-project/commit/7c5d5edec8abb7389b16f5a92107f2d8d4be94e1
DIFF: https://github.com/llvm/llvm-project/commit/7c5d5edec8abb7389b16f5a92107f2d8d4be94e1.diff
LOG: [PowerPC] Generate tests for 16-byte atomic load/store. NFC.
Added:
Modified:
llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
index e60407d664b0e..b971b52d874ca 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
@@ -2,6 +2,21 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \
; RUN: -ppc-quadword-atomics < %s | FileCheck --check-prefix=P8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
+; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
+; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=AIX64-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
+; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \
+; RUN: | FileCheck --check-prefix=PPC-PWR8 %s
define dso_local i128 @lq_unordered(i128* %src) {
; P8-LABEL: lq_unordered:
@@ -10,6 +25,61 @@ define dso_local i128 @lq_unordered(i128* %src) {
; P8-NEXT: mr r3, r4
; P8-NEXT: mr r4, r5
; P8-NEXT: blr
+;
+; PWR7-LABEL: lq_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: li r4, 0
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lq_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lq r4, 0(r3)
+; LE-PWR8-NEXT: mr r3, r5
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lq_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: li r4, 0
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lq_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = load atomic i128, i128* %src unordered, align 16
ret i128 %0
@@ -24,6 +94,70 @@ define dso_local i128 @lqx_unordered(i128* %src, i64 %idx) {
; P8-NEXT: mr r3, r4
; P8-NEXT: mr r4, r5
; P8-NEXT: blr
+;
+; PWR7-LABEL: lqx_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: sldi r4, r4, 4
+; PWR7-NEXT: add r3, r3, r4
+; PWR7-NEXT: li r4, 0
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lqx_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sldi r4, r4, 4
+; LE-PWR8-NEXT: std r2, -8(r1) # 8-byte Folded Spill
+; LE-PWR8-NEXT: add r4, r3, r4
+; LE-PWR8-NEXT: lq r2, 0(r4)
+; LE-PWR8-NEXT: mr r4, r2
+; LE-PWR8-NEXT: ld r2, -8(r1) # 8-byte Folded Reload
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lqx_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sldi r4, r4, 4
+; AIX64-PWR8-NEXT: add r3, r3, r4
+; AIX64-PWR8-NEXT: li r4, 0
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lqx_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: slwi r4, r6, 4
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: add r4, r3, r4
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = getelementptr i128, i128* %src, i64 %idx
%1 = load atomic i128, i128* %0 unordered, align 16
@@ -39,6 +173,67 @@ define dso_local i128 @lq_big_offset_unordered(i128* %src) {
; P8-NEXT: mr r3, r4
; P8-NEXT: mr r4, r5
; P8-NEXT: blr
+;
+; PWR7-LABEL: lq_big_offset_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: addis r3, r3, 32
+; PWR7-NEXT: li r4, 0
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lq_big_offset_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lis r4, 32
+; LE-PWR8-NEXT: std r2, -8(r1) # 8-byte Folded Spill
+; LE-PWR8-NEXT: add r4, r3, r4
+; LE-PWR8-NEXT: lq r2, 0(r4)
+; LE-PWR8-NEXT: mr r4, r2
+; LE-PWR8-NEXT: ld r2, -8(r1) # 8-byte Folded Reload
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lq_big_offset_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: addis r3, r3, 32
+; AIX64-PWR8-NEXT: li r4, 0
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lq_big_offset_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: addis r4, r3, 32
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = getelementptr i128, i128* %src, i64 131072
%1 = load atomic i128, i128* %0 unordered, align 16
@@ -52,6 +247,61 @@ define dso_local i128 @lq_monotonic(i128* %src) {
; P8-NEXT: mr r3, r4
; P8-NEXT: mr r4, r5
; P8-NEXT: blr
+;
+; PWR7-LABEL: lq_monotonic:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: li r4, 0
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lq_monotonic:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lq r4, 0(r3)
+; LE-PWR8-NEXT: mr r3, r5
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lq_monotonic:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: li r4, 0
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lq_monotonic:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = load atomic i128, i128* %src monotonic, align 16
ret i128 %0
@@ -67,6 +317,64 @@ define dso_local i128 @lq_acquire(i128* %src) {
; P8-NEXT: bne- cr7, .+4
; P8-NEXT: isync
; P8-NEXT: blr
+;
+; PWR7-LABEL: lq_acquire:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: li r4, 2
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lq_acquire:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lq r4, 0(r3)
+; LE-PWR8-NEXT: cmpd cr7, r5, r5
+; LE-PWR8-NEXT: mr r3, r5
+; LE-PWR8-NEXT: bne- cr7, .+4
+; LE-PWR8-NEXT: isync
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lq_acquire:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: li r4, 2
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lq_acquire:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r6, 2
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = load atomic i128, i128* %src acquire, align 16
ret i128 %0
@@ -83,6 +391,65 @@ define dso_local i128 @lq_seqcst(i128* %src) {
; P8-NEXT: bne- cr7, .+4
; P8-NEXT: isync
; P8-NEXT: blr
+;
+; PWR7-LABEL: lq_seqcst:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: li r4, 5
+; PWR7-NEXT: bl __atomic_load_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: lq_seqcst:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: lq r4, 0(r3)
+; LE-PWR8-NEXT: cmpd cr7, r5, r5
+; LE-PWR8-NEXT: mr r3, r5
+; LE-PWR8-NEXT: bne- cr7, .+4
+; LE-PWR8-NEXT: isync
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: lq_seqcst:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: li r4, 5
+; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: lq_seqcst:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r6, 5
+; PPC-PWR8-NEXT: bl __atomic_load
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = load atomic i128, i128* %src seq_cst, align 16
ret i128 %0
@@ -95,6 +462,70 @@ define dso_local void @stq_unordered(i128 %val, i128* %dst) {
; P8-NEXT: mr r6, r3
; P8-NEXT: stq r6, 0(r5)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stq_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r6, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: mr r3, r5
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: li r6, 0
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stq_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: mr r7, r3
+; LE-PWR8-NEXT: mr r6, r4
+; LE-PWR8-NEXT: stq r6, 0(r5)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stq_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r6, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: mr r3, r5
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: li r6, 0
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stq_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r7
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
store atomic i128 %val, i128* %dst unordered, align 16
ret void
@@ -109,6 +540,76 @@ define dso_local void @stqx_unordered(i128 %val, i128* %dst, i64 %idx) {
; P8-NEXT: add r3, r5, r6
; P8-NEXT: stq r8, 0(r3)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stqx_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r7, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: sldi r3, r6, 4
+; PWR7-NEXT: li r6, 0
+; PWR7-NEXT: add r3, r5, r3
+; PWR7-NEXT: mr r5, r7
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stqx_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sldi r6, r6, 4
+; LE-PWR8-NEXT: mr r9, r3
+; LE-PWR8-NEXT: mr r8, r4
+; LE-PWR8-NEXT: add r3, r5, r6
+; LE-PWR8-NEXT: stq r8, 0(r3)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stqx_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r7, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: sldi r3, r6, 4
+; AIX64-PWR8-NEXT: li r6, 0
+; AIX64-PWR8-NEXT: add r3, r5, r3
+; AIX64-PWR8-NEXT: mr r5, r7
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stqx_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: slwi r8, r10, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: add r6, r7, r8
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r6
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = getelementptr i128, i128* %dst, i64 %idx
store atomic i128 %val, i128* %0 unordered, align 16
@@ -124,6 +625,73 @@ define dso_local void @stq_big_offset_unordered(i128 %val, i128* %dst) {
; P8-NEXT: add r3, r5, r6
; P8-NEXT: stq r8, 0(r3)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stq_big_offset_unordered:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r6, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: addis r3, r5, 32
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: li r6, 0
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stq_big_offset_unordered:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lis r6, 32
+; LE-PWR8-NEXT: mr r9, r3
+; LE-PWR8-NEXT: mr r8, r4
+; LE-PWR8-NEXT: add r3, r5, r6
+; LE-PWR8-NEXT: stq r8, 0(r3)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stq_big_offset_unordered:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r6, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: addis r3, r5, 32
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: li r6, 0
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stq_big_offset_unordered:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: addis r6, r7, 32
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r6
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = getelementptr i128, i128* %dst, i64 131072
store atomic i128 %val, i128* %0 unordered, align 16
@@ -137,6 +705,70 @@ define dso_local void @stq_monotonic(i128 %val, i128* %dst) {
; P8-NEXT: mr r6, r3
; P8-NEXT: stq r6, 0(r5)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stq_monotonic:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r6, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: mr r3, r5
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: li r6, 0
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stq_monotonic:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: mr r7, r3
+; LE-PWR8-NEXT: mr r6, r4
+; LE-PWR8-NEXT: stq r6, 0(r5)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stq_monotonic:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r6, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: mr r3, r5
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: li r6, 0
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stq_monotonic:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r6, 0
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r7
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
store atomic i128 %val, i128* %dst monotonic, align 16
ret void
@@ -150,6 +782,71 @@ define dso_local void @stq_release(i128 %val, i128* %dst) {
; P8-NEXT: mr r6, r3
; P8-NEXT: stq r6, 0(r5)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stq_release:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r6, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: mr r3, r5
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: li r6, 3
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stq_release:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r7, r3
+; LE-PWR8-NEXT: mr r6, r4
+; LE-PWR8-NEXT: stq r6, 0(r5)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stq_release:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r6, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: mr r3, r5
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: li r6, 3
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stq_release:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r6, 3
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r7
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
store atomic i128 %val, i128* %dst release, align 16
ret void
@@ -163,6 +860,71 @@ define dso_local void @stq_seqcst(i128 %val, i128* %dst) {
; P8-NEXT: mr r6, r3
; P8-NEXT: stq r6, 0(r5)
; P8-NEXT: blr
+;
+; PWR7-LABEL: stq_seqcst:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: std r0, 16(r1)
+; PWR7-NEXT: stdu r1, -112(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: mr r6, r4
+; PWR7-NEXT: mr r4, r3
+; PWR7-NEXT: mr r3, r5
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_store_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: stq_seqcst:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: mr r7, r3
+; LE-PWR8-NEXT: mr r6, r4
+; LE-PWR8-NEXT: stq r6, 0(r5)
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: stq_seqcst:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: mr r6, r4
+; AIX64-PWR8-NEXT: mr r4, r3
+; AIX64-PWR8-NEXT: mr r3, r5
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: stq_seqcst:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -32(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 32
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: stw r6, 28(r1)
+; PPC-PWR8-NEXT: stw r5, 24(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 16
+; PPC-PWR8-NEXT: li r6, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r7
+; PPC-PWR8-NEXT: bl __atomic_store
+; PPC-PWR8-NEXT: lwz r0, 36(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 32
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
store atomic i128 %val, i128* %dst seq_cst, align 16
ret void
More information about the llvm-commits mailing list