[llvm] c063946 - [AIX] Adjust CSR order to avoid breaking ABI regarding traceback

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 2 21:45:39 PDT 2021


Author: Kai Luo
Date: 2021-07-03T04:45:26Z
New Revision: c063946476e083a9a0c5bd397337d1ece4742ec6

URL: https://github.com/llvm/llvm-project/commit/c063946476e083a9a0c5bd397337d1ece4742ec6
DIFF: https://github.com/llvm/llvm-project/commit/c063946476e083a9a0c5bd397337d1ece4742ec6.diff

LOG: [AIX] Adjust CSR order to avoid breaking ABI regarding traceback

Allocate non-volatile registers in descending order, starting from GPR31, to be compatible with the ABI with regard to gpr_save.

Quoted from https://www.ibm.com/docs/en/ssw_aix_72/assembler/assembler_pdf.pdf, page 55:
> The preferred method of using GPRs is to use the volatile registers first. Next, use the nonvolatile registers
> in descending order, starting with GPR31.

This patch is based on @jsji's initial draft.

Tested with test-suite and SPEC; no degradation was found.

Reviewed By: jsji, ZarkoCA, xingxue

Differential Revision: https://reviews.llvm.org/D100167

Added: 
    llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCRegisterInfo.td
    llvm/lib/Target/PowerPC/PPCSubtarget.h
    llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
    llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll
    llvm/test/CodeGen/PowerPC/inc-of-add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 6fbff72d1ac91..e1d1c52aa53ae 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -303,9 +303,14 @@ def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12),
                                                     R31, R0, R1, FP, BP)> {
   // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
   // put it at the end of the list.
-  let AltOrders = [(add (sub GPRC, R2), R2)];
+  // On AIX, CSRs are allocated starting from R31 according to:
+  // https://www.ibm.com/docs/en/ssw_aix_72/assembler/assembler_pdf.pdf.
+  // This also helps setting the correct `NumOfGPRsSaved' in traceback table.
+  let AltOrders = [(add (sub GPRC, R2), R2),
+                   (add (sequence "R%u", 2, 12),
+                        (sequence "R%u", 31, 13), R0, R1, FP, BP)];
   let AltOrderSelect = [{
-    return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
+    return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx();
   }];
 }
 
@@ -314,9 +319,11 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
                                                 X31, X13, X0, X1, FP8, BP8)> {
   // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
   // put it at the end of the list.
-  let AltOrders = [(add (sub G8RC, X2), X2)];
+  let AltOrders = [(add (sub G8RC, X2), X2),
+                   (add (sequence "X%u", 2, 12),
+                        (sequence "X%u", 31, 13), X0, X1, FP8, BP8)];
   let AltOrderSelect = [{
-    return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
+    return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx();
   }];
 }
 
@@ -326,18 +333,22 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
 def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> {
   // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
   // put it at the end of the list.
-  let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
+  let AltOrders = [(add (sub GPRC_NOR0, R2), R2),
+                   (add (sequence "R%u", 2, 12),
+                        (sequence "R%u", 31, 13), R1, FP, BP, ZERO)];
   let AltOrderSelect = [{
-    return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
+    return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx();
   }];
 }
 
 def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
   // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
   // put it at the end of the list.
-  let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
+  let AltOrders = [(add (sub G8RC_NOX0, X2), X2),
+                   (add (sequence "X%u", 2, 12),
+                        (sequence "X%u", 31, 13), X1, FP8, BP8, ZERO8)];
   let AltOrderSelect = [{
-    return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
+    return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx();
   }];
 }
 

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index abc7ea1d14128..56b7b8ab75494 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -411,6 +411,16 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
     return PredictableSelectIsExpensive;
   }
 
+  // Select allocation orders of GPRC and G8RC. It should be strictly consistent
+  // with corresponding AltOrders in PPCRegisterInfo.td.
+  unsigned getGPRAllocationOrderIdx() const {
+    if (is64BitELFABI())
+      return 1;
+    if (isAIXABI())
+      return 2;
+    return 0;
+  }
+
   // GlobalISEL
   const CallLowering *getCallLowering() const override;
   const RegisterBankInfo *getRegBankInfo() const override;

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index eb095a75daf31..e483b1823707c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -2337,7 +2337,7 @@ define void @caller_mix() {
 
 ; ASM64PWR4:      mflr 0
 ; ASM64PWR4-DAG:  std 0, 16(1)
-; ASM64PWR4-DAG:  stdu 1, -256(1)
+; ASM64PWR4-DAG:  stdu 1, -240(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 112(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 120(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 128(1)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
index ac6c25de82d88..a83b55c500490 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
@@ -203,7 +203,7 @@ entry:
 
 ; CHECKASM-LABEL: .call_test_byval_mem3:
 
-; ASM32BIT:       stwu 1, -112(1)
+; ASM32BIT:       stwu 1, -96(1)
 ; ASM32BIT-DAG:   lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2)
 ; ASM32BIT-DAG:   addi 3, 1, 56
 ; ASM32BIT-DAG:   addi 4, [[REG]], 24
@@ -216,7 +216,7 @@ entry:
 ; ASM32BIT-DAG:   lwz 9, 16([[REG]])
 ; ASM32BIT-DAG:   lwz 10, 20([[REG]])
 ; ASM32BIT:       bl .test_byval_mem3
-; ASM32BIT:       addi 1, 1, 112
+; ASM32BIT:       addi 1, 1, 96
 
 ; The memcpy call was inlined in 64-bit so MIR test is redundant and omitted.
 ; ASM64BIT:       stdu 1, -128(1)
@@ -319,7 +319,7 @@ entry:
 ; 32BIT-NEXT:     BL_NOP <mcsymbol .test_byval_mem4>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1
 ; 32BIT-NEXT:     ADJCALLSTACKUP 316, 0, implicit-def dead $r1, implicit $r1
 
-; ASM32BIT:       stwu 1, -336(1)
+; ASM32BIT:       stwu 1, -320(1)
 ; ASM32BIT-NEXT:  stw [[REG1:[0-9]+]], {{[0-9]+}}(1)
 ; ASM32BIT:       lwz [[REG1]], L..C{{[0-9]+}}(2)
 ; ASM32BIT-DAG:   lhz [[REG2:[0-9]+]], 28([[REG1]])
@@ -338,7 +338,7 @@ entry:
 ; ASM32BIT-DAG:   lwz 9, 20([[REG1]])
 ; ASM32BIT-DAG:   lwz 10, 24([[REG1]])
 ; ASM32BIT:       bl .test_byval_mem4
-; ASM32BIT:       addi 1, 1, 336
+; ASM32BIT:       addi 1, 1, 320
 
 ; Confirm the expected memcpy call is independent of the call to test_byval_mem4.
 ; 64BIT:          ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
@@ -363,7 +363,7 @@ entry:
 ; 64BIT-NEXT:     BL8_NOP <mcsymbol .test_byval_mem4>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1
 ; 64BIT-NEXT:     ADJCALLSTACKUP 344, 0, implicit-def dead $r1, implicit $r1
 
-; ASM64BIT:       stdu 1, -368(1)
+; ASM64BIT:       stdu 1, -352(1)
 ; ASM64BIT-DAG:   ld [[REG1:[0-9]+]], L..C{{[0-9]+}}(2)
 ; ASM64BIT-DAG:   addi 3, 1, 112
 ; ASM64BIT-DAG:   addi 4, [[REG1]], 24
@@ -383,7 +383,7 @@ entry:
 ; ASM64BIT-DAG:   ld 9, 8([[REG1]])
 ; ASM64BIT-DAG:   ld 10, 16([[REG1]])
 ; ASM64BIT:       bl .test_byval_mem4
-; ASM64BIT:       addi 1, 1, 368
+; ASM64BIT:       addi 1, 1, 352
 
 define void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1 %s) {
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll
new file mode 100644
index 0000000000000..c41b742f3e153
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=powerpc64-aix-xcoff -debug-only=regalloc < %s 2>&1 | \
+; RUN:   FileCheck --check-prefix=AIX-64 %s
+; RUN: llc -mtriple=powerpc-aix-xcoff -debug-only=regalloc < %s 2>&1 | \
+; RUN:   FileCheck --check-prefix=AIX-32 %s
+
+define i32 @g(i32 %a, i32 %b) {
+; AIX-64: AllocationOrder(G8RC_and_G8RC_NOX0) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ]
+; AIX-64: AllocationOrder(G8RC) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x0 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ]
+; AIX-32: AllocationOrder(GPRC) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r0 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ]
+; AIX-32: AllocationOrder(GPRC_and_GPRC_NOR0) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ]
+  %c = add i32 %a, %b
+  %d = shl i32 %a, 4
+  %cmp = icmp slt i32 %c, %d
+  %e = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %e
+}
+
+define float @f(float %a, float %b) {
+; AIX-32: AllocationOrder(F4RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ]
+  %c = fadd float %a, %b
+  ret float %c
+}
+
+define double @d(double %a, double %b) {
+; AIX-64: AllocationOrder(VFRC) = [ $vf2 $vf3 $vf4 $vf5 $vf0 $vf1 $vf6 $vf7 $vf8 $vf9 $vf10 $vf11 $vf12 $vf13 $vf14 $vf15 $vf16 $vf17 $vf18 $vf19 $vf31 $vf30 $vf29 $vf28 $vf27 $vf26 $vf25 $vf24 $vf23 $vf22 $vf21 $vf20 ]
+; AIX-64: AllocationOrder(F8RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ]
+  %c = fadd double %a, %b
+  ret double %c
+}

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll
index 616a94f9a16b1..702a5d2c4f206 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll
@@ -14,10 +14,10 @@ declare hidden fastcc i32 @spam(%1*, %2*, %3*) unnamed_addr #0
 
 ; Function Attrs: nounwind
 define void @baz(%3* %0) local_unnamed_addr #2 {
-; AIX-64: std 30
-; AIX-64: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2
-; AIX-32: stw 30
-; AIX-32: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2
+; AIX-64: std 31
+; AIX-64: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1
+; AIX-32: stw 31
+; AIX-32: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1
   %2 = call signext i32 @wibble(%1* nonnull undef) #2
   %3 = call fastcc zeroext i32 @spam(%1* nonnull undef, %2* nonnull undef, %3* nonnull %0)
   unreachable

diff  --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
index 2742c493e986d..0b06d7ed586bf 100644
--- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll
+++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=ppc32-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC32
 ; RUN: llc < %s -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE
-; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE
+; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE,AIX-PPC64
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64LE
 
 ; These two forms are equivalent:
@@ -162,102 +162,102 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32-NEXT:    addi 1, 1, 64
 ; PPC32-NEXT:    blr
 ;
-; PPC64BE-LABEL: vector_i128_i8:
-; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    std 21, -88(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 21, 207(1)
-; PPC64BE-NEXT:    std 22, -80(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 22, 199(1)
-; PPC64BE-NEXT:    lbz 23, 191(1)
-; PPC64BE-NEXT:    add 6, 21, 6
-; PPC64BE-NEXT:    lbz 21, 231(1)
-; PPC64BE-NEXT:    add 5, 22, 5
-; PPC64BE-NEXT:    lbz 22, 223(1)
-; PPC64BE-NEXT:    add 4, 23, 4
-; PPC64BE-NEXT:    lbz 23, 215(1)
-; PPC64BE-NEXT:    add 9, 21, 9
-; PPC64BE-NEXT:    lbz 25, 127(1)
-; PPC64BE-NEXT:    add 8, 22, 8
-; PPC64BE-NEXT:    lbz 21, 255(1)
-; PPC64BE-NEXT:    add 7, 23, 7
-; PPC64BE-NEXT:    lbz 24, 119(1)
-; PPC64BE-NEXT:    addi 9, 9, 1
-; PPC64BE-NEXT:    lbz 22, 247(1)
-; PPC64BE-NEXT:    add 25, 21, 25
-; PPC64BE-NEXT:    lbz 23, 239(1)
-; PPC64BE-NEXT:    addi 8, 8, 1
-; PPC64BE-NEXT:    lbz 28, 151(1)
-; PPC64BE-NEXT:    add 24, 22, 24
-; PPC64BE-NEXT:    lbz 21, 279(1)
-; PPC64BE-NEXT:    add 10, 23, 10
-; PPC64BE-NEXT:    lbz 27, 143(1)
-; PPC64BE-NEXT:    addi 10, 10, 1
-; PPC64BE-NEXT:    lbz 22, 271(1)
-; PPC64BE-NEXT:    add 28, 21, 28
-; PPC64BE-NEXT:    lbz 26, 135(1)
-; PPC64BE-NEXT:    addi 7, 7, 1
-; PPC64BE-NEXT:    lbz 23, 263(1)
-; PPC64BE-NEXT:    add 27, 22, 27
-; PPC64BE-NEXT:    lbz 11, 183(1)
-; PPC64BE-NEXT:    addi 6, 6, 1
-; PPC64BE-NEXT:    lbz 21, 311(1)
-; PPC64BE-NEXT:    add 26, 23, 26
-; PPC64BE-NEXT:    lbz 12, 175(1)
-; PPC64BE-NEXT:    addi 5, 5, 1
-; PPC64BE-NEXT:    lbz 0, 303(1)
-; PPC64BE-NEXT:    add 11, 21, 11
-; PPC64BE-NEXT:    lbz 30, 167(1)
-; PPC64BE-NEXT:    addi 11, 11, 1
-; PPC64BE-NEXT:    lbz 22, 295(1)
-; PPC64BE-NEXT:    add 12, 0, 12
-; PPC64BE-NEXT:    lbz 29, 159(1)
-; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    lbz 23, 287(1)
-; PPC64BE-NEXT:    add 30, 22, 30
-; PPC64BE-NEXT:    stb 11, 15(3)
-; PPC64BE-NEXT:    addi 11, 12, 1
-; PPC64BE-NEXT:    add 29, 23, 29
-; PPC64BE-NEXT:    stb 11, 14(3)
-; PPC64BE-NEXT:    addi 11, 30, 1
-; PPC64BE-NEXT:    stb 11, 13(3)
-; PPC64BE-NEXT:    addi 11, 29, 1
-; PPC64BE-NEXT:    stb 11, 12(3)
-; PPC64BE-NEXT:    addi 11, 28, 1
-; PPC64BE-NEXT:    stb 11, 11(3)
-; PPC64BE-NEXT:    addi 11, 27, 1
-; PPC64BE-NEXT:    stb 11, 10(3)
-; PPC64BE-NEXT:    addi 11, 26, 1
-; PPC64BE-NEXT:    stb 11, 9(3)
-; PPC64BE-NEXT:    addi 11, 25, 1
-; PPC64BE-NEXT:    stb 11, 8(3)
-; PPC64BE-NEXT:    addi 11, 24, 1
-; PPC64BE-NEXT:    stb 11, 7(3)
-; PPC64BE-NEXT:    stb 10, 6(3)
-; PPC64BE-NEXT:    stb 9, 5(3)
-; PPC64BE-NEXT:    stb 8, 4(3)
-; PPC64BE-NEXT:    stb 7, 3(3)
-; PPC64BE-NEXT:    stb 6, 2(3)
-; PPC64BE-NEXT:    stb 5, 1(3)
-; PPC64BE-NEXT:    stb 4, 0(3)
-; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 21, -88(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    blr
+; AIX-PPC64-LABEL: vector_i128_i8:
+; AIX-PPC64:       # %bb.0:
+; AIX-PPC64-NEXT:    std 22, -80(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    lbz 22, 207(1)
+; AIX-PPC64-NEXT:    std 23, -72(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 24, -64(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 25, -56(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 28, -32(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 31, -8(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    lbz 23, 199(1)
+; AIX-PPC64-NEXT:    lbz 24, 191(1)
+; AIX-PPC64-NEXT:    add 6, 22, 6
+; AIX-PPC64-NEXT:    lbz 22, 231(1)
+; AIX-PPC64-NEXT:    add 5, 23, 5
+; AIX-PPC64-NEXT:    lbz 23, 223(1)
+; AIX-PPC64-NEXT:    add 4, 24, 4
+; AIX-PPC64-NEXT:    lbz 24, 215(1)
+; AIX-PPC64-NEXT:    add 9, 22, 9
+; AIX-PPC64-NEXT:    lbz 26, 127(1)
+; AIX-PPC64-NEXT:    add 8, 23, 8
+; AIX-PPC64-NEXT:    lbz 22, 255(1)
+; AIX-PPC64-NEXT:    add 7, 24, 7
+; AIX-PPC64-NEXT:    lbz 25, 119(1)
+; AIX-PPC64-NEXT:    addi 9, 9, 1
+; AIX-PPC64-NEXT:    lbz 23, 247(1)
+; AIX-PPC64-NEXT:    add 26, 22, 26
+; AIX-PPC64-NEXT:    lbz 24, 239(1)
+; AIX-PPC64-NEXT:    addi 8, 8, 1
+; AIX-PPC64-NEXT:    lbz 29, 151(1)
+; AIX-PPC64-NEXT:    add 25, 23, 25
+; AIX-PPC64-NEXT:    lbz 22, 279(1)
+; AIX-PPC64-NEXT:    add 10, 24, 10
+; AIX-PPC64-NEXT:    lbz 28, 143(1)
+; AIX-PPC64-NEXT:    addi 10, 10, 1
+; AIX-PPC64-NEXT:    lbz 23, 271(1)
+; AIX-PPC64-NEXT:    add 29, 22, 29
+; AIX-PPC64-NEXT:    lbz 27, 135(1)
+; AIX-PPC64-NEXT:    addi 7, 7, 1
+; AIX-PPC64-NEXT:    lbz 24, 263(1)
+; AIX-PPC64-NEXT:    add 28, 23, 28
+; AIX-PPC64-NEXT:    lbz 11, 183(1)
+; AIX-PPC64-NEXT:    addi 6, 6, 1
+; AIX-PPC64-NEXT:    lbz 22, 311(1)
+; AIX-PPC64-NEXT:    add 27, 24, 27
+; AIX-PPC64-NEXT:    lbz 12, 175(1)
+; AIX-PPC64-NEXT:    addi 5, 5, 1
+; AIX-PPC64-NEXT:    lbz 0, 303(1)
+; AIX-PPC64-NEXT:    add 11, 22, 11
+; AIX-PPC64-NEXT:    lbz 31, 167(1)
+; AIX-PPC64-NEXT:    addi 11, 11, 1
+; AIX-PPC64-NEXT:    lbz 23, 295(1)
+; AIX-PPC64-NEXT:    add 12, 0, 12
+; AIX-PPC64-NEXT:    lbz 30, 159(1)
+; AIX-PPC64-NEXT:    addi 4, 4, 1
+; AIX-PPC64-NEXT:    lbz 24, 287(1)
+; AIX-PPC64-NEXT:    add 31, 23, 31
+; AIX-PPC64-NEXT:    stb 11, 15(3)
+; AIX-PPC64-NEXT:    addi 11, 12, 1
+; AIX-PPC64-NEXT:    add 30, 24, 30
+; AIX-PPC64-NEXT:    stb 11, 14(3)
+; AIX-PPC64-NEXT:    addi 11, 31, 1
+; AIX-PPC64-NEXT:    stb 11, 13(3)
+; AIX-PPC64-NEXT:    addi 11, 30, 1
+; AIX-PPC64-NEXT:    stb 11, 12(3)
+; AIX-PPC64-NEXT:    addi 11, 29, 1
+; AIX-PPC64-NEXT:    stb 11, 11(3)
+; AIX-PPC64-NEXT:    addi 11, 28, 1
+; AIX-PPC64-NEXT:    stb 11, 10(3)
+; AIX-PPC64-NEXT:    addi 11, 27, 1
+; AIX-PPC64-NEXT:    stb 11, 9(3)
+; AIX-PPC64-NEXT:    addi 11, 26, 1
+; AIX-PPC64-NEXT:    stb 11, 8(3)
+; AIX-PPC64-NEXT:    addi 11, 25, 1
+; AIX-PPC64-NEXT:    stb 11, 7(3)
+; AIX-PPC64-NEXT:    stb 10, 6(3)
+; AIX-PPC64-NEXT:    stb 9, 5(3)
+; AIX-PPC64-NEXT:    stb 8, 4(3)
+; AIX-PPC64-NEXT:    stb 7, 3(3)
+; AIX-PPC64-NEXT:    stb 6, 2(3)
+; AIX-PPC64-NEXT:    stb 5, 1(3)
+; AIX-PPC64-NEXT:    stb 4, 0(3)
+; AIX-PPC64-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i8:
 ; PPC64LE:       # %bb.0:
@@ -310,54 +310,54 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; PPC32-NEXT:    addi 1, 1, 32
 ; PPC32-NEXT:    blr
 ;
-; PPC64BE-LABEL: vector_i128_i16:
-; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 11, 118(1)
-; PPC64BE-NEXT:    lhz 12, 182(1)
-; PPC64BE-NEXT:    lhz 0, 174(1)
-; PPC64BE-NEXT:    lhz 30, 166(1)
-; PPC64BE-NEXT:    add 11, 12, 11
-; PPC64BE-NEXT:    lhz 29, 158(1)
-; PPC64BE-NEXT:    add 10, 0, 10
-; PPC64BE-NEXT:    lhz 28, 142(1)
-; PPC64BE-NEXT:    add 9, 30, 9
-; PPC64BE-NEXT:    lhz 27, 126(1)
-; PPC64BE-NEXT:    add 8, 29, 8
-; PPC64BE-NEXT:    lhz 26, 134(1)
-; PPC64BE-NEXT:    add 6, 28, 6
-; PPC64BE-NEXT:    lhz 25, 150(1)
-; PPC64BE-NEXT:    add 4, 27, 4
-; PPC64BE-NEXT:    add 5, 26, 5
-; PPC64BE-NEXT:    addi 11, 11, 1
-; PPC64BE-NEXT:    add 7, 25, 7
-; PPC64BE-NEXT:    addi 10, 10, 1
-; PPC64BE-NEXT:    addi 9, 9, 1
-; PPC64BE-NEXT:    addi 8, 8, 1
-; PPC64BE-NEXT:    addi 7, 7, 1
-; PPC64BE-NEXT:    addi 6, 6, 1
-; PPC64BE-NEXT:    addi 5, 5, 1
-; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    sth 11, 14(3)
-; PPC64BE-NEXT:    sth 10, 12(3)
-; PPC64BE-NEXT:    sth 9, 10(3)
-; PPC64BE-NEXT:    sth 8, 8(3)
-; PPC64BE-NEXT:    sth 7, 6(3)
-; PPC64BE-NEXT:    sth 6, 4(3)
-; PPC64BE-NEXT:    sth 5, 2(3)
-; PPC64BE-NEXT:    sth 4, 0(3)
-; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
-; PPC64BE-NEXT:    blr
+; AIX-PPC64-LABEL: vector_i128_i16:
+; AIX-PPC64:       # %bb.0:
+; AIX-PPC64-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 28, -32(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    std 31, -8(1) # 8-byte Folded Spill
+; AIX-PPC64-NEXT:    lhz 11, 118(1)
+; AIX-PPC64-NEXT:    lhz 12, 182(1)
+; AIX-PPC64-NEXT:    lhz 0, 174(1)
+; AIX-PPC64-NEXT:    lhz 31, 166(1)
+; AIX-PPC64-NEXT:    add 11, 12, 11
+; AIX-PPC64-NEXT:    lhz 30, 158(1)
+; AIX-PPC64-NEXT:    add 10, 0, 10
+; AIX-PPC64-NEXT:    lhz 29, 142(1)
+; AIX-PPC64-NEXT:    add 9, 31, 9
+; AIX-PPC64-NEXT:    lhz 28, 126(1)
+; AIX-PPC64-NEXT:    add 8, 30, 8
+; AIX-PPC64-NEXT:    lhz 27, 134(1)
+; AIX-PPC64-NEXT:    add 6, 29, 6
+; AIX-PPC64-NEXT:    lhz 26, 150(1)
+; AIX-PPC64-NEXT:    add 4, 28, 4
+; AIX-PPC64-NEXT:    add 5, 27, 5
+; AIX-PPC64-NEXT:    addi 11, 11, 1
+; AIX-PPC64-NEXT:    add 7, 26, 7
+; AIX-PPC64-NEXT:    addi 10, 10, 1
+; AIX-PPC64-NEXT:    addi 9, 9, 1
+; AIX-PPC64-NEXT:    addi 8, 8, 1
+; AIX-PPC64-NEXT:    addi 7, 7, 1
+; AIX-PPC64-NEXT:    addi 6, 6, 1
+; AIX-PPC64-NEXT:    addi 5, 5, 1
+; AIX-PPC64-NEXT:    addi 4, 4, 1
+; AIX-PPC64-NEXT:    sth 11, 14(3)
+; AIX-PPC64-NEXT:    sth 10, 12(3)
+; AIX-PPC64-NEXT:    sth 9, 10(3)
+; AIX-PPC64-NEXT:    sth 8, 8(3)
+; AIX-PPC64-NEXT:    sth 7, 6(3)
+; AIX-PPC64-NEXT:    sth 6, 4(3)
+; AIX-PPC64-NEXT:    sth 5, 2(3)
+; AIX-PPC64-NEXT:    sth 4, 0(3)
+; AIX-PPC64-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    ld 26, -48(1) # 8-byte Folded Reload
+; AIX-PPC64-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i16:
 ; PPC64LE:       # %bb.0:


        


More information about the llvm-commits mailing list