[llvm] [PowerPC][AIX] Support ByVals with greater alignment then pointer size (PR #93341)

Zarko Todorovski via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 10:37:10 PDT 2024


https://github.com/ZarkoT updated https://github.com/llvm/llvm-project/pull/93341

>From 795bd074752cd5e854b630344669038f02fbaae9 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski <zarko at ca.ibm.com>
Date: Fri, 24 May 2024 16:06:36 -0400
Subject: [PATCH 1/3] [PowerPC][AIX] Support ByVals with greater alignment then
 pointer size

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 45 ++++++++++-------
 .../PowerPC/aix-cc-byval-limitation3.ll       |  2 +-
 .../PowerPC/aix-vector-byval-callee.ll        | 50 +++++++++++++++++++
 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll | 49 ++++++++++++++++++
 4 files changed, 128 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8450ce9e0e3b3..0b628ea2b3f6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6855,7 +6855,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
-  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+  const Align PtrAlign(PtrSize);
+  const Align StackAlign(16);
   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
 
   if (ValVT == MVT::f128)
@@ -6876,12 +6877,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
 
+  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
+
   if (ArgFlags.isByVal()) {
+    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
-                         "register width are not supported.");
+                         "16 are not supported.");
 
     const unsigned ByValSize = ArgFlags.getByValSize();
+    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
 
     // An empty aggregate parameter takes up no storage and no registers,
     // but needs a MemLoc for a stack slot for the formal arguments side.
@@ -6891,11 +6896,23 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
       return false;
     }
 
-    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
-    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
-    for (const unsigned E = Offset + StackSize; Offset < E;
-         Offset += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    // Shadow allocate any registers that are not properly aligned.
+    unsigned NextReg = State.getFirstUnallocated(GPRs);
+    while (NextReg != GPRs.size() &&
+           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
+      // Shadow allocate next registers since its aligment is not strict enough.
+      unsigned Reg = State.AllocateReg(GPRs);
+      // Allocate the stack space shadowed by said register.
+      State.AllocateStack(PtrSize, PtrAlign);
+      assert(Reg && "Alocating register unexpectedly failed.");
+      (void)Reg;
+      NextReg = State.getFirstUnallocated(GPRs);
+    }
+
+    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
+    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
+    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs))
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
       else {
         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
@@ -6917,12 +6934,12 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
     [[fallthrough]];
   case MVT::i1:
   case MVT::i32: {
-    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
+    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
     // AIX integer arguments are always passed in register width.
     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                   : CCValAssign::LocInfo::ZExt;
-    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    if (unsigned Reg = State.AllocateReg(GPRs))
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
     else
       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
@@ -6942,8 +6959,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
 
     // Reserve and initialize GPRs or initialize the PSA as required.
-    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs)) {
         assert(FReg && "An FPR should be available when a GPR is reserved.");
         if (State.isVarArg()) {
           // Successfully reserved GPRs are only initialized for vararg calls.
@@ -6995,9 +7012,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
       return false;
     }
 
-    const unsigned PtrSize = IsPPC64 ? 8 : 4;
-    ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
-
     unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
     // Burn any underaligned registers and their shadowed stack space until
     // we reach the required alignment.
@@ -7347,9 +7361,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       const MCPhysReg ArgReg = VA.getLocReg();
       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
 
-      if (Flags.getNonZeroByValAlign() > PtrByteSize)
-        report_fatal_error("Over aligned byvals not supported yet.");
-
       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
       const int FI = MF.getFrameInfo().CreateFixedObject(
           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
index bb0231dbf5417..d4d9d67061fcb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -13,4 +13,4 @@ entry:
 
 declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, ptr byval(%struct.S) align 32)
 
-; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
+; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
new file mode 100644
index 0000000000000..3491779b1002c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
@@ -0,0 +1,50 @@
+TE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:  -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefix=32BIT %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:  -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefix=64BIT %s
+
+%struct.vec_struct = type { <4 x i32> }
+
+; Function Attrs: norecurse nounwind readonly
+define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
+  ; 32BIT-LABEL: name: vec_struct_test
+
+  ; 32BIT: fixedStack:
+  ; 32BIT: - { id: 0, type: default, offset: 32, size: 16, alignment: 16, stack-id: default,
+  ; 32BIT:     isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+  ; 32BIT:     debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+  ; 32BIT: bb.0.entry:
+  ; 32BIT:   liveins: $r3, $r5, $r6, $r7, $r8
+  ; 32BIT:   renamable $r3 = nsw ADD4 renamable $r5, killed renamable $r3
+  ; 32BIT:   STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
+  ; 32BIT:   STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
+  ; 32BIT:   STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
+  ; 32BIT:   STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
+  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
+
+
+  ; 64BIT-LABEL: name: vec_struct_test
+  ; 64BIT: fixedStack:
+  ; 64BIT:  - { id: 0, type: default, offset: 64, size: 16, alignment: 16, stack-id: default,
+  ; 64BIT:      isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+  ; 64BIT:      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+  ; 64BIT: bb.0.entry:
+  ; 64BIT:   liveins: $x3, $x5, $x6
+  ; 64BIT:   STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
+  ; 64BIT:   renamable $r4 = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) from %ir.vsi1, align 16)
+  ; 64BIT:   renamable $r3 = nsw ADD4 killed renamable $r4, renamable $r3, implicit killed $x3, implicit-def $x3
+  ; 64BIT:   STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
+  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+entry:
+  %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0
+  %0 = load <4 x i32>, <4 x i32>* %vsi, align 16
+  %vecext = extractelement <4 x i32> %0, i32 0
+  %add = add nsw i32 %vecext, %i
+  ret i32 %add
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
new file mode 100644
index 0000000000000..9e4139cdf0c6c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:  -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefix=32BIT %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:  -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN: FileCheck --check-prefix=64BIT %s
+
+%struct.B = type { <8 x i16>, i32, i32, [8 x i8] }
+
+; Function Attrs: nounwind
+define i32 @caller() {
+  ; 32BIT-LABEL: name: caller
+  ; 32BIT: bb.0.entry:
+  ; 32BIT:   renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
+  ; 32BIT:   STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
+  ; 32BIT:   renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
+  ; 32BIT:   STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
+  ; 32BIT:   ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT:   renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
+  ; 32BIT:   renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
+  ; 32BIT:   renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
+  ; 32BIT:   renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
+  ; 32BIT:   renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
+  ; 32BIT:   renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
+  ; 32BIT:   $r3 = LI 0
+  ; 32BIT:   BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
+  ; 32BIT:   ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
+
+  ; 64BIT-LABEL: name: caller
+  ; 64BIT: bb.0.entry:
+  ; 64BIT:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT:   renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
+  ; 64BIT:   renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
+  ; 64BIT:   renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
+  ; 64BIT:   renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
+  ; 64BIT:   $x3 = LI8 0
+  ; 64BIT:   BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
+  ; 64BIT:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+  entry:
+  %vs = alloca %struct.B, align 16
+  %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs)
+  ret i32 %call
+}
+
+declare i32 @vec_struct_test(i32, %struct.B* byval(%struct.B) align 16)

>From 7f91bbedec4fe39ff619d6bc05307eb0e5dcee53 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski <zarko at ca.ibm.com>
Date: Sat, 25 May 2024 16:47:42 -0400
Subject: [PATCH 2/3] Fix typos and update tests

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  3 +-
 .../PowerPC/aix-vector-byval-callee.ll        | 44 +++++++---------
 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll | 50 +++++++++----------
 3 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0b628ea2b3f6a..6f273bc9a9d9d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6855,6 +6855,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
+  const unsigned PtrSize = IsPPC64 ? 8 : 4;
   const Align PtrAlign(PtrSize);
   const Align StackAlign(16);
   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
@@ -6881,7 +6882,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
 
   if (ArgFlags.isByVal()) {
     const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
-    if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
+    if (ByValAlign > StackAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
                          "16 are not supported.");
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
index 3491779b1002c..002ee0e47f14f 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
@@ -1,4 +1,4 @@
-TE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
 ; RUN:  -mattr=-altivec -verify-machineinstrs < %s | \
 ; RUN: FileCheck --check-prefix=32BIT %s
@@ -12,35 +12,25 @@ TE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; Function Attrs: norecurse nounwind readonly
 define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
   ; 32BIT-LABEL: name: vec_struct_test
-
-  ; 32BIT: fixedStack:
-  ; 32BIT: - { id: 0, type: default, offset: 32, size: 16, alignment: 16, stack-id: default,
-  ; 32BIT:     isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
-  ; 32BIT:     debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-
   ; 32BIT: bb.0.entry:
-  ; 32BIT:   liveins: $r3, $r5, $r6, $r7, $r8
-  ; 32BIT:   renamable $r3 = nsw ADD4 renamable $r5, killed renamable $r3
-  ; 32BIT:   STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
-  ; 32BIT:   STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
-  ; 32BIT:   STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
-  ; 32BIT:   STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
-  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
-
-
+  ; 32BIT-NEXT:   liveins: $r3, $r5, $r6, $r7, $r8
+  ; 32BIT-NEXT: {{  $}}
+  ; 32BIT-NEXT:   STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
+  ; 32BIT-NEXT:   STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
+  ; 32BIT-NEXT:   STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
+  ; 32BIT-NEXT:   STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
+  ; 32BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3
+  ; 32BIT-NEXT:   BLR implicit $lr, implicit $rm, implicit $r3
+  ;
   ; 64BIT-LABEL: name: vec_struct_test
-  ; 64BIT: fixedStack:
-  ; 64BIT:  - { id: 0, type: default, offset: 64, size: 16, alignment: 16, stack-id: default,
-  ; 64BIT:      isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
-  ; 64BIT:      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-
   ; 64BIT: bb.0.entry:
-  ; 64BIT:   liveins: $x3, $x5, $x6
-  ; 64BIT:   STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
-  ; 64BIT:   renamable $r4 = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) from %ir.vsi1, align 16)
-  ; 64BIT:   renamable $r3 = nsw ADD4 killed renamable $r4, renamable $r3, implicit killed $x3, implicit-def $x3
-  ; 64BIT:   STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
-  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+  ; 64BIT-NEXT:   liveins: $x3, $x5, $x6
+  ; 64BIT-NEXT: {{  $}}
+  ; 64BIT-NEXT:   STD renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
+  ; 64BIT-NEXT:   STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
+  ; 64BIT-NEXT:   renamable $x4 = RLDICL killed renamable $x5, 32, 32
+  ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3
+  ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
   %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0
   %0 = load <4 x i32>, <4 x i32>* %vsi, align 16
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
index 9e4139cdf0c6c..5c383e0323cb0 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -13,33 +13,33 @@
 define i32 @caller() {
   ; 32BIT-LABEL: name: caller
   ; 32BIT: bb.0.entry:
-  ; 32BIT:   renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
-  ; 32BIT:   STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
-  ; 32BIT:   renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
-  ; 32BIT:   STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
-  ; 32BIT:   ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
-  ; 32BIT:   renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
-  ; 32BIT:   renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
-  ; 32BIT:   renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
-  ; 32BIT:   renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
-  ; 32BIT:   renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
-  ; 32BIT:   renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
-  ; 32BIT:   $r3 = LI 0
-  ; 32BIT:   BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
-  ; 32BIT:   ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
-  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
-
+  ; 32BIT-NEXT:   renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
+  ; 32BIT-NEXT:   STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
+  ; 32BIT-NEXT:   renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
+  ; 32BIT-NEXT:   STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
+  ; 32BIT-NEXT:   ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT-NEXT:   renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
+  ; 32BIT-NEXT:   renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
+  ; 32BIT-NEXT:   renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
+  ; 32BIT-NEXT:   renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
+  ; 32BIT-NEXT:   renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
+  ; 32BIT-NEXT:   renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
+  ; 32BIT-NEXT:   $r3 = LI 0
+  ; 32BIT-NEXT:   BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
+  ; 32BIT-NEXT:   ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT-NEXT:   BLR implicit $lr, implicit $rm, implicit $r3
+  ;
   ; 64BIT-LABEL: name: caller
   ; 64BIT: bb.0.entry:
-  ; 64BIT:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
-  ; 64BIT:   renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
-  ; 64BIT:   renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
-  ; 64BIT:   renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
-  ; 64BIT:   renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
-  ; 64BIT:   $x3 = LI8 0
-  ; 64BIT:   BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
-  ; 64BIT:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
-  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+  ; 64BIT-NEXT:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT-NEXT:   renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
+  ; 64BIT-NEXT:   renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
+  ; 64BIT-NEXT:   renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
+  ; 64BIT-NEXT:   renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
+  ; 64BIT-NEXT:   $x3 = LI8 0
+  ; 64BIT-NEXT:   BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
+  ; 64BIT-NEXT:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
   entry:
   %vs = alloca %struct.B, align 16
   %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs)

>From e8a736811b667f2ea5276109b1cdbd7dc15341c3 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski <zarko at ca.ibm.com>
Date: Tue, 28 May 2024 13:36:56 -0400
Subject: [PATCH 3/3] Fix tests so they use opaque pointers

---
 llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll | 6 +++---
 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll        | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
index 002ee0e47f14f..80c26471d8cdb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
@@ -10,7 +10,7 @@
 %struct.vec_struct = type { <4 x i32> }
 
 ; Function Attrs: norecurse nounwind readonly
-define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
+define i32 @vec_struct_test(i32 %i, ptr nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
   ; 32BIT-LABEL: name: vec_struct_test
   ; 32BIT: bb.0.entry:
   ; 32BIT-NEXT:   liveins: $r3, $r5, $r6, $r7, $r8
@@ -32,8 +32,8 @@ define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3
   ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
-  %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0
-  %0 = load <4 x i32>, <4 x i32>* %vsi, align 16
+  %vsi = getelementptr inbounds i8, ptr %vs, i32 0
+  %0 = load <4 x i32>, ptr %vsi, align 16
   %vecext = extractelement <4 x i32> %0, i32 0
   %add = add nsw i32 %vecext, %i
   ret i32 %add
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
index 5c383e0323cb0..8fcac7b6a5921 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -42,8 +42,8 @@ define i32 @caller() {
   ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
   entry:
   %vs = alloca %struct.B, align 16
-  %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs)
+  %call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
   ret i32 %call
 }
 
-declare i32 @vec_struct_test(i32, %struct.B* byval(%struct.B) align 16)
+declare i32 @vec_struct_test(i32, ptr byval(%struct.B) align 16)



More information about the llvm-commits mailing list