[llvm] [PowerPC] Add special handling for arguments that are smaller than pointer size. (PR #119003)

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 11 10:52:52 PST 2024


https://github.com/stefanp-ibm updated https://github.com/llvm/llvm-project/pull/119003

>From e8044305e07344b787a0f5674f7f703cb390964e Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Fri, 6 Dec 2024 12:16:11 -0500
Subject: [PATCH 1/4] [PowerPC] Add special handling for arguments that are
 smaller than pointer size.

When arguments are passed in memory instead of registers we currently
load the entire pointer size even though the argument may be smaller.
For example, if the pointer size is i32 then we use a load word even if
the argument is only an i8. This patch clears the bits that are not
required to ensure that we are getting the correct value even if the
load is larger.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++++++-
 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 28 ++++++++++-----------
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll     | 28 ++++++++++-----------
 3 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 14e09d502b696b..e3f607bb02f04b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7244,6 +7244,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     MVT LocVT = VA.getLocVT();
     MVT ValVT = VA.getValVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+
+    EVT ArgVT = Ins[VA.getValNo()].ArgVT;
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
     // in register.  The caller is required to initialize both the register
@@ -7291,7 +7293,17 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       SDValue ArgValue =
           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
-      InVals.push_back(ArgValue);
+
+      // While the ABI specifies the the higher bits of the load should be
+      // zeroed out this is not always the case. For safety this code will zero
+      // extend the loaded value if the size of the argument type is smaller
+      // then the load.
+      if (!ArgVT.isVector() && !ValVT.isVector() &&
+          ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
+        SDValue ArgValueExt = DAG.getZeroExtendInReg(ArgValue, dl, ArgVT);
+        InVals.push_back(ArgValueExt);
+      } else
+        InVals.push_back(ArgValue);
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
index 00d1c471c2fa7c..efc1bd2d47a8a8 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
@@ -1102,12 +1102,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
   ; 32BIT-NEXT:   renamable $r12 = LWZ 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
-  ; 32BIT-NEXT:   renamable $r0 = LWZ 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1, align 8)
+  ; 32BIT-NEXT:   renamable $r0 = LBZ 3, %fixed-stack.1 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r31 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16)
   ; 32BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16)
   ; 32BIT-NEXT:   renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8)
-  ; 32BIT-NEXT:   renamable $r28 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
-  ; 32BIT-NEXT:   renamable $r27 = LWZ 0, %fixed-stack.7 :: (load (s32) from %fixed-stack.7, align 16)
+  ; 32BIT-NEXT:   renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
+  ; 32BIT-NEXT:   renamable $r27 = LHZ 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8)
   ; 32BIT-NEXT:   renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8)
   ; 32BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
@@ -1143,13 +1143,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT: bb.0.entry:
   ; 64BIT-NEXT:   liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
   ; 64BIT-NEXT: {{  $}}
-  ; 64BIT-NEXT:   renamable $r11 = LWZ 0, %fixed-stack.1, implicit-def $x11 :: (load (s32) from %fixed-stack.1)
+  ; 64BIT-NEXT:   renamable $r11 = LBZ 3, %fixed-stack.1, implicit-def $x11 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
   ; 64BIT-NEXT:   renamable $x12 = LWZ8 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
-  ; 64BIT-NEXT:   renamable $x0 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
-  ; 64BIT-NEXT:   renamable $x2 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
-  ; 64BIT-NEXT:   renamable $x31 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 64BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.5, implicit-def $x30 :: (load (s32) from %fixed-stack.5)
-  ; 64BIT-NEXT:   renamable $x29 = LWA 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
+  ; 64BIT-NEXT:   renamable $r0 = LBZ 3, %fixed-stack.5, implicit-def $x0 :: (load (s8) from %fixed-stack.5 + 3, basealign 4)
+  ; 64BIT-NEXT:   renamable $x2 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
+  ; 64BIT-NEXT:   renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
+  ; 64BIT-NEXT:   renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
+  ; 64BIT-NEXT:   renamable $x29 = LHZ8 2, %fixed-stack.6
   ; 64BIT-NEXT:   renamable $x28 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16)
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5
@@ -1161,12 +1161,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT-NEXT:   renamable $x3 = EXTSW_32_64 killed renamable $r3
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x28
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x29
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x12
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x31
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x11
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
   ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
   %add = add nsw i32 %i1, %i2
@@ -1611,8 +1611,8 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex
   ; 32BIT-NEXT:   liveins: $f1, $f2, $f3, $f4
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 32BIT-NEXT:   renamable $r4 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5)
-  ; 32BIT-NEXT:   renamable $r5 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6, align 8)
+  ; 32BIT-NEXT:   renamable $r4 = LHZ 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r5 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8)
   ; 32BIT-NEXT:   renamable $r7 = LIS 17200
   ; 32BIT-NEXT:   STW killed renamable $r7, 0, %stack.1 :: (store (s32) into %stack.1, align 8)
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 433d4273444660..3d5107139cc04f 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1196,13 +1196,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-NEXT:    add 3, 3, 10
 ; ASM32PWR4-NEXT:    srawi 5, 11, 31
 ; ASM32PWR4-NEXT:    srawi 8, 3, 31
-; ASM32PWR4-NEXT:    lwz 4, 64(1)
+; ASM32PWR4-NEXT:    lhz 4, 66(1)
 ; ASM32PWR4-NEXT:    lwz 7, 56(1)
 ; ASM32PWR4-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
 ; ASM32PWR4-NEXT:    srawi 31, 12, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    adde 7, 8, 7
-; ASM32PWR4-NEXT:    lwz 6, 68(1)
+; ASM32PWR4-NEXT:    lbz 6, 71(1)
 ; ASM32PWR4-NEXT:    srawi 8, 4, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 4
 ; ASM32PWR4-NEXT:    adde 7, 7, 8
@@ -1216,7 +1216,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-NEXT:    lwz 7, 80(1)
 ; ASM32PWR4-NEXT:    adde 4, 4, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 0
-; ASM32PWR4-NEXT:    lwz 6, 88(1)
+; ASM32PWR4-NEXT:    lbz 6, 91(1)
 ; ASM32PWR4-NEXT:    adde 4, 4, 7
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
@@ -1228,29 +1228,29 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM64PWR4-LABEL: test_ints_stack:
 ; ASM64PWR4:       # %bb.0: # %entry
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    ld 4, 112(1)
+; ASM64PWR4-NEXT:    std 2, -8(1) # 8-byte Folded Spill
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    add 3, 3, 6
 ; ASM64PWR4-NEXT:    add 3, 3, 7
-; ASM64PWR4-NEXT:    lwa 12, 124(1)
+; ASM64PWR4-NEXT:    ld 4, 112(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 8
 ; ASM64PWR4-NEXT:    add 3, 3, 9
+; ASM64PWR4-NEXT:    lhz 5, 126(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 10
 ; ASM64PWR4-NEXT:    extsw 3, 3
-; ASM64PWR4-NEXT:    lwz 5, 132(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    add 3, 3, 12
-; ASM64PWR4-NEXT:    std 2, -8(1) # 8-byte Folded Spill
+; ASM64PWR4-NEXT:    lbz 2, 135(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    lwz 2, 140(1)
-; ASM64PWR4-NEXT:    lwa 11, 148(1)
+; ASM64PWR4-NEXT:    lwz 0, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 2
+; ASM64PWR4-NEXT:    lwa 11, 148(1)
+; ASM64PWR4-NEXT:    add 3, 3, 0
 ; ASM64PWR4-NEXT:    add 3, 3, 11
 ; ASM64PWR4-NEXT:    ld 4, 152(1)
-; ASM64PWR4-NEXT:    lwz 0, 164(1)
+; ASM64PWR4-NEXT:    lbz 12, 167(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
 ; ASM64PWR4-NEXT:    lwa 5, 172(1)
-; ASM64PWR4-NEXT:    add 3, 3, 0
+; ASM64PWR4-NEXT:    add 3, 3, 12
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    ld 2, -8(1) # 8-byte Folded Reload
 ; ASM64PWR4-NEXT:    blr
@@ -1720,11 +1720,11 @@ entry:
 define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
 ; ASM32PWR4-LABEL: mix_callee:
 ; ASM32PWR4:       # %bb.0: # %entry
-; ASM32PWR4-NEXT:    lwz 4, 60(1)
+; ASM32PWR4-NEXT:    lhz 4, 62(1)
 ; ASM32PWR4-NEXT:    lis 8, 17200
 ; ASM32PWR4-NEXT:    fadd 1, 1, 2
 ; ASM32PWR4-NEXT:    fadd 1, 1, 3
-; ASM32PWR4-NEXT:    lwz 5, 56(1)
+; ASM32PWR4-NEXT:    lbz 5, 59(1)
 ; ASM32PWR4-NEXT:    lwz 3, 68(1)
 ; ASM32PWR4-NEXT:    add 4, 5, 4
 ; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0

>From 492bd26d1df2409dd4c0ebd909ec37e1263bca2c Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Tue, 10 Dec 2024 14:30:27 -0500
Subject: [PATCH 2/4] Added the sign extend as well as the zero extend.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++--
 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll |  6 +--
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll     | 60 ++++++++++-----------
 3 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e3f607bb02f04b..95e289bd840c60 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7246,6 +7246,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
 
     EVT ArgVT = Ins[VA.getValNo()].ArgVT;
+    bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
     // in register.  The caller is required to initialize both the register
@@ -7298,12 +7299,18 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       // zeroed out this is not always the case. For safety this code will zero
       // extend the loaded value if the size of the argument type is smaller
       // then the load.
-      if (!ArgVT.isVector() && !ValVT.isVector() &&
+      if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
+          ValVT.isInteger() &&
           ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
-        SDValue ArgValueExt = DAG.getZeroExtendInReg(ArgValue, dl, ArgVT);
+        SDValue ArgValueTrunc = DAG.getNode(ISD::TRUNCATE, dl, ArgVT, ArgValue);
+        //    DAG.getLoad(ArgVT, dl, Chain, FIN, MachinePointerInfo());
+        SDValue ArgValueExt =
+            ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
+                       : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
         InVals.push_back(ArgValueExt);
-      } else
+      } else {
         InVals.push_back(ArgValue);
+      }
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
index efc1bd2d47a8a8..501227c9072c45 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
@@ -1107,7 +1107,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 32BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16)
   ; 32BIT-NEXT:   renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8)
   ; 32BIT-NEXT:   renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
-  ; 32BIT-NEXT:   renamable $r27 = LHZ 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r27 = LHA 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8)
   ; 32BIT-NEXT:   renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8)
   ; 32BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
@@ -1149,7 +1149,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT-NEXT:   renamable $x2 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
   ; 64BIT-NEXT:   renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
   ; 64BIT-NEXT:   renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 64BIT-NEXT:   renamable $x29 = LHZ8 2, %fixed-stack.6
+  ; 64BIT-NEXT:   renamable $x29 = LHA8 2, %fixed-stack.6
   ; 64BIT-NEXT:   renamable $x28 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16)
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5
@@ -1611,7 +1611,7 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex
   ; 32BIT-NEXT:   liveins: $f1, $f2, $f3, $f4
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 32BIT-NEXT:   renamable $r4 = LHZ 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r4 = LHA 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r5 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8)
   ; 32BIT-NEXT:   renamable $r7 = LIS 17200
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 3d5107139cc04f..3880b055ea9837 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1185,43 +1185,43 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-LABEL: test_ints_stack:
 ; ASM32PWR4:       # %bb.0: # %entry
 ; ASM32PWR4-NEXT:    add 3, 3, 4
-; ASM32PWR4-NEXT:    lwz 11, 92(1)
+; ASM32PWR4-NEXT:    lwz 12, 92(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 5
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    add 3, 3, 7
-; ASM32PWR4-NEXT:    lwz 12, 76(1)
+; ASM32PWR4-NEXT:    lwz 0, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 8
 ; ASM32PWR4-NEXT:    add 3, 3, 9
 ; ASM32PWR4-NEXT:    lwz 6, 60(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 10
-; ASM32PWR4-NEXT:    srawi 5, 11, 31
+; ASM32PWR4-NEXT:    srawi 5, 12, 31
 ; ASM32PWR4-NEXT:    srawi 8, 3, 31
-; ASM32PWR4-NEXT:    lhz 4, 66(1)
+; ASM32PWR4-NEXT:    lha 11, 66(1)
 ; ASM32PWR4-NEXT:    lwz 7, 56(1)
 ; ASM32PWR4-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
-; ASM32PWR4-NEXT:    srawi 31, 12, 31
+; ASM32PWR4-NEXT:    srawi 31, 0, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    adde 7, 8, 7
 ; ASM32PWR4-NEXT:    lbz 6, 71(1)
-; ASM32PWR4-NEXT:    srawi 8, 4, 31
-; ASM32PWR4-NEXT:    addc 3, 3, 4
+; ASM32PWR4-NEXT:    srawi 8, 11, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 11
 ; ASM32PWR4-NEXT:    adde 7, 7, 8
-; ASM32PWR4-NEXT:    lwz 4, 72(1)
+; ASM32PWR4-NEXT:    lwz 9, 72(1)
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    addze 6, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 4
-; ASM32PWR4-NEXT:    lwz 0, 84(1)
-; ASM32PWR4-NEXT:    addze 4, 6
-; ASM32PWR4-NEXT:    addc 3, 3, 12
-; ASM32PWR4-NEXT:    lwz 7, 80(1)
-; ASM32PWR4-NEXT:    adde 4, 4, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 9
+; ASM32PWR4-NEXT:    lwz 4, 84(1)
+; ASM32PWR4-NEXT:    addze 6, 6
 ; ASM32PWR4-NEXT:    addc 3, 3, 0
-; ASM32PWR4-NEXT:    lbz 6, 91(1)
-; ASM32PWR4-NEXT:    adde 4, 4, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 6
+; ASM32PWR4-NEXT:    lwz 7, 80(1)
+; ASM32PWR4-NEXT:    adde 6, 6, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 4
+; ASM32PWR4-NEXT:    lbz 8, 91(1)
+; ASM32PWR4-NEXT:    adde 4, 6, 7
+; ASM32PWR4-NEXT:    addc 3, 3, 8
 ; ASM32PWR4-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
 ; ASM32PWR4-NEXT:    addze 6, 4
-; ASM32PWR4-NEXT:    addc 4, 3, 11
+; ASM32PWR4-NEXT:    addc 4, 3, 12
 ; ASM32PWR4-NEXT:    adde 3, 6, 5
 ; ASM32PWR4-NEXT:    blr
 ;
@@ -1235,22 +1235,22 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM64PWR4-NEXT:    ld 4, 112(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 8
 ; ASM64PWR4-NEXT:    add 3, 3, 9
-; ASM64PWR4-NEXT:    lhz 5, 126(1)
+; ASM64PWR4-NEXT:    lha 12, 126(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 10
 ; ASM64PWR4-NEXT:    extsw 3, 3
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    lbz 2, 135(1)
+; ASM64PWR4-NEXT:    add 3, 3, 12
+; ASM64PWR4-NEXT:    lbz 5, 135(1)
+; ASM64PWR4-NEXT:    lwz 2, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    lwz 0, 140(1)
-; ASM64PWR4-NEXT:    add 3, 3, 2
 ; ASM64PWR4-NEXT:    lwa 11, 148(1)
-; ASM64PWR4-NEXT:    add 3, 3, 0
+; ASM64PWR4-NEXT:    add 3, 3, 2
 ; ASM64PWR4-NEXT:    add 3, 3, 11
 ; ASM64PWR4-NEXT:    ld 4, 152(1)
-; ASM64PWR4-NEXT:    lbz 12, 167(1)
+; ASM64PWR4-NEXT:    lbz 0, 167(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
 ; ASM64PWR4-NEXT:    lwa 5, 172(1)
-; ASM64PWR4-NEXT:    add 3, 3, 12
+; ASM64PWR4-NEXT:    add 3, 3, 0
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    ld 2, -8(1) # 8-byte Folded Reload
 ; ASM64PWR4-NEXT:    blr
@@ -1720,17 +1720,17 @@ entry:
 define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
 ; ASM32PWR4-LABEL: mix_callee:
 ; ASM32PWR4:       # %bb.0: # %entry
-; ASM32PWR4-NEXT:    lhz 4, 62(1)
+; ASM32PWR4-NEXT:    lha 3, 62(1)
 ; ASM32PWR4-NEXT:    lis 8, 17200
 ; ASM32PWR4-NEXT:    fadd 1, 1, 2
 ; ASM32PWR4-NEXT:    fadd 1, 1, 3
 ; ASM32PWR4-NEXT:    lbz 5, 59(1)
-; ASM32PWR4-NEXT:    lwz 3, 68(1)
-; ASM32PWR4-NEXT:    add 4, 5, 4
-; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0
 ; ASM32PWR4-NEXT:    fadd 1, 1, 4
+; ASM32PWR4-NEXT:    lwz 4, 68(1)
+; ASM32PWR4-NEXT:    add 3, 5, 3
+; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0
 ; ASM32PWR4-NEXT:    lwz 6, 72(1)
-; ASM32PWR4-NEXT:    add 3, 4, 3
+; ASM32PWR4-NEXT:    add 3, 3, 4
 ; ASM32PWR4-NEXT:    lwz 7, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    stw 8, -16(1)

>From 9bcb92854d33bbd8e4bc7618b973abbea6c6f383 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Tue, 10 Dec 2024 15:32:31 -0500
Subject: [PATCH 3/4] Fixed a couple of nits and a comment that I forgot to
 update.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 95e289bd840c60..d5f76522d08b7c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7295,22 +7295,20 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       SDValue ArgValue =
           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
 
-      // While the ABI specifies the the higher bits of the load should be
-      // zeroed out this is not always the case. For safety this code will zero
-      // extend the loaded value if the size of the argument type is smaller
-      // then the load.
+      // While the ABI specifies that the higher bits of the load should be
+      // zeroed out or sign extended this is not always the case. For safety
+      // this code will zero or sign extend the loaded value if the size of
+      // the argument type is smaller than the load.
       if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
           ValVT.isInteger() &&
           ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
         SDValue ArgValueTrunc = DAG.getNode(ISD::TRUNCATE, dl, ArgVT, ArgValue);
-        //    DAG.getLoad(ArgVT, dl, Chain, FIN, MachinePointerInfo());
         SDValue ArgValueExt =
             ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
                        : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
         InVals.push_back(ArgValueExt);
-      } else {
+      } else
         InVals.push_back(ArgValue);
-      }
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and

>From 7743fe13d2f0cd4f14e0b5b9298b6a9003b74771 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Wed, 11 Dec 2024 13:52:29 -0500
Subject: [PATCH 4/4] Fixed comment and added two arguments to the test case.

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  11 ++-
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll     | 101 ++++++++++++--------
 2 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d5f76522d08b7c..5299a1e45b6764 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7295,10 +7295,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       SDValue ArgValue =
           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
 
-      // While the ABI specifies that the higher bits of the load should be
-      // zeroed out or sign extended this is not always the case. For safety
-      // this code will zero or sign extend the loaded value if the size of
-      // the argument type is smaller than the load.
+      // While the ABI specifies the argument type is (sign or zero) extended
+      // out to register width, not all code is compliant. We truncate and
+      // re-extend to be more forgiving of these callers when the argument type
+      // is smaller than register width.
       if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
           ValVT.isInteger() &&
           ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
@@ -7307,8 +7307,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
             ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
                        : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
         InVals.push_back(ArgValueExt);
-      } else
+      } else {
         InVals.push_back(ArgValue);
+      }
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 3880b055ea9837..a78d82bc109a63 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1181,78 +1181,91 @@ entry:
 
 declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...)
 
-define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16) {
+define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16, i8 signext %si8, i1 zeroext %zi1) {
 ; ASM32PWR4-LABEL: test_ints_stack:
 ; ASM32PWR4:       # %bb.0: # %entry
 ; ASM32PWR4-NEXT:    add 3, 3, 4
-; ASM32PWR4-NEXT:    lwz 12, 92(1)
+; ASM32PWR4-NEXT:    lwz 11, 92(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 5
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    add 3, 3, 7
-; ASM32PWR4-NEXT:    lwz 0, 76(1)
+; ASM32PWR4-NEXT:    lwz 12, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 8
 ; ASM32PWR4-NEXT:    add 3, 3, 9
 ; ASM32PWR4-NEXT:    lwz 6, 60(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 10
-; ASM32PWR4-NEXT:    srawi 5, 12, 31
+; ASM32PWR4-NEXT:    srawi 5, 11, 31
 ; ASM32PWR4-NEXT:    srawi 8, 3, 31
-; ASM32PWR4-NEXT:    lha 11, 66(1)
+; ASM32PWR4-NEXT:    lwz 4, 64(1)
 ; ASM32PWR4-NEXT:    lwz 7, 56(1)
 ; ASM32PWR4-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
-; ASM32PWR4-NEXT:    srawi 31, 0, 31
+; ASM32PWR4-NEXT:    srawi 31, 12, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    adde 7, 8, 7
-; ASM32PWR4-NEXT:    lbz 6, 71(1)
-; ASM32PWR4-NEXT:    srawi 8, 11, 31
-; ASM32PWR4-NEXT:    addc 3, 3, 11
+; ASM32PWR4-NEXT:    lwz 6, 68(1)
+; ASM32PWR4-NEXT:    srawi 8, 4, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 4
 ; ASM32PWR4-NEXT:    adde 7, 7, 8
-; ASM32PWR4-NEXT:    lwz 9, 72(1)
+; ASM32PWR4-NEXT:    lwz 4, 72(1)
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    addze 6, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 9
-; ASM32PWR4-NEXT:    lwz 4, 84(1)
-; ASM32PWR4-NEXT:    addze 6, 6
-; ASM32PWR4-NEXT:    addc 3, 3, 0
-; ASM32PWR4-NEXT:    lwz 7, 80(1)
-; ASM32PWR4-NEXT:    adde 6, 6, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 4
-; ASM32PWR4-NEXT:    lbz 8, 91(1)
-; ASM32PWR4-NEXT:    adde 4, 6, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 8
+; ASM32PWR4-NEXT:    lwz 0, 84(1)
+; ASM32PWR4-NEXT:    addze 4, 6
+; ASM32PWR4-NEXT:    addc 3, 3, 12
+; ASM32PWR4-NEXT:    lwz 7, 80(1)
+; ASM32PWR4-NEXT:    adde 4, 4, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 0
+; ASM32PWR4-NEXT:    lwz 6, 88(1)
+; ASM32PWR4-NEXT:    adde 4, 4, 7
+; ASM32PWR4-NEXT:    addc 3, 3, 6
+; ASM32PWR4-NEXT:    lwz 6, 96(1)
+; ASM32PWR4-NEXT:    addze 4, 4
+; ASM32PWR4-NEXT:    addc 3, 3, 11
+; ASM32PWR4-NEXT:    adde 4, 4, 5
+; ASM32PWR4-NEXT:    lbz 5, 103(1)
+; ASM32PWR4-NEXT:    srawi 7, 6, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 6
+; ASM32PWR4-NEXT:    adde 6, 4, 7
 ; ASM32PWR4-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
-; ASM32PWR4-NEXT:    addze 6, 4
-; ASM32PWR4-NEXT:    addc 4, 3, 12
-; ASM32PWR4-NEXT:    adde 3, 6, 5
+; ASM32PWR4-NEXT:    addc 4, 3, 5
+; ASM32PWR4-NEXT:    addze 3, 6
 ; ASM32PWR4-NEXT:    blr
 ;
 ; ASM64PWR4-LABEL: test_ints_stack:
 ; ASM64PWR4:       # %bb.0: # %entry
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    std 2, -8(1) # 8-byte Folded Spill
+; ASM64PWR4-NEXT:    std 2, -16(1) # 8-byte Folded Spill
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    add 3, 3, 6
 ; ASM64PWR4-NEXT:    add 3, 3, 7
-; ASM64PWR4-NEXT:    ld 4, 112(1)
+; ASM64PWR4-NEXT:    ld 5, 112(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 8
 ; ASM64PWR4-NEXT:    add 3, 3, 9
-; ASM64PWR4-NEXT:    lha 12, 126(1)
+; ASM64PWR4-NEXT:    lwa 2, 124(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 10
 ; ASM64PWR4-NEXT:    extsw 3, 3
-; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    add 3, 3, 12
-; ASM64PWR4-NEXT:    lbz 5, 135(1)
-; ASM64PWR4-NEXT:    lwz 2, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    lwa 11, 148(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 2
-; ASM64PWR4-NEXT:    add 3, 3, 11
-; ASM64PWR4-NEXT:    ld 4, 152(1)
-; ASM64PWR4-NEXT:    lbz 0, 167(1)
+; ASM64PWR4-NEXT:    lwz 4, 132(1)
+; ASM64PWR4-NEXT:    lwz 6, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    lwa 5, 172(1)
+; ASM64PWR4-NEXT:    lwa 0, 148(1)
+; ASM64PWR4-NEXT:    add 3, 3, 6
 ; ASM64PWR4-NEXT:    add 3, 3, 0
+; ASM64PWR4-NEXT:    std 31, -8(1) # 8-byte Folded Spill
+; ASM64PWR4-NEXT:    ld 31, 152(1)
+; ASM64PWR4-NEXT:    lwz 4, 164(1)
+; ASM64PWR4-NEXT:    add 3, 3, 31
+; ASM64PWR4-NEXT:    lwa 12, 172(1)
+; ASM64PWR4-NEXT:    add 3, 3, 4
+; ASM64PWR4-NEXT:    add 3, 3, 12
+; ASM64PWR4-NEXT:    lwa 11, 180(1)
+; ASM64PWR4-NEXT:    add 3, 3, 11
+; ASM64PWR4-NEXT:    lbz 5, 191(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    ld 2, -8(1) # 8-byte Folded Reload
+; ASM64PWR4-NEXT:    ld 2, -16(1) # 8-byte Folded Reload
+; ASM64PWR4-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
 ; ASM64PWR4-NEXT:    blr
 entry:
   %add = add nsw i32 %i1, %i2
@@ -1277,7 +1290,11 @@ entry:
   %add18 = add nsw i64 %add16, %conv17
   %conv19 = sext i32 %i16 to i64
   %add20 = add nsw i64 %add18, %conv19
-  ret i64 %add20
+  %conv21 = sext i8 %si8 to i64
+  %add22 = add nsw i64 %add20, %conv21
+  %conv23 = zext i1 %zi1 to i64
+  %add24 = add nsw i64 %add22, %conv23
+  ret i64 %add24
 }
 
 @ll1 = common global i64 0, align 8
@@ -1720,17 +1737,17 @@ entry:
 define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
 ; ASM32PWR4-LABEL: mix_callee:
 ; ASM32PWR4:       # %bb.0: # %entry
-; ASM32PWR4-NEXT:    lha 3, 62(1)
+; ASM32PWR4-NEXT:    lwz 4, 60(1)
 ; ASM32PWR4-NEXT:    lis 8, 17200
 ; ASM32PWR4-NEXT:    fadd 1, 1, 2
 ; ASM32PWR4-NEXT:    fadd 1, 1, 3
-; ASM32PWR4-NEXT:    lbz 5, 59(1)
-; ASM32PWR4-NEXT:    fadd 1, 1, 4
-; ASM32PWR4-NEXT:    lwz 4, 68(1)
-; ASM32PWR4-NEXT:    add 3, 5, 3
+; ASM32PWR4-NEXT:    lwz 5, 56(1)
+; ASM32PWR4-NEXT:    lwz 3, 68(1)
+; ASM32PWR4-NEXT:    add 4, 5, 4
 ; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0
+; ASM32PWR4-NEXT:    fadd 1, 1, 4
 ; ASM32PWR4-NEXT:    lwz 6, 72(1)
-; ASM32PWR4-NEXT:    add 3, 3, 4
+; ASM32PWR4-NEXT:    add 3, 4, 3
 ; ASM32PWR4-NEXT:    lwz 7, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    stw 8, -16(1)



More information about the llvm-commits mailing list