[llvm] [PowerPC] Add special handling for arguments that are smaller than pointer size. (PR #119003)

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 11:30:59 PST 2024


https://github.com/stefanp-ibm updated https://github.com/llvm/llvm-project/pull/119003

From e8044305e07344b787a0f5674f7f703cb390964e Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Fri, 6 Dec 2024 12:16:11 -0500
Subject: [PATCH 1/2] [PowerPC] Add special handling for arguments that are
 smaller than pointer size.

When arguments are passed in memory instead of registers, we currently
load the entire pointer-sized slot even though the argument may be
smaller. For example, if the pointer size is i32, then we use a word
load even if the argument is only an i8. This patch clears the bits
that are not required, to ensure that we get the correct value even
though the load is wider than the argument.
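
To illustrate, consider a hypothetical reproducer (the function and
argument names are made up). On 32-bit AIX the first eight integer
arguments are passed in r3..r10, so c below lands in a 4-byte slot in
the parameter save area and, before this patch, was reloaded with a
full-width lwz:

/* Hypothetical example; names are illustrative. */
int ninth_byte_arg(int a1, int a2, int a3, int a4,
                   int a5, int a6, int a7, int a8, unsigned char c) {
  /* If the caller left the upper 3 bytes of the slot uninitialized,
   * this compare needs the reload to clear the bits above the i8. */
  return c == 255;
}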
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++++++-
 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 28 ++++++++++-----------
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll     | 28 ++++++++++-----------
 3 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 14e09d502b696b..e3f607bb02f04b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7244,6 +7244,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     MVT LocVT = VA.getLocVT();
     MVT ValVT = VA.getValVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+
+    EVT ArgVT = Ins[VA.getValNo()].ArgVT;
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
     // in register.  The caller is required to initialize both the register
@@ -7291,7 +7293,17 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       SDValue ArgValue =
           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
-      InVals.push_back(ArgValue);
+
+      // While the ABI specifies that the higher bits of the load should be
+      // zeroed out, this is not always the case. For safety this code will zero
+      // extend the loaded value if the size of the argument type is smaller
+      // than the load.
+      if (!ArgVT.isVector() && !ValVT.isVector() &&
+          ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
+        SDValue ArgValueExt = DAG.getZeroExtendInReg(ArgValue, dl, ArgVT);
+        InVals.push_back(ArgValueExt);
+      } else
+        InVals.push_back(ArgValue);
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
index 00d1c471c2fa7c..efc1bd2d47a8a8 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
@@ -1102,12 +1102,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
   ; 32BIT-NEXT:   renamable $r12 = LWZ 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
-  ; 32BIT-NEXT:   renamable $r0 = LWZ 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1, align 8)
+  ; 32BIT-NEXT:   renamable $r0 = LBZ 3, %fixed-stack.1 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r31 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16)
   ; 32BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16)
   ; 32BIT-NEXT:   renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8)
-  ; 32BIT-NEXT:   renamable $r28 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
-  ; 32BIT-NEXT:   renamable $r27 = LWZ 0, %fixed-stack.7 :: (load (s32) from %fixed-stack.7, align 16)
+  ; 32BIT-NEXT:   renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
+  ; 32BIT-NEXT:   renamable $r27 = LHZ 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8)
   ; 32BIT-NEXT:   renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8)
   ; 32BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
@@ -1143,13 +1143,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT: bb.0.entry:
   ; 64BIT-NEXT:   liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
   ; 64BIT-NEXT: {{  $}}
-  ; 64BIT-NEXT:   renamable $r11 = LWZ 0, %fixed-stack.1, implicit-def $x11 :: (load (s32) from %fixed-stack.1)
+  ; 64BIT-NEXT:   renamable $r11 = LBZ 3, %fixed-stack.1, implicit-def $x11 :: (load (s8) from %fixed-stack.1 + 3, basealign 4)
   ; 64BIT-NEXT:   renamable $x12 = LWZ8 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4)
-  ; 64BIT-NEXT:   renamable $x0 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
-  ; 64BIT-NEXT:   renamable $x2 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
-  ; 64BIT-NEXT:   renamable $x31 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 64BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.5, implicit-def $x30 :: (load (s32) from %fixed-stack.5)
-  ; 64BIT-NEXT:   renamable $x29 = LWA 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6)
+  ; 64BIT-NEXT:   renamable $r0 = LBZ 3, %fixed-stack.5, implicit-def $x0 :: (load (s8) from %fixed-stack.5 + 3, basealign 4)
+  ; 64BIT-NEXT:   renamable $x2 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
+  ; 64BIT-NEXT:   renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
+  ; 64BIT-NEXT:   renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
+  ; 64BIT-NEXT:   renamable $x29 = LHZ8 2, %fixed-stack.6
   ; 64BIT-NEXT:   renamable $x28 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16)
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5
@@ -1161,12 +1161,12 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT-NEXT:   renamable $x3 = EXTSW_32_64 killed renamable $r3
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x28
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x29
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x12
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x31
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
   ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x11
-  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0
+  ; 64BIT-NEXT:   renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x2
   ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
   %add = add nsw i32 %i1, %i2
@@ -1611,8 +1611,8 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex
   ; 32BIT-NEXT:   liveins: $f1, $f2, $f3, $f4
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 32BIT-NEXT:   renamable $r4 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5)
-  ; 32BIT-NEXT:   renamable $r5 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6, align 8)
+  ; 32BIT-NEXT:   renamable $r4 = LHZ 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r5 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8)
   ; 32BIT-NEXT:   renamable $r7 = LIS 17200
   ; 32BIT-NEXT:   STW killed renamable $r7, 0, %stack.1 :: (store (s32) into %stack.1, align 8)
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 433d4273444660..3d5107139cc04f 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1196,13 +1196,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-NEXT:    add 3, 3, 10
 ; ASM32PWR4-NEXT:    srawi 5, 11, 31
 ; ASM32PWR4-NEXT:    srawi 8, 3, 31
-; ASM32PWR4-NEXT:    lwz 4, 64(1)
+; ASM32PWR4-NEXT:    lhz 4, 66(1)
 ; ASM32PWR4-NEXT:    lwz 7, 56(1)
 ; ASM32PWR4-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
 ; ASM32PWR4-NEXT:    srawi 31, 12, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    adde 7, 8, 7
-; ASM32PWR4-NEXT:    lwz 6, 68(1)
+; ASM32PWR4-NEXT:    lbz 6, 71(1)
 ; ASM32PWR4-NEXT:    srawi 8, 4, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 4
 ; ASM32PWR4-NEXT:    adde 7, 7, 8
@@ -1216,7 +1216,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-NEXT:    lwz 7, 80(1)
 ; ASM32PWR4-NEXT:    adde 4, 4, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 0
-; ASM32PWR4-NEXT:    lwz 6, 88(1)
+; ASM32PWR4-NEXT:    lbz 6, 91(1)
 ; ASM32PWR4-NEXT:    adde 4, 4, 7
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
@@ -1228,29 +1228,29 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM64PWR4-LABEL: test_ints_stack:
 ; ASM64PWR4:       # %bb.0: # %entry
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    ld 4, 112(1)
+; ASM64PWR4-NEXT:    std 2, -8(1) # 8-byte Folded Spill
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    add 3, 3, 6
 ; ASM64PWR4-NEXT:    add 3, 3, 7
-; ASM64PWR4-NEXT:    lwa 12, 124(1)
+; ASM64PWR4-NEXT:    ld 4, 112(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 8
 ; ASM64PWR4-NEXT:    add 3, 3, 9
+; ASM64PWR4-NEXT:    lhz 5, 126(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 10
 ; ASM64PWR4-NEXT:    extsw 3, 3
-; ASM64PWR4-NEXT:    lwz 5, 132(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    add 3, 3, 12
-; ASM64PWR4-NEXT:    std 2, -8(1) # 8-byte Folded Spill
+; ASM64PWR4-NEXT:    lbz 2, 135(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    lwz 2, 140(1)
-; ASM64PWR4-NEXT:    lwa 11, 148(1)
+; ASM64PWR4-NEXT:    lwz 0, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 2
+; ASM64PWR4-NEXT:    lwa 11, 148(1)
+; ASM64PWR4-NEXT:    add 3, 3, 0
 ; ASM64PWR4-NEXT:    add 3, 3, 11
 ; ASM64PWR4-NEXT:    ld 4, 152(1)
-; ASM64PWR4-NEXT:    lwz 0, 164(1)
+; ASM64PWR4-NEXT:    lbz 12, 167(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
 ; ASM64PWR4-NEXT:    lwa 5, 172(1)
-; ASM64PWR4-NEXT:    add 3, 3, 0
+; ASM64PWR4-NEXT:    add 3, 3, 12
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    ld 2, -8(1) # 8-byte Folded Reload
 ; ASM64PWR4-NEXT:    blr
@@ -1720,11 +1720,11 @@ entry:
 define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
 ; ASM32PWR4-LABEL: mix_callee:
 ; ASM32PWR4:       # %bb.0: # %entry
-; ASM32PWR4-NEXT:    lwz 4, 60(1)
+; ASM32PWR4-NEXT:    lhz 4, 62(1)
 ; ASM32PWR4-NEXT:    lis 8, 17200
 ; ASM32PWR4-NEXT:    fadd 1, 1, 2
 ; ASM32PWR4-NEXT:    fadd 1, 1, 3
-; ASM32PWR4-NEXT:    lwz 5, 56(1)
+; ASM32PWR4-NEXT:    lbz 5, 59(1)
 ; ASM32PWR4-NEXT:    lwz 3, 68(1)
 ; ASM32PWR4-NEXT:    add 4, 5, 4
 ; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0

From 492bd26d1df2409dd4c0ebd909ec37e1263bca2c Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Tue, 10 Dec 2024 14:30:27 -0500
Subject: [PATCH 2/2] Add sign extension as well as zero extension.
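
The zero extension added in the first commit is wrong for signext
arguments, where negative values must survive the reload. A
hypothetical example (names are illustrative):

/* s arrives through the parameter save area; the signext contract means
 * the stack reload must sign extend (lha) rather than zero extend (lhz),
 * otherwise (short)-1 would be reloaded as 0xFFFF. */
int ninth_short_arg(int a1, int a2, int a3, int a4,
                    int a5, int a6, int a7, int a8, short s) {
  return s < 0;
}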

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 +++--
 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll |  6 +--
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll     | 60 ++++++++++-----------
 3 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e3f607bb02f04b..95e289bd840c60 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7246,6 +7246,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
 
     EVT ArgVT = Ins[VA.getValNo()].ArgVT;
+    bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
     // in register.  The caller is required to initialize both the register
@@ -7298,12 +7299,17 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       // zeroed out, this is not always the case. For safety this code will zero
       // extend the loaded value if the size of the argument type is smaller
       // than the load.
-      if (!ArgVT.isVector() && !ValVT.isVector() &&
+      if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
+          ValVT.isInteger() &&
           ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
-        SDValue ArgValueExt = DAG.getZeroExtendInReg(ArgValue, dl, ArgVT);
+        SDValue ArgValueTrunc = DAG.getNode(ISD::TRUNCATE, dl, ArgVT, ArgValue);
+        SDValue ArgValueExt =
+            ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
+                       : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
         InVals.push_back(ArgValueExt);
-      } else
+      } else {
         InVals.push_back(ArgValue);
+      }
     };
 
     // Vector arguments to VaArg functions are passed both on the stack, and
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
index efc1bd2d47a8a8..501227c9072c45 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
@@ -1107,7 +1107,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 32BIT-NEXT:   renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16)
   ; 32BIT-NEXT:   renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8)
   ; 32BIT-NEXT:   renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
-  ; 32BIT-NEXT:   renamable $r27 = LHZ 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r27 = LHA 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8)
   ; 32BIT-NEXT:   renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8)
   ; 32BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
@@ -1149,7 +1149,7 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
   ; 64BIT-NEXT:   renamable $x2 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0)
   ; 64BIT-NEXT:   renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2)
   ; 64BIT-NEXT:   renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 64BIT-NEXT:   renamable $x29 = LHZ8 2, %fixed-stack.6
+  ; 64BIT-NEXT:   renamable $x29 = LHA8 2, %fixed-stack.6
   ; 64BIT-NEXT:   renamable $x28 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16)
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3
   ; 64BIT-NEXT:   renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5
@@ -1611,7 +1611,7 @@ define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroex
   ; 32BIT-NEXT:   liveins: $f1, $f2, $f3, $f4
   ; 32BIT-NEXT: {{  $}}
   ; 32BIT-NEXT:   renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3)
-  ; 32BIT-NEXT:   renamable $r4 = LHZ 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
+  ; 32BIT-NEXT:   renamable $r4 = LHA 2, %fixed-stack.5 :: (load (s16) from %fixed-stack.5 + 2, basealign 4)
   ; 32BIT-NEXT:   renamable $r5 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4)
   ; 32BIT-NEXT:   renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8)
   ; 32BIT-NEXT:   renamable $r7 = LIS 17200
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 3d5107139cc04f..3880b055ea9837 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1185,43 +1185,43 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM32PWR4-LABEL: test_ints_stack:
 ; ASM32PWR4:       # %bb.0: # %entry
 ; ASM32PWR4-NEXT:    add 3, 3, 4
-; ASM32PWR4-NEXT:    lwz 11, 92(1)
+; ASM32PWR4-NEXT:    lwz 12, 92(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 5
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    add 3, 3, 7
-; ASM32PWR4-NEXT:    lwz 12, 76(1)
+; ASM32PWR4-NEXT:    lwz 0, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 8
 ; ASM32PWR4-NEXT:    add 3, 3, 9
 ; ASM32PWR4-NEXT:    lwz 6, 60(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 10
-; ASM32PWR4-NEXT:    srawi 5, 11, 31
+; ASM32PWR4-NEXT:    srawi 5, 12, 31
 ; ASM32PWR4-NEXT:    srawi 8, 3, 31
-; ASM32PWR4-NEXT:    lhz 4, 66(1)
+; ASM32PWR4-NEXT:    lha 11, 66(1)
 ; ASM32PWR4-NEXT:    lwz 7, 56(1)
 ; ASM32PWR4-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
-; ASM32PWR4-NEXT:    srawi 31, 12, 31
+; ASM32PWR4-NEXT:    srawi 31, 0, 31
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    adde 7, 8, 7
 ; ASM32PWR4-NEXT:    lbz 6, 71(1)
-; ASM32PWR4-NEXT:    srawi 8, 4, 31
-; ASM32PWR4-NEXT:    addc 3, 3, 4
+; ASM32PWR4-NEXT:    srawi 8, 11, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 11
 ; ASM32PWR4-NEXT:    adde 7, 7, 8
-; ASM32PWR4-NEXT:    lwz 4, 72(1)
+; ASM32PWR4-NEXT:    lwz 9, 72(1)
 ; ASM32PWR4-NEXT:    addc 3, 3, 6
 ; ASM32PWR4-NEXT:    addze 6, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 4
-; ASM32PWR4-NEXT:    lwz 0, 84(1)
-; ASM32PWR4-NEXT:    addze 4, 6
-; ASM32PWR4-NEXT:    addc 3, 3, 12
-; ASM32PWR4-NEXT:    lwz 7, 80(1)
-; ASM32PWR4-NEXT:    adde 4, 4, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 9
+; ASM32PWR4-NEXT:    lwz 4, 84(1)
+; ASM32PWR4-NEXT:    addze 6, 6
 ; ASM32PWR4-NEXT:    addc 3, 3, 0
-; ASM32PWR4-NEXT:    lbz 6, 91(1)
-; ASM32PWR4-NEXT:    adde 4, 4, 7
-; ASM32PWR4-NEXT:    addc 3, 3, 6
+; ASM32PWR4-NEXT:    lwz 7, 80(1)
+; ASM32PWR4-NEXT:    adde 6, 6, 31
+; ASM32PWR4-NEXT:    addc 3, 3, 4
+; ASM32PWR4-NEXT:    lbz 8, 91(1)
+; ASM32PWR4-NEXT:    adde 4, 6, 7
+; ASM32PWR4-NEXT:    addc 3, 3, 8
 ; ASM32PWR4-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
 ; ASM32PWR4-NEXT:    addze 6, 4
-; ASM32PWR4-NEXT:    addc 4, 3, 11
+; ASM32PWR4-NEXT:    addc 4, 3, 12
 ; ASM32PWR4-NEXT:    adde 3, 6, 5
 ; ASM32PWR4-NEXT:    blr
 ;
@@ -1235,22 +1235,22 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6
 ; ASM64PWR4-NEXT:    ld 4, 112(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 8
 ; ASM64PWR4-NEXT:    add 3, 3, 9
-; ASM64PWR4-NEXT:    lhz 5, 126(1)
+; ASM64PWR4-NEXT:    lha 12, 126(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 10
 ; ASM64PWR4-NEXT:    extsw 3, 3
 ; ASM64PWR4-NEXT:    add 3, 3, 4
-; ASM64PWR4-NEXT:    lbz 2, 135(1)
+; ASM64PWR4-NEXT:    add 3, 3, 12
+; ASM64PWR4-NEXT:    lbz 5, 135(1)
+; ASM64PWR4-NEXT:    lwz 2, 140(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 5
-; ASM64PWR4-NEXT:    lwz 0, 140(1)
-; ASM64PWR4-NEXT:    add 3, 3, 2
 ; ASM64PWR4-NEXT:    lwa 11, 148(1)
-; ASM64PWR4-NEXT:    add 3, 3, 0
+; ASM64PWR4-NEXT:    add 3, 3, 2
 ; ASM64PWR4-NEXT:    add 3, 3, 11
 ; ASM64PWR4-NEXT:    ld 4, 152(1)
-; ASM64PWR4-NEXT:    lbz 12, 167(1)
+; ASM64PWR4-NEXT:    lbz 0, 167(1)
 ; ASM64PWR4-NEXT:    add 3, 3, 4
 ; ASM64PWR4-NEXT:    lwa 5, 172(1)
-; ASM64PWR4-NEXT:    add 3, 3, 12
+; ASM64PWR4-NEXT:    add 3, 3, 0
 ; ASM64PWR4-NEXT:    add 3, 3, 5
 ; ASM64PWR4-NEXT:    ld 2, -8(1) # 8-byte Folded Reload
 ; ASM64PWR4-NEXT:    blr
@@ -1720,17 +1720,17 @@ entry:
 define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) {
 ; ASM32PWR4-LABEL: mix_callee:
 ; ASM32PWR4:       # %bb.0: # %entry
-; ASM32PWR4-NEXT:    lhz 4, 62(1)
+; ASM32PWR4-NEXT:    lha 3, 62(1)
 ; ASM32PWR4-NEXT:    lis 8, 17200
 ; ASM32PWR4-NEXT:    fadd 1, 1, 2
 ; ASM32PWR4-NEXT:    fadd 1, 1, 3
 ; ASM32PWR4-NEXT:    lbz 5, 59(1)
-; ASM32PWR4-NEXT:    lwz 3, 68(1)
-; ASM32PWR4-NEXT:    add 4, 5, 4
-; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0
 ; ASM32PWR4-NEXT:    fadd 1, 1, 4
+; ASM32PWR4-NEXT:    lwz 4, 68(1)
+; ASM32PWR4-NEXT:    add 3, 5, 3
+; ASM32PWR4-NEXT:    lwz 5, L..C34(2) # %const.0
 ; ASM32PWR4-NEXT:    lwz 6, 72(1)
-; ASM32PWR4-NEXT:    add 3, 4, 3
+; ASM32PWR4-NEXT:    add 3, 3, 4
 ; ASM32PWR4-NEXT:    lwz 7, 76(1)
 ; ASM32PWR4-NEXT:    add 3, 3, 6
 ; ASM32PWR4-NEXT:    stw 8, -16(1)


