[llvm] 585c85a - [PowerPC] Fix lowering of byval parameters for sizes greater than 8 bytes.

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 31 13:12:58 PDT 2022


Author: Stefan Pintilie
Date: 2022-03-31T15:12:46-05:00
New Revision: 585c85abe545a42a47a3b3d0411b91421e9552e9

URL: https://github.com/llvm/llvm-project/commit/585c85abe545a42a47a3b3d0411b91421e9552e9
DIFF: https://github.com/llvm/llvm-project/commit/585c85abe545a42a47a3b3d0411b91421e9552e9.diff

LOG: [PowerPC] Fix lowering of byval parameters for sizes greater than 8 bytes.

To store a byval parameter the existing code would store as many 8 byte elements
as were required to store the full size of the byval parameter.
For example, a parameter of size 16 would store two elements of 8 bytes.
A parameter of size 12 would also store two elements of 8 bytes.
This would sometimes store too many bytes as the size of the parameter is not
always a multiple of 8.

This patch fixes that issue and now byval parameters are stored with the correct
number of bytes.

Reviewed By: nemanjai, #powerpc, quinnp, amyk

Differential Revision: https://reviews.llvm.org/D121430

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/byval.ll
    llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 01cdb3de755da..c99c68d7b018a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4431,8 +4431,11 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
           SDValue Off = DAG.getConstant(j, dl, PtrVT);
           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
         }
-        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
-                                     MachinePointerInfo(&*FuncArg, j));
+        unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
+        EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
+        SDValue Store =
+            DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
+                              MachinePointerInfo(&*FuncArg, j), ObjType);
         MemOps.push_back(Store);
         ++GPR_idx;
       }
@@ -6269,8 +6272,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
         if (GPR_idx != NumGPRs) {
-          SDValue Load =
-              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
+          unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
+          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
+          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
+                                        MachinePointerInfo(), ObjType);
+
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
           ArgOffset += PtrByteSize;

diff  --git a/llvm/test/CodeGen/PowerPC/byval.ll b/llvm/test/CodeGen/PowerPC/byval.ll
index b04583662c4eb..6babf75747550 100644
--- a/llvm/test/CodeGen/PowerPC/byval.ll
+++ b/llvm/test/CodeGen/PowerPC/byval.ll
@@ -22,7 +22,7 @@ define dso_local i32 @bar() {
 ; CHECK-NEXT:    addi 3, 1, 40
 ; CHECK-NEXT:    bl foo
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld 7, 72(1)
+; CHECK-NEXT:    lwz 7, 72(1)
 ; CHECK-NEXT:    ld 6, 64(1)
 ; CHECK-NEXT:    ld 5, 56(1)
 ; CHECK-NEXT:    ld 4, 48(1)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
index d05ddf0d05f47..4742c7be45ca2 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
@@ -18,9 +18,10 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    stb r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 71
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    std r4, 56(r1)
+; P8LE-NEXT:    lbz r4, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r5
 ; P8LE-NEXT:    mr r3, r5
 ; P8LE-NEXT:    stb r4, 79(r1)
@@ -37,12 +38,13 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P9LE-NEXT:    mflr r0
 ; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
-; P9LE-NEXT:    addi r5, r1, 71
+; P9LE-NEXT:    stb r4, 56(r1)
+; P9LE-NEXT:    addi r4, r1, 71
 ; P9LE-NEXT:    std r3, 48(r1)
-; P9LE-NEXT:    std r4, 56(r1)
-; P9LE-NEXT:    stb r4, 79(r1)
-; P9LE-NEXT:    stdx r3, 0, r5
-; P9LE-NEXT:    mr r3, r5
+; P9LE-NEXT:    lbz r5, 56(r1)
+; P9LE-NEXT:    stdx r3, 0, r4
+; P9LE-NEXT:    mr r3, r4
+; P9LE-NEXT:    stb r5, 79(r1)
 ; P9LE-NEXT:    bl callee
 ; P9LE-NEXT:    nop
 ; P9LE-NEXT:    li r3, 0
@@ -56,12 +58,13 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P10LE-NEXT:    mflr r0
 ; P10LE-NEXT:    std r0, 16(r1)
 ; P10LE-NEXT:    stdu r1, -80(r1)
-; P10LE-NEXT:    addi r5, r1, 71
+; P10LE-NEXT:    stb r4, 56(r1)
+; P10LE-NEXT:    addi r4, r1, 71
 ; P10LE-NEXT:    std r3, 48(r1)
-; P10LE-NEXT:    std r4, 56(r1)
-; P10LE-NEXT:    stb r4, 79(r1)
-; P10LE-NEXT:    stdx r3, 0, r5
-; P10LE-NEXT:    mr r3, r5
+; P10LE-NEXT:    lbz r5, 56(r1)
+; P10LE-NEXT:    stdx r3, 0, r4
+; P10LE-NEXT:    mr r3, r4
+; P10LE-NEXT:    stb r5, 79(r1)
 ; P10LE-NEXT:    bl callee@notoc
 ; P10LE-NEXT:    li r3, 0
 ; P10LE-NEXT:    addi r1, r1, 80
@@ -74,13 +77,13 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    lbz r6, 200(r1)
+; P8BE-NEXT:    stb r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 135
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r4, 200(r1)
+; P8BE-NEXT:    lbz r4, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
 ; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stb r6, 143(r1)
+; P8BE-NEXT:    stb r4, 143(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -94,10 +97,10 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P9BE-NEXT:    mflr r0
 ; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
-; P9BE-NEXT:    std r4, 200(r1)
+; P9BE-NEXT:    stb r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 135
-; P9BE-NEXT:    lbz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 192(r1)
+; P9BE-NEXT:    lbz r5, 200(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    stb r5, 143(r1)
@@ -114,10 +117,10 @@ define signext i8 @caller_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
 ; P10BE-NEXT:    mflr r0
 ; P10BE-NEXT:    std r0, 16(r1)
 ; P10BE-NEXT:    stdu r1, -144(r1)
-; P10BE-NEXT:    lbz r5, 200(r1)
-; P10BE-NEXT:    std r4, 200(r1)
-; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    stb r4, 200(r1)
 ; P10BE-NEXT:    addi r4, r1, 135
+; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    lbz r5, 200(r1)
 ; P10BE-NEXT:    stdx r3, 0, r4
 ; P10BE-NEXT:    mr r3, r4
 ; P10BE-NEXT:    stb r5, 143(r1)
@@ -170,15 +173,183 @@ entry:
   ret i8 0
 }
 
+define signext i8 @caller_9_callee_9([9 x i8]* nocapture readonly byval([9 x i8]) %data) #0 {
+; P8LE-LABEL: caller_9_callee_9:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mflr r0
+; P8LE-NEXT:    std r0, 16(r1)
+; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    stb r4, 56(r1)
+; P8LE-NEXT:    addi r5, r1, 71
+; P8LE-NEXT:    std r3, 48(r1)
+; P8LE-NEXT:    lbz r4, 56(r1)
+; P8LE-NEXT:    stdx r3, 0, r5
+; P8LE-NEXT:    ld r3, 48(r1)
+; P8LE-NEXT:    stb r4, 79(r1)
+; P8LE-NEXT:    lbz r4, 56(r1)
+; P8LE-NEXT:    bl callee_9
+; P8LE-NEXT:    nop
+; P8LE-NEXT:    li r3, 0
+; P8LE-NEXT:    addi r1, r1, 80
+; P8LE-NEXT:    ld r0, 16(r1)
+; P8LE-NEXT:    mtlr r0
+; P8LE-NEXT:    blr
+;
+; P9LE-LABEL: caller_9_callee_9:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mflr r0
+; P9LE-NEXT:    std r0, 16(r1)
+; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    stb r4, 56(r1)
+; P9LE-NEXT:    addi r4, r1, 71
+; P9LE-NEXT:    std r3, 48(r1)
+; P9LE-NEXT:    lbz r5, 56(r1)
+; P9LE-NEXT:    stdx r3, 0, r4
+; P9LE-NEXT:    lbz r4, 56(r1)
+; P9LE-NEXT:    ld r3, 48(r1)
+; P9LE-NEXT:    stb r5, 79(r1)
+; P9LE-NEXT:    bl callee_9
+; P9LE-NEXT:    nop
+; P9LE-NEXT:    li r3, 0
+; P9LE-NEXT:    addi r1, r1, 80
+; P9LE-NEXT:    ld r0, 16(r1)
+; P9LE-NEXT:    mtlr r0
+; P9LE-NEXT:    blr
+;
+; P10LE-LABEL: caller_9_callee_9:
+; P10LE:       # %bb.0: # %entry
+; P10LE-NEXT:    mflr r0
+; P10LE-NEXT:    std r0, 16(r1)
+; P10LE-NEXT:    stdu r1, -80(r1)
+; P10LE-NEXT:    stb r4, 56(r1)
+; P10LE-NEXT:    addi r4, r1, 71
+; P10LE-NEXT:    std r3, 48(r1)
+; P10LE-NEXT:    lbz r5, 56(r1)
+; P10LE-NEXT:    stdx r3, 0, r4
+; P10LE-NEXT:    lbz r4, 56(r1)
+; P10LE-NEXT:    ld r3, 48(r1)
+; P10LE-NEXT:    stb r5, 79(r1)
+; P10LE-NEXT:    bl callee_9@notoc
+; P10LE-NEXT:    li r3, 0
+; P10LE-NEXT:    addi r1, r1, 80
+; P10LE-NEXT:    ld r0, 16(r1)
+; P10LE-NEXT:    mtlr r0
+; P10LE-NEXT:    blr
+;
+; P8BE-LABEL: caller_9_callee_9:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mflr r0
+; P8BE-NEXT:    std r0, 16(r1)
+; P8BE-NEXT:    stdu r1, -144(r1)
+; P8BE-NEXT:    stb r4, 200(r1)
+; P8BE-NEXT:    addi r5, r1, 135
+; P8BE-NEXT:    std r3, 192(r1)
+; P8BE-NEXT:    lbz r4, 200(r1)
+; P8BE-NEXT:    stdx r3, 0, r5
+; P8BE-NEXT:    ld r3, 192(r1)
+; P8BE-NEXT:    stb r4, 143(r1)
+; P8BE-NEXT:    lbz r4, 200(r1)
+; P8BE-NEXT:    bl callee_9
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    li r3, 0
+; P8BE-NEXT:    addi r1, r1, 144
+; P8BE-NEXT:    ld r0, 16(r1)
+; P8BE-NEXT:    mtlr r0
+; P8BE-NEXT:    blr
+;
+; P9BE-LABEL: caller_9_callee_9:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mflr r0
+; P9BE-NEXT:    std r0, 16(r1)
+; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    stb r4, 200(r1)
+; P9BE-NEXT:    addi r4, r1, 135
+; P9BE-NEXT:    std r3, 192(r1)
+; P9BE-NEXT:    lbz r5, 200(r1)
+; P9BE-NEXT:    stdx r3, 0, r4
+; P9BE-NEXT:    lbz r4, 200(r1)
+; P9BE-NEXT:    ld r3, 192(r1)
+; P9BE-NEXT:    stb r5, 143(r1)
+; P9BE-NEXT:    bl callee_9
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    addi r1, r1, 144
+; P9BE-NEXT:    ld r0, 16(r1)
+; P9BE-NEXT:    mtlr r0
+; P9BE-NEXT:    blr
+;
+; P10BE-LABEL: caller_9_callee_9:
+; P10BE:       # %bb.0: # %entry
+; P10BE-NEXT:    mflr r0
+; P10BE-NEXT:    std r0, 16(r1)
+; P10BE-NEXT:    stdu r1, -144(r1)
+; P10BE-NEXT:    stb r4, 200(r1)
+; P10BE-NEXT:    addi r4, r1, 135
+; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    lbz r5, 200(r1)
+; P10BE-NEXT:    stdx r3, 0, r4
+; P10BE-NEXT:    lbz r4, 200(r1)
+; P10BE-NEXT:    ld r3, 192(r1)
+; P10BE-NEXT:    stb r5, 143(r1)
+; P10BE-NEXT:    bl callee_9
+; P10BE-NEXT:    nop
+; P10BE-NEXT:    li r3, 0
+; P10BE-NEXT:    addi r1, r1, 144
+; P10BE-NEXT:    ld r0, 16(r1)
+; P10BE-NEXT:    mtlr r0
+; P10BE-NEXT:    blr
+entry:
+  %_param_data = alloca [9 x i8], align 1
+  %.elt0 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 0
+  %.unpack0 = load i8, i8* %.elt0, align 1
+  %.elt1 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 1
+  %.unpack1 = load i8, i8* %.elt1, align 1
+  %.elt2 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 2
+  %.unpack2 = load i8, i8* %.elt2, align 1
+  %.elt3 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 3
+  %.unpack3 = load i8, i8* %.elt3, align 1
+  %.elt4 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 4
+  %.unpack4 = load i8, i8* %.elt4, align 1
+  %.elt5 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 5
+  %.unpack5 = load i8, i8* %.elt5, align 1
+  %.elt6 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 6
+  %.unpack6 = load i8, i8* %.elt6, align 1
+  %.elt7 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 7
+  %.unpack7 = load i8, i8* %.elt7, align 1
+  %.elt8 = getelementptr inbounds [9 x i8], [9 x i8]* %data, i64 0, i64 8
+  %.unpack8 = load i8, i8* %.elt8, align 1
+  %.temp.0.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 0
+  store i8 %.unpack0, i8* %.temp.0.gep, align 1
+  %.temp.1.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 1
+  store i8 %.unpack1, i8* %.temp.1.gep, align 1
+  %.temp.2.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 2
+  store i8 %.unpack2, i8* %.temp.2.gep, align 1
+  %.temp.3.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 3
+  store i8 %.unpack3, i8* %.temp.3.gep, align 1
+  %.temp.4.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 4
+  store i8 %.unpack4, i8* %.temp.4.gep, align 1
+  %.temp.5.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 5
+  store i8 %.unpack5, i8* %.temp.5.gep, align 1
+  %.temp.6.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 6
+  store i8 %.unpack6, i8* %.temp.6.gep, align 1
+  %.temp.7.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 7
+  store i8 %.unpack7, i8* %.temp.7.gep, align 1
+  %.temp.8.gep = getelementptr inbounds [9 x i8], [9 x i8]* %_param_data, i64 0, i64 8
+  store i8 %.unpack8, i8* %.temp.8.gep, align 1
+  call void @callee_9([9 x i8]* nocapture readonly byval([9 x i8]) %data)
+  ret i8 0
+}
+
 define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_10:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    sth r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 70
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    std r4, 56(r1)
+; P8LE-NEXT:    lhz r4, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r5
 ; P8LE-NEXT:    mr r3, r5
 ; P8LE-NEXT:    sth r4, 78(r1)
@@ -195,12 +366,13 @@ define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %dat
 ; P9LE-NEXT:    mflr r0
 ; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
-; P9LE-NEXT:    addi r5, r1, 70
+; P9LE-NEXT:    sth r4, 56(r1)
+; P9LE-NEXT:    addi r4, r1, 70
 ; P9LE-NEXT:    std r3, 48(r1)
-; P9LE-NEXT:    std r4, 56(r1)
-; P9LE-NEXT:    sth r4, 78(r1)
-; P9LE-NEXT:    stdx r3, 0, r5
-; P9LE-NEXT:    mr r3, r5
+; P9LE-NEXT:    lhz r5, 56(r1)
+; P9LE-NEXT:    stdx r3, 0, r4
+; P9LE-NEXT:    mr r3, r4
+; P9LE-NEXT:    sth r5, 78(r1)
 ; P9LE-NEXT:    bl callee
 ; P9LE-NEXT:    nop
 ; P9LE-NEXT:    li r3, 0
@@ -214,12 +386,13 @@ define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %dat
 ; P10LE-NEXT:    mflr r0
 ; P10LE-NEXT:    std r0, 16(r1)
 ; P10LE-NEXT:    stdu r1, -80(r1)
-; P10LE-NEXT:    addi r5, r1, 70
+; P10LE-NEXT:    sth r4, 56(r1)
+; P10LE-NEXT:    addi r4, r1, 70
 ; P10LE-NEXT:    std r3, 48(r1)
-; P10LE-NEXT:    std r4, 56(r1)
-; P10LE-NEXT:    sth r4, 78(r1)
-; P10LE-NEXT:    stdx r3, 0, r5
-; P10LE-NEXT:    mr r3, r5
+; P10LE-NEXT:    lhz r5, 56(r1)
+; P10LE-NEXT:    stdx r3, 0, r4
+; P10LE-NEXT:    mr r3, r4
+; P10LE-NEXT:    sth r5, 78(r1)
 ; P10LE-NEXT:    bl callee@notoc
 ; P10LE-NEXT:    li r3, 0
 ; P10LE-NEXT:    addi r1, r1, 80
@@ -232,13 +405,13 @@ define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %dat
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    lhz r6, 200(r1)
+; P8BE-NEXT:    sth r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 134
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r4, 200(r1)
+; P8BE-NEXT:    lhz r4, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
 ; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    sth r6, 142(r1)
+; P8BE-NEXT:    sth r4, 142(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -252,10 +425,10 @@ define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %dat
 ; P9BE-NEXT:    mflr r0
 ; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
-; P9BE-NEXT:    std r4, 200(r1)
+; P9BE-NEXT:    sth r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 134
-; P9BE-NEXT:    lhz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 192(r1)
+; P9BE-NEXT:    lhz r5, 200(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    sth r5, 142(r1)
@@ -272,10 +445,10 @@ define signext i8 @caller_10([10 x i8]* nocapture readonly byval([10 x i8]) %dat
 ; P10BE-NEXT:    mflr r0
 ; P10BE-NEXT:    std r0, 16(r1)
 ; P10BE-NEXT:    stdu r1, -144(r1)
-; P10BE-NEXT:    lhz r5, 200(r1)
-; P10BE-NEXT:    std r4, 200(r1)
-; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    sth r4, 200(r1)
 ; P10BE-NEXT:    addi r4, r1, 134
+; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    lhz r5, 200(r1)
 ; P10BE-NEXT:    stdx r3, 0, r4
 ; P10BE-NEXT:    mr r3, r4
 ; P10BE-NEXT:    sth r5, 142(r1)
@@ -338,9 +511,10 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    stw r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 68
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    std r4, 56(r1)
+; P8LE-NEXT:    lwz r4, 56(r1)
 ; P8LE-NEXT:    std r3, 68(r1)
 ; P8LE-NEXT:    mr r3, r5
 ; P8LE-NEXT:    stw r4, 76(r1)
@@ -357,12 +531,13 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P9LE-NEXT:    mflr r0
 ; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
-; P9LE-NEXT:    addi r5, r1, 68
+; P9LE-NEXT:    stw r4, 56(r1)
+; P9LE-NEXT:    addi r4, r1, 68
 ; P9LE-NEXT:    std r3, 48(r1)
-; P9LE-NEXT:    std r4, 56(r1)
-; P9LE-NEXT:    stw r4, 76(r1)
+; P9LE-NEXT:    lwz r5, 56(r1)
 ; P9LE-NEXT:    std r3, 68(r1)
-; P9LE-NEXT:    mr r3, r5
+; P9LE-NEXT:    mr r3, r4
+; P9LE-NEXT:    stw r5, 76(r1)
 ; P9LE-NEXT:    bl callee
 ; P9LE-NEXT:    nop
 ; P9LE-NEXT:    li r3, 0
@@ -376,12 +551,13 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P10LE-NEXT:    mflr r0
 ; P10LE-NEXT:    std r0, 16(r1)
 ; P10LE-NEXT:    stdu r1, -80(r1)
-; P10LE-NEXT:    addi r5, r1, 68
+; P10LE-NEXT:    stw r4, 56(r1)
+; P10LE-NEXT:    addi r4, r1, 68
 ; P10LE-NEXT:    std r3, 48(r1)
-; P10LE-NEXT:    std r4, 56(r1)
-; P10LE-NEXT:    stw r4, 76(r1)
+; P10LE-NEXT:    lwz r5, 56(r1)
 ; P10LE-NEXT:    std r3, 68(r1)
-; P10LE-NEXT:    mr r3, r5
+; P10LE-NEXT:    mr r3, r4
+; P10LE-NEXT:    stw r5, 76(r1)
 ; P10LE-NEXT:    bl callee@notoc
 ; P10LE-NEXT:    li r3, 0
 ; P10LE-NEXT:    addi r1, r1, 80
@@ -394,13 +570,13 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    lwz r6, 200(r1)
+; P8BE-NEXT:    stw r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 132
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r4, 200(r1)
+; P8BE-NEXT:    lwz r4, 200(r1)
 ; P8BE-NEXT:    std r3, 132(r1)
 ; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stw r6, 140(r1)
+; P8BE-NEXT:    stw r4, 140(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -414,10 +590,10 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P9BE-NEXT:    mflr r0
 ; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
-; P9BE-NEXT:    std r4, 200(r1)
+; P9BE-NEXT:    stw r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 132
-; P9BE-NEXT:    lwz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 192(r1)
+; P9BE-NEXT:    lwz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 132(r1)
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    stw r5, 140(r1)
@@ -434,10 +610,10 @@ define signext i8 @caller_12([12 x i8]* nocapture readonly byval([12 x i8]) %dat
 ; P10BE-NEXT:    mflr r0
 ; P10BE-NEXT:    std r0, 16(r1)
 ; P10BE-NEXT:    stdu r1, -144(r1)
-; P10BE-NEXT:    lwz r5, 200(r1)
-; P10BE-NEXT:    std r4, 200(r1)
-; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    stw r4, 200(r1)
 ; P10BE-NEXT:    addi r4, r1, 132
+; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    lwz r5, 200(r1)
 ; P10BE-NEXT:    std r3, 132(r1)
 ; P10BE-NEXT:    mr r3, r4
 ; P10BE-NEXT:    stw r5, 140(r1)
@@ -508,12 +684,15 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    stw r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 66
+; P8LE-NEXT:    rldicl r4, r4, 32, 32
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    std r4, 56(r1)
+; P8LE-NEXT:    lwz r6, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r5
 ; P8LE-NEXT:    mr r3, r5
-; P8LE-NEXT:    stw r4, 74(r1)
+; P8LE-NEXT:    sth r4, 60(r1)
+; P8LE-NEXT:    stw r6, 74(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -527,12 +706,15 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P9LE-NEXT:    mflr r0
 ; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
-; P9LE-NEXT:    addi r5, r1, 66
+; P9LE-NEXT:    stw r4, 56(r1)
+; P9LE-NEXT:    rldicl r4, r4, 32, 32
+; P9LE-NEXT:    lwz r5, 56(r1)
 ; P9LE-NEXT:    std r3, 48(r1)
-; P9LE-NEXT:    std r4, 56(r1)
-; P9LE-NEXT:    stw r4, 74(r1)
-; P9LE-NEXT:    stdx r3, 0, r5
-; P9LE-NEXT:    mr r3, r5
+; P9LE-NEXT:    sth r4, 60(r1)
+; P9LE-NEXT:    addi r4, r1, 66
+; P9LE-NEXT:    stdx r3, 0, r4
+; P9LE-NEXT:    mr r3, r4
+; P9LE-NEXT:    stw r5, 74(r1)
 ; P9LE-NEXT:    bl callee
 ; P9LE-NEXT:    nop
 ; P9LE-NEXT:    li r3, 0
@@ -546,12 +728,15 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P10LE-NEXT:    mflr r0
 ; P10LE-NEXT:    std r0, 16(r1)
 ; P10LE-NEXT:    stdu r1, -80(r1)
-; P10LE-NEXT:    addi r5, r1, 66
+; P10LE-NEXT:    stw r4, 56(r1)
+; P10LE-NEXT:    rldicl r4, r4, 32, 32
 ; P10LE-NEXT:    std r3, 48(r1)
-; P10LE-NEXT:    std r4, 56(r1)
-; P10LE-NEXT:    stw r4, 74(r1)
-; P10LE-NEXT:    stdx r3, 0, r5
-; P10LE-NEXT:    mr r3, r5
+; P10LE-NEXT:    lwz r5, 56(r1)
+; P10LE-NEXT:    sth r4, 60(r1)
+; P10LE-NEXT:    addi r4, r1, 66
+; P10LE-NEXT:    stdx r3, 0, r4
+; P10LE-NEXT:    mr r3, r4
+; P10LE-NEXT:    stw r5, 74(r1)
 ; P10LE-NEXT:    bl callee@notoc
 ; P10LE-NEXT:    li r3, 0
 ; P10LE-NEXT:    addi r1, r1, 80
@@ -564,12 +749,14 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    lwz r6, 200(r1)
+; P8BE-NEXT:    rldicl r6, r4, 48, 16
 ; P8BE-NEXT:    addi r5, r1, 130
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r4, 200(r1)
+; P8BE-NEXT:    sth r4, 204(r1)
+; P8BE-NEXT:    stw r6, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
 ; P8BE-NEXT:    mr r3, r5
+; P8BE-NEXT:    lwz r6, 200(r1)
 ; P8BE-NEXT:    stw r6, 138(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
@@ -584,12 +771,14 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P9BE-NEXT:    mflr r0
 ; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
-; P9BE-NEXT:    std r4, 200(r1)
+; P9BE-NEXT:    rldicl r5, r4, 48, 16
+; P9BE-NEXT:    sth r4, 204(r1)
 ; P9BE-NEXT:    addi r4, r1, 130
-; P9BE-NEXT:    lwz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 192(r1)
+; P9BE-NEXT:    stw r5, 200(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    mr r3, r4
+; P9BE-NEXT:    lwz r5, 200(r1)
 ; P9BE-NEXT:    stw r5, 138(r1)
 ; P9BE-NEXT:    bl callee
 ; P9BE-NEXT:    nop
@@ -604,12 +793,14 @@ define signext i8 @caller_14([14 x i8]* nocapture readonly byval([14 x i8]) %dat
 ; P10BE-NEXT:    mflr r0
 ; P10BE-NEXT:    std r0, 16(r1)
 ; P10BE-NEXT:    stdu r1, -144(r1)
-; P10BE-NEXT:    lwz r5, 200(r1)
-; P10BE-NEXT:    std r4, 200(r1)
-; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    rldicl r5, r4, 48, 16
+; P10BE-NEXT:    sth r4, 204(r1)
 ; P10BE-NEXT:    addi r4, r1, 130
+; P10BE-NEXT:    std r3, 192(r1)
+; P10BE-NEXT:    stw r5, 200(r1)
 ; P10BE-NEXT:    stdx r3, 0, r4
 ; P10BE-NEXT:    mr r3, r4
+; P10BE-NEXT:    lwz r5, 200(r1)
 ; P10BE-NEXT:    stw r5, 138(r1)
 ; P10BE-NEXT:    bl callee
 ; P10BE-NEXT:    nop
@@ -850,7 +1041,7 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P8LE-NEXT:    stdu r1, -96(r1)
 ; P8LE-NEXT:    addi r6, r1, 78
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    std r5, 64(r1)
+; P8LE-NEXT:    sth r5, 64(r1)
 ; P8LE-NEXT:    std r4, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r6
 ; P8LE-NEXT:    mr r3, r6
@@ -868,7 +1059,7 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P9LE-NEXT:    mflr r0
 ; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -96(r1)
-; P9LE-NEXT:    std r5, 64(r1)
+; P9LE-NEXT:    sth r5, 64(r1)
 ; P9LE-NEXT:    addi r5, r1, 78
 ; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    std r4, 56(r1)
@@ -888,7 +1079,7 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P10LE-NEXT:    mflr r0
 ; P10LE-NEXT:    std r0, 16(r1)
 ; P10LE-NEXT:    stdu r1, -96(r1)
-; P10LE-NEXT:    std r5, 64(r1)
+; P10LE-NEXT:    sth r5, 64(r1)
 ; P10LE-NEXT:    addi r5, r1, 78
 ; P10LE-NEXT:    std r3, 48(r1)
 ; P10LE-NEXT:    std r4, 56(r1)
@@ -910,10 +1101,10 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P8BE-NEXT:    std r4, 200(r1)
 ; P8BE-NEXT:    addi r6, r1, 126
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r5, 208(r1)
 ; P8BE-NEXT:    lwz r4, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r6
 ; P8BE-NEXT:    mr r3, r6
+; P8BE-NEXT:    sth r5, 208(r1)
 ; P8BE-NEXT:    stw r4, 134(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
@@ -930,7 +1121,7 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P9BE-NEXT:    stdu r1, -144(r1)
 ; P9BE-NEXT:    std r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 126
-; P9BE-NEXT:    std r5, 208(r1)
+; P9BE-NEXT:    sth r5, 208(r1)
 ; P9BE-NEXT:    lwz r5, 200(r1)
 ; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
@@ -952,7 +1143,7 @@ define signext i8 @caller_18([18 x i8]* nocapture readonly byval([18 x i8]) %dat
 ; P10BE-NEXT:    std r4, 200(r1)
 ; P10BE-NEXT:    std r3, 192(r1)
 ; P10BE-NEXT:    addi r4, r1, 126
-; P10BE-NEXT:    std r5, 208(r1)
+; P10BE-NEXT:    sth r5, 208(r1)
 ; P10BE-NEXT:    lwz r5, 200(r1)
 ; P10BE-NEXT:    stdx r3, 0, r4
 ; P10BE-NEXT:    mr r3, r4
@@ -1021,6 +1212,7 @@ entry:
 
 
 declare void @callee(i8*) local_unnamed_addr #0
+declare void @callee_9([9 x i8]* nocapture readonly byval([9 x i8]) %data) local_unnamed_addr #0
 
 attributes #0 = { nounwind }
 


        


More information about the llvm-commits mailing list