[clang] [llvm] [PowerPC] Update data layout alignment of i128 to 16 (PR #118004)

Lei Huang via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 28 08:07:36 PST 2024


https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/118004

>From ae5beae74fcd7717bf40519c80a9d920625bb137 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 18:44:38 +0000
Subject: [PATCH 1/7] update llc tc affected

---
 .../CostModel/PowerPC/load-to-trunc.ll        |  4 +-
 llvm/test/CodeGen/PowerPC/ctrloop-sh.ll       | 58 ++++++++++---------
 llvm/test/CodeGen/PowerPC/pr59074.ll          | 29 +++++-----
 ...lar-shift-by-byte-multiple-legalization.ll | 48 +++++++--------
 .../PowerPC/wide-scalar-shift-legalization.ll | 33 ++++++-----
 .../AtomicExpand/PowerPC/cmpxchg.ll           |  6 +-
 6 files changed, 92 insertions(+), 86 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
index 57a6e98cfb4ee6..b78d121ff4f389 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
@@ -7,7 +7,7 @@
 ; Check that cost is 1 for unusual load to register sized load.
 define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
 ; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
 ;
@@ -18,7 +18,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
 
 define i128 @loadUnusualInteger(ptr %ptr) {
 ; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
 ;
   %out = load i128, ptr %ptr
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
index 72de456cba395b..19f86f9d1af6ff 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -16,39 +16,41 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    addi 7, 1, 16
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwz 8, 0(4)
 ; CHECK-NEXT:    lwz 9, 4(4)
+; CHECK-NEXT:    lwz 8, 0(4)
 ; CHECK-NEXT:    lwz 10, 8(4)
 ; CHECK-NEXT:    lwz 11, 12(4)
 ; CHECK-NEXT:    lwz 12, 12(5)
+; CHECK-NEXT:    stw 9, 20(1)
+; CHECK-NEXT:    mr 9, 7
 ; CHECK-NEXT:    stw 6, 44(1)
+; CHECK-NEXT:    rlwimi 9, 12, 29, 28, 29
 ; CHECK-NEXT:    stw 6, 40(1)
 ; CHECK-NEXT:    stw 6, 36(1)
 ; CHECK-NEXT:    stw 6, 32(1)
 ; CHECK-NEXT:    stw 11, 28(1)
 ; CHECK-NEXT:    stw 10, 24(1)
 ; CHECK-NEXT:    clrlwi 10, 12, 27
-; CHECK-NEXT:    stw 9, 20(1)
 ; CHECK-NEXT:    stw 8, 16(1)
-; CHECK-NEXT:    rlwinm 8, 12, 29, 28, 29
-; CHECK-NEXT:    lwzux 9, 8, 7
-; CHECK-NEXT:    subfic 12, 10, 32
-; CHECK-NEXT:    lwz 11, 8(8)
-; CHECK-NEXT:    slw 9, 9, 10
-; CHECK-NEXT:    lwz 0, 4(8)
-; CHECK-NEXT:    lwz 8, 12(8)
-; CHECK-NEXT:    srw 30, 11, 12
-; CHECK-NEXT:    slw 29, 0, 10
-; CHECK-NEXT:    srw 0, 0, 12
-; CHECK-NEXT:    srw 12, 8, 12
-; CHECK-NEXT:    slw 11, 11, 10
+; CHECK-NEXT:    rlwinm 12, 12, 29, 28, 29
+; CHECK-NEXT:    lwz 8, 8(9)
+; CHECK-NEXT:    subfic 0, 10, 32
+; CHECK-NEXT:    lwz 11, 4(9)
+; CHECK-NEXT:    lwz 9, 12(9)
+; CHECK-NEXT:    srw 30, 8, 0
+; CHECK-NEXT:    lwzx 12, 7, 12
+; CHECK-NEXT:    slw 29, 11, 10
+; CHECK-NEXT:    srw 11, 11, 0
+; CHECK-NEXT:    srw 0, 9, 0
 ; CHECK-NEXT:    slw 8, 8, 10
-; CHECK-NEXT:    stw 8, 12(3)
-; CHECK-NEXT:    or 8, 11, 12
+; CHECK-NEXT:    slw 12, 12, 10
+; CHECK-NEXT:    or 8, 8, 0
 ; CHECK-NEXT:    stw 8, 8(3)
-; CHECK-NEXT:    or 8, 9, 0
+; CHECK-NEXT:    or 8, 12, 11
+; CHECK-NEXT:    slw 9, 9, 10
 ; CHECK-NEXT:    stw 8, 0(3)
 ; CHECK-NEXT:    or 8, 29, 30
+; CHECK-NEXT:    stw 9, 12(3)
 ; CHECK-NEXT:    stw 8, 4(3)
 ; CHECK-NEXT:    bdnz .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %for.end
@@ -77,11 +79,11 @@ for.end:                                          ; preds = %for.body
 define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-LABEL: foo2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -48(1)
-; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stwu 1, -64(1)
+; CHECK-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    li 6, 2048
 ; CHECK-NEXT:    mtctr 6
-; CHECK-NEXT:    addi 6, 1, 24
+; CHECK-NEXT:    addi 6, 1, 32
 ; CHECK-NEXT:  .LBB1_1: # %for.body
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwz 7, 0(4)
@@ -89,18 +91,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    lwz 11, 12(5)
 ; CHECK-NEXT:    lwz 9, 8(4)
 ; CHECK-NEXT:    lwz 10, 12(4)
-; CHECK-NEXT:    stw 8, 28(1)
+; CHECK-NEXT:    stw 8, 36(1)
 ; CHECK-NEXT:    rlwinm 8, 11, 29, 28, 29
-; CHECK-NEXT:    stw 7, 24(1)
+; CHECK-NEXT:    stw 7, 32(1)
 ; CHECK-NEXT:    srawi 7, 7, 31
-; CHECK-NEXT:    stw 10, 36(1)
+; CHECK-NEXT:    stw 10, 44(1)
 ; CHECK-NEXT:    clrlwi 10, 11, 27
-; CHECK-NEXT:    stw 9, 32(1)
+; CHECK-NEXT:    stw 9, 40(1)
 ; CHECK-NEXT:    subfic 12, 10, 32
+; CHECK-NEXT:    stw 7, 28(1)
+; CHECK-NEXT:    stw 7, 24(1)
 ; CHECK-NEXT:    stw 7, 20(1)
 ; CHECK-NEXT:    stw 7, 16(1)
-; CHECK-NEXT:    stw 7, 12(1)
-; CHECK-NEXT:    stw 7, 8(1)
 ; CHECK-NEXT:    sub 7, 6, 8
 ; CHECK-NEXT:    lwz 8, 4(7)
 ; CHECK-NEXT:    lwz 9, 0(7)
@@ -122,8 +124,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    stw 7, 4(3)
 ; CHECK-NEXT:    bdnz .LBB1_1
 ; CHECK-NEXT:  # %bb.2: # %for.end
-; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
-; CHECK-NEXT:    addi 1, 1, 48
+; CHECK-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 64
 ; CHECK-NEXT:    blr
 entry:
   br label %for.body
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
index 6264b9f22876cc..ba21fed9f5abd4 100644
--- a/llvm/test/CodeGen/PowerPC/pr59074.ll
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -38,26 +38,27 @@ define void @pr59074(ptr %0) {
 ; LE32-NEXT:    stw 7, 40(1)
 ; LE32-NEXT:    stw 7, 36(1)
 ; LE32-NEXT:    stw 8, 16(1)
-; LE32-NEXT:    rlwinm 9, 4, 29, 28, 29
+; LE32-NEXT:    rlwinm 7, 4, 29, 28, 29
 ; LE32-NEXT:    stxvd2x 0, 0, 5
+; LE32-NEXT:    lwzx 5, 6, 7
+; LE32-NEXT:    rlwimi 6, 4, 29, 28, 29
 ; LE32-NEXT:    clrlwi 4, 4, 27
-; LE32-NEXT:    lwzux 5, 9, 6
-; LE32-NEXT:    lwz 6, 8(9)
-; LE32-NEXT:    lwz 7, 4(9)
-; LE32-NEXT:    lwz 8, 12(9)
+; LE32-NEXT:    lwz 7, 8(6)
+; LE32-NEXT:    lwz 8, 4(6)
+; LE32-NEXT:    lwz 6, 12(6)
 ; LE32-NEXT:    xori 9, 4, 31
 ; LE32-NEXT:    subfic 11, 4, 32
 ; LE32-NEXT:    srw 5, 5, 4
-; LE32-NEXT:    slwi 10, 6, 1
-; LE32-NEXT:    srw 6, 6, 4
+; LE32-NEXT:    slwi 10, 7, 1
+; LE32-NEXT:    srw 12, 8, 4
+; LE32-NEXT:    slw 8, 8, 11
+; LE32-NEXT:    srw 7, 7, 4
+; LE32-NEXT:    slw 11, 6, 11
+; LE32-NEXT:    srw 4, 6, 4
 ; LE32-NEXT:    slw 9, 10, 9
-; LE32-NEXT:    srw 10, 7, 4
-; LE32-NEXT:    slw 7, 7, 11
-; LE32-NEXT:    slw 11, 8, 11
-; LE32-NEXT:    srw 4, 8, 4
-; LE32-NEXT:    or 5, 7, 5
-; LE32-NEXT:    or 6, 11, 6
-; LE32-NEXT:    or 7, 10, 9
+; LE32-NEXT:    or 5, 8, 5
+; LE32-NEXT:    or 6, 11, 7
+; LE32-NEXT:    or 7, 12, 9
 ; LE32-NEXT:    stw 4, 12(3)
 ; LE32-NEXT:    stw 6, 8(3)
 ; LE32-NEXT:    stw 5, 0(3)
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index 12976e838f3ca6..6bd5cd23dcec8c 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -405,25 +405,26 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 9, 24(1)
 ; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
 ; LE-32BIT-NEXT:    stw 8, 20(1)
-; LE-32BIT-NEXT:    subfic 8, 4, 32
+; LE-32BIT-NEXT:    subfic 9, 4, 32
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzux 3, 6, 3
-; LE-32BIT-NEXT:    lwz 9, 4(6)
-; LE-32BIT-NEXT:    slw 3, 3, 4
-; LE-32BIT-NEXT:    lwz 7, 8(6)
-; LE-32BIT-NEXT:    lwz 6, 12(6)
-; LE-32BIT-NEXT:    slw 11, 9, 4
-; LE-32BIT-NEXT:    srw 9, 9, 8
-; LE-32BIT-NEXT:    srw 10, 7, 8
-; LE-32BIT-NEXT:    srw 8, 6, 8
+; LE-32BIT-NEXT:    lwzx 7, 3, 6
+; LE-32BIT-NEXT:    rlwimi 3, 6, 0, 28, 29
+; LE-32BIT-NEXT:    lwz 6, 8(3)
+; LE-32BIT-NEXT:    lwz 8, 4(3)
 ; LE-32BIT-NEXT:    slw 7, 7, 4
-; LE-32BIT-NEXT:    slw 4, 6, 4
-; LE-32BIT-NEXT:    or 3, 3, 9
-; LE-32BIT-NEXT:    stw 4, 12(5)
-; LE-32BIT-NEXT:    or 4, 7, 8
+; LE-32BIT-NEXT:    lwz 3, 12(3)
+; LE-32BIT-NEXT:    srw 10, 6, 9
+; LE-32BIT-NEXT:    slw 11, 8, 4
+; LE-32BIT-NEXT:    srw 8, 8, 9
+; LE-32BIT-NEXT:    srw 9, 3, 9
+; LE-32BIT-NEXT:    slw 6, 6, 4
+; LE-32BIT-NEXT:    slw 3, 3, 4
+; LE-32BIT-NEXT:    stw 3, 12(5)
+; LE-32BIT-NEXT:    or 3, 6, 9
+; LE-32BIT-NEXT:    stw 3, 8(5)
+; LE-32BIT-NEXT:    or 3, 7, 8
 ; LE-32BIT-NEXT:    stw 3, 0(5)
 ; LE-32BIT-NEXT:    or 3, 11, 10
-; LE-32BIT-NEXT:    stw 4, 8(5)
 ; LE-32BIT-NEXT:    stw 3, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr
@@ -483,22 +484,23 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
 ; LE-32BIT-NEXT:    lwz 4, 12(4)
 ; LE-32BIT-NEXT:    stw 6, 44(1)
 ; LE-32BIT-NEXT:    stw 6, 40(1)
-; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
 ; LE-32BIT-NEXT:    stw 6, 36(1)
 ; LE-32BIT-NEXT:    stw 6, 32(1)
+; LE-32BIT-NEXT:    rlwinm 6, 4, 2, 28, 29
 ; LE-32BIT-NEXT:    stw 3, 28(1)
 ; LE-32BIT-NEXT:    addi 3, 1, 16
 ; LE-32BIT-NEXT:    stw 9, 24(1)
 ; LE-32BIT-NEXT:    stw 8, 20(1)
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzux 3, 4, 3
-; LE-32BIT-NEXT:    lwz 6, 4(4)
-; LE-32BIT-NEXT:    lwz 7, 12(4)
-; LE-32BIT-NEXT:    lwz 4, 8(4)
-; LE-32BIT-NEXT:    stw 3, 0(5)
-; LE-32BIT-NEXT:    stw 4, 8(5)
+; LE-32BIT-NEXT:    lwzx 6, 3, 6
+; LE-32BIT-NEXT:    rlwimi 3, 4, 2, 28, 29
+; LE-32BIT-NEXT:    lwz 4, 4(3)
+; LE-32BIT-NEXT:    lwz 7, 12(3)
+; LE-32BIT-NEXT:    lwz 3, 8(3)
+; LE-32BIT-NEXT:    stw 6, 0(5)
+; LE-32BIT-NEXT:    stw 3, 8(5)
 ; LE-32BIT-NEXT:    stw 7, 12(5)
-; LE-32BIT-NEXT:    stw 6, 4(5)
+; LE-32BIT-NEXT:    stw 4, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr
   %src = load i128, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index e1731ddfd92875..5fe7fca5b6e41d 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -304,27 +304,28 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 3, 28(1)
 ; LE-32BIT-NEXT:    addi 3, 1, 16
 ; LE-32BIT-NEXT:    stw 9, 24(1)
-; LE-32BIT-NEXT:    clrlwi 4, 4, 27
 ; LE-32BIT-NEXT:    stw 8, 20(1)
-; LE-32BIT-NEXT:    subfic 8, 4, 32
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzux 3, 6, 3
-; LE-32BIT-NEXT:    lwz 9, 4(6)
-; LE-32BIT-NEXT:    slw 3, 3, 4
-; LE-32BIT-NEXT:    lwz 7, 8(6)
-; LE-32BIT-NEXT:    lwz 6, 12(6)
-; LE-32BIT-NEXT:    slw 11, 9, 4
-; LE-32BIT-NEXT:    srw 9, 9, 8
-; LE-32BIT-NEXT:    srw 10, 7, 8
-; LE-32BIT-NEXT:    srw 8, 6, 8
+; LE-32BIT-NEXT:    lwzx 6, 3, 6
+; LE-32BIT-NEXT:    rlwimi 3, 4, 29, 28, 29
+; LE-32BIT-NEXT:    lwz 7, 8(3)
+; LE-32BIT-NEXT:    clrlwi 4, 4, 27
+; LE-32BIT-NEXT:    lwz 8, 4(3)
+; LE-32BIT-NEXT:    subfic 9, 4, 32
+; LE-32BIT-NEXT:    lwz 3, 12(3)
+; LE-32BIT-NEXT:    srw 10, 7, 9
+; LE-32BIT-NEXT:    slw 11, 8, 4
+; LE-32BIT-NEXT:    srw 8, 8, 9
+; LE-32BIT-NEXT:    srw 9, 3, 9
 ; LE-32BIT-NEXT:    slw 7, 7, 4
-; LE-32BIT-NEXT:    slw 4, 6, 4
-; LE-32BIT-NEXT:    or 3, 3, 9
-; LE-32BIT-NEXT:    stw 4, 12(5)
-; LE-32BIT-NEXT:    or 4, 7, 8
+; LE-32BIT-NEXT:    slw 3, 3, 4
+; LE-32BIT-NEXT:    slw 6, 6, 4
+; LE-32BIT-NEXT:    stw 3, 12(5)
+; LE-32BIT-NEXT:    or 3, 7, 9
+; LE-32BIT-NEXT:    stw 3, 8(5)
+; LE-32BIT-NEXT:    or 3, 6, 8
 ; LE-32BIT-NEXT:    stw 3, 0(5)
 ; LE-32BIT-NEXT:    or 3, 11, 10
-; LE-32BIT-NEXT:    stw 4, 8(5)
 ; LE-32BIT-NEXT:    stw 3, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
index b94023b97a2950..cc51a00db415ca 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -30,11 +30,11 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) {
 ;
 ; PWR7-LABEL: @test_cmpxchg_seq_cst(
 ; PWR7-NEXT:  entry:
-; PWR7-NEXT:    [[TMP0:%.*]] = alloca i128, align 8
+; PWR7-NEXT:    [[TMP0:%.*]] = alloca i128, align 16
 ; PWR7-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]])
-; PWR7-NEXT:    store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8
+; PWR7-NEXT:    store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 16
 ; PWR7-NEXT:    [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5)
-; PWR7-NEXT:    [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8
+; PWR7-NEXT:    [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16
 ; PWR7-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]])
 ; PWR7-NEXT:    [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0
 ; PWR7-NEXT:    [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1

>From 6c23894650b039de4f3e142ce7f90611243bff33 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:16:29 +0000
Subject: [PATCH 2/7] additional tc updates

---
 clang/test/CodeGen/target-data.c              |  22 +--
 llvm/test/CodeGen/PowerPC/all-atomics.ll      | 130 +++++++++---------
 .../Bitcode/DataLayoutUpgradeTest.cpp         |   2 +-
 3 files changed, 77 insertions(+), 77 deletions(-)

diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index cb89fad941c832..3e11f6ae183b6b 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -88,7 +88,7 @@
 
 // RUN: %clang_cc1 -triple powerpc64-lv2 -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PS3
-// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-n32:64"
+// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64"
 
 // RUN: %clang_cc1 -triple i686-nacl -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=I686-NACL
@@ -120,43 +120,43 @@
 
 // RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC
-// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32"
+// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
 
 // RUN: %clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPCLE
-// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32"
+// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
 
 // RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC64-FREEBSD
-// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-n32:64"
+// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-i128:128-n32:64"
 
 // RUN: %clang_cc1 -triple powerpc64le-freebsd -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC64LE-FREEBSD
-// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-n32:64"
+// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"
 
 // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC64-LINUX
-// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu future %s | \
 // RUN: FileCheck %s -check-prefix=PPC64-FUTURE
-// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu pwr10 %s | \
 // RUN: FileCheck %s -check-prefix=PPC64-P10
-// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC64LE-LINUX
-// PPC64LE-LINUX: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-LINUX: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu future %s | \
 // RUN: FileCheck %s -check-prefix=PPC64LE-FUTURE
-// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu pwr10 %s | \
 // RUN: FileCheck %s -check-prefix=PPC64LE-P10
-// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
 
 // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=NVPTX
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 531e559ea7309c..40b96dab94b9ff 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_op_ignore:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stwu 1, -160(1)
+; AIX32-NEXT:    stwu 1, -176(1)
 ; AIX32-NEXT:    lwz 3, L..C0(2) # @sc
-; AIX32-NEXT:    stw 0, 168(1)
+; AIX32-NEXT:    stw 0, 184(1)
 ; AIX32-NEXT:    rlwinm 4, 3, 3, 27, 28
-; AIX32-NEXT:    stw 15, 92(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 26, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 28, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 15, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 26, 152(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 28, 160(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    li 15, 1
 ; AIX32-NEXT:    rlwinm 28, 3, 0, 0, 29
 ; AIX32-NEXT:    li 3, 255
 ; AIX32-NEXT:    xori 26, 4, 24
-; AIX32-NEXT:    stw 16, 96(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 17, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 18, 104(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 19, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 20, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 21, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 22, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 23, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 24, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 25, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 27, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 29, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 30, 152(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 31, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 16, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 17, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 18, 120(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 19, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 20, 128(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 21, 132(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 22, 136(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 23, 140(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 24, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 25, 148(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 27, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 29, 164(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 30, 168(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 31, 172(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 29, 15, 26
 ; AIX32-NEXT:    slw 3, 3, 26
@@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    bl .__atomic_fetch_xor_8[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    lwz 31, L..C8(2) # @u128
-; AIX32-NEXT:    addi 30, 1, 72
-; AIX32-NEXT:    addi 29, 1, 56
+; AIX32-NEXT:    addi 30, 1, 80
+; AIX32-NEXT:    addi 29, 1, 64
 ; AIX32-NEXT:    lwz 5, 12(31)
 ; AIX32-NEXT:    lwz 4, 8(31)
 ; AIX32-NEXT:    lwz 6, 4(31)
@@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:  L..BB0_49: # %atomicrmw.start2
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
-; AIX32-NEXT:    stw 7, 72(1)
-; AIX32-NEXT:    stw 7, 56(1)
+; AIX32-NEXT:    stw 7, 80(1)
+; AIX32-NEXT:    stw 7, 64(1)
 ; AIX32-NEXT:    li 7, 5
-; AIX32-NEXT:    stw 3, 68(1)
+; AIX32-NEXT:    stw 3, 76(1)
 ; AIX32-NEXT:    li 3, 16
 ; AIX32-NEXT:    li 8, 5
-; AIX32-NEXT:    stw 6, 76(1)
-; AIX32-NEXT:    stw 4, 80(1)
-; AIX32-NEXT:    stw 5, 84(1)
-; AIX32-NEXT:    stw 4, 64(1)
-; AIX32-NEXT:    stw 6, 60(1)
+; AIX32-NEXT:    stw 6, 84(1)
+; AIX32-NEXT:    stw 4, 88(1)
+; AIX32-NEXT:    stw 5, 92(1)
+; AIX32-NEXT:    stw 4, 72(1)
+; AIX32-NEXT:    stw 6, 68(1)
 ; AIX32-NEXT:    mr 4, 31
 ; AIX32-NEXT:    mr 5, 30
 ; AIX32-NEXT:    mr 6, 29
 ; AIX32-NEXT:    bl .__atomic_compare_exchange[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 5, 84(1)
-; AIX32-NEXT:    lwz 4, 80(1)
-; AIX32-NEXT:    lwz 6, 76(1)
-; AIX32-NEXT:    lwz 7, 72(1)
+; AIX32-NEXT:    lwz 5, 92(1)
+; AIX32-NEXT:    lwz 4, 88(1)
+; AIX32-NEXT:    lwz 6, 84(1)
+; AIX32-NEXT:    lwz 7, 80(1)
 ; AIX32-NEXT:    cmplwi 3, 0
 ; AIX32-NEXT:    beq 0, L..BB0_49
 ; AIX32-NEXT:  # %bb.50: # %atomicrmw.end1
 ; AIX32-NEXT:    lwz 31, L..C9(2) # @s128
-; AIX32-NEXT:    addi 30, 1, 72
-; AIX32-NEXT:    addi 29, 1, 56
+; AIX32-NEXT:    addi 30, 1, 80
+; AIX32-NEXT:    addi 29, 1, 64
 ; AIX32-NEXT:    lwz 5, 12(31)
 ; AIX32-NEXT:    lwz 4, 8(31)
 ; AIX32-NEXT:    lwz 6, 4(31)
@@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:  L..BB0_51: # %atomicrmw.start
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
-; AIX32-NEXT:    stw 7, 72(1)
-; AIX32-NEXT:    stw 7, 56(1)
+; AIX32-NEXT:    stw 7, 80(1)
+; AIX32-NEXT:    stw 7, 64(1)
 ; AIX32-NEXT:    li 7, 5
-; AIX32-NEXT:    stw 3, 68(1)
+; AIX32-NEXT:    stw 3, 76(1)
 ; AIX32-NEXT:    li 3, 16
 ; AIX32-NEXT:    li 8, 5
-; AIX32-NEXT:    stw 6, 76(1)
-; AIX32-NEXT:    stw 4, 80(1)
-; AIX32-NEXT:    stw 5, 84(1)
-; AIX32-NEXT:    stw 4, 64(1)
-; AIX32-NEXT:    stw 6, 60(1)
+; AIX32-NEXT:    stw 6, 84(1)
+; AIX32-NEXT:    stw 4, 88(1)
+; AIX32-NEXT:    stw 5, 92(1)
+; AIX32-NEXT:    stw 4, 72(1)
+; AIX32-NEXT:    stw 6, 68(1)
 ; AIX32-NEXT:    mr 4, 31
 ; AIX32-NEXT:    mr 5, 30
 ; AIX32-NEXT:    mr 6, 29
 ; AIX32-NEXT:    bl .__atomic_compare_exchange[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 5, 84(1)
-; AIX32-NEXT:    lwz 4, 80(1)
-; AIX32-NEXT:    lwz 6, 76(1)
-; AIX32-NEXT:    lwz 7, 72(1)
+; AIX32-NEXT:    lwz 5, 92(1)
+; AIX32-NEXT:    lwz 4, 88(1)
+; AIX32-NEXT:    lwz 6, 84(1)
+; AIX32-NEXT:    lwz 7, 80(1)
 ; AIX32-NEXT:    cmplwi 3, 0
 ; AIX32-NEXT:    beq 0, L..BB0_51
 ; AIX32-NEXT:  # %bb.52: # %atomicrmw.end
@@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    li 6, 5
 ; AIX32-NEXT:    bl .__atomic_fetch_and_8[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 31, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 30, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 29, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 28, 144(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 27, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 26, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 25, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 24, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 23, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 22, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 21, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 20, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 19, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 18, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 17, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 16, 96(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 15, 92(1) # 4-byte Folded Reload
-; AIX32-NEXT:    addi 1, 1, 160
+; AIX32-NEXT:    lwz 31, 172(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 30, 168(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 29, 164(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 28, 160(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 27, 156(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 26, 152(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 25, 148(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 24, 144(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 23, 140(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 22, 136(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 21, 132(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 20, 128(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 19, 124(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 18, 120(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 17, 116(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 16, 112(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 15, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT:    addi 1, 1, 176
 ; AIX32-NEXT:    lwz 0, 8(1)
 ; AIX32-NEXT:    mtlr 0
 ; AIX32-NEXT:    blr
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 795646b22b945a..67dccb3849f93c 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -118,7 +118,7 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
       "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128"
       "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
       "-f80:128:128-n8:16:32:64-S128");
-  EXPECT_EQ(DL2, "e-m:e-i64:64-n32:64");
+  EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64");
   EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:"
                  "64-S128-Fn32");
 

>From b09ee68faaa7f0bfd5499d1e786346c17905adb2 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:19:49 +0000
Subject: [PATCH 3/7] [PowerPC] Update data layout alignment of i128 to 16

---
 clang/lib/Basic/Targets/OSTargets.h          |  2 +-
 clang/lib/Basic/Targets/PPC.h                | 12 ++++++------
 llvm/lib/IR/AutoUpgrade.cpp                  |  2 +-
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp |  3 +++
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 75f53e96ce28f6..c0351f26e9bee6 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -473,7 +473,7 @@ class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo<Target> {
     this->IntMaxType = TargetInfo::SignedLongLong;
     this->Int64Type = TargetInfo::SignedLongLong;
     this->SizeType = TargetInfo::UnsignedInt;
-    this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-n32:64");
+    this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64");
   }
 };
 
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index b0833d30550af4..966647e98c624d 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -403,11 +403,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {
   PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : PPCTargetInfo(Triple, Opts) {
     if (Triple.isOSAIX())
-      resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32");
+      resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32");
     else if (Triple.getArch() == llvm::Triple::ppcle)
-      resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32");
+      resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
     else
-      resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32");
+      resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
 
     switch (getTriple().getOS()) {
     case llvm::Triple::Linux:
@@ -462,12 +462,12 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
 
     if (Triple.isOSAIX()) {
       // TODO: Set appropriate ABI for AIX platform.
-      DataLayout = "E-m:a-Fi64-i64:64-n32:64";
+      DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
       LongDoubleWidth = 64;
       LongDoubleAlign = DoubleAlign = 32;
       LongDoubleFormat = &llvm::APFloat::IEEEdouble();
     } else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
-      DataLayout = "e-m:e-Fn32-i64:64-n32:64";
+      DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
       ABI = "elfv2";
     } else {
       DataLayout = "E-m:e";
@@ -478,7 +478,7 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
         ABI = "elfv1";
         DataLayout += "-Fi64";
       }
-      DataLayout += "-i64:64-n32:64";
+      DataLayout += "-i64:64-i128:128-n32:64";
     }
 
     if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) {
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e73538da282e99..be426de2fa80a8 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
     return Res;
   }
 
-  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m"))) {
+  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) {
     // Mips64 with o32 ABI did not add "-i128:128".
     // Add "-i128:128"
     std::string I64 = "-i64:64";
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2b64ab9aa6973d..9eaa02bd7f80b9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -188,6 +188,9 @@ static std::string getDataLayoutString(const Triple &T) {
   // documentation are wrong; these are correct (i.e. "what gcc does").
   Ret += "-i64:64";
 
+  // Alignment for 128 bit integers.
+  Ret += "-i128:128";
+
   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
   if (is64Bit)
     Ret += "-n32:64";

>From 6628f981a61dd1614e9e135155c81b12aa978298 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:37:30 +0000
Subject: [PATCH 4/7] add tc to check alignment of int types

---
 llvm/test/CodeGen/PowerPC/data-align.ll | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/data-align.ll

diff --git a/llvm/test/CodeGen/PowerPC/data-align.ll b/llvm/test/CodeGen/PowerPC/data-align.ll
new file mode 100644
index 00000000000000..bfedec139369c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/data-align.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux | FileCheck %s
+
+; CHECK:      .set .Li8,
+; CHECK-NEXT:  .size	.Li8, 1
+@i8 = private constant i8 42
+
+; CHECK:      .set .Li16,
+; CHECK-NEXT: .size	.Li16, 2
+@i16 = private constant i16 42
+
+; CHECK:      .set .Li32,
+; CHECK-NEXT: .size	.Li32, 4
+@i32 = private constant i32 42
+
+; CHECK:      .set .Li64,
+; CHECK-NEXT: .size	.Li64, 8
+@i64 = private constant i64 42
+
+; CHECK:        .set .Li128,
+; CHECK-NEXT:	.size	.Li128, 16
+@i128 = private constant i128 42
+

>From e1e0a886a9701041e7493b90d598cb5c8a4a78cc Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 13:48:12 +0000
Subject: [PATCH 5/7] update alignment for 32bit only

---
 clang/lib/Basic/Targets/PPC.h                 |   6 +-
 clang/test/CodeGen/target-data.c              |   4 +-
 llvm/lib/IR/AutoUpgrade.cpp                   |   2 +-
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp  |   5 +-
 llvm/test/CodeGen/PowerPC/all-atomics.ll      | 130 +++++++++---------
 llvm/test/CodeGen/PowerPC/ctrloop-sh.ll       |  58 ++++----
 llvm/test/CodeGen/PowerPC/pr59074.ll          |  29 ++--
 ...lar-shift-by-byte-multiple-legalization.ll |  48 ++++---
 .../PowerPC/wide-scalar-shift-legalization.ll |  33 +++--
 9 files changed, 153 insertions(+), 162 deletions(-)

diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 966647e98c624d..3cd0fcad172939 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -403,11 +403,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {
   PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : PPCTargetInfo(Triple, Opts) {
     if (Triple.isOSAIX())
-      resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32");
+      resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32");
     else if (Triple.getArch() == llvm::Triple::ppcle)
-      resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
+      resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32");
     else
-      resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
+      resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32");
 
     switch (getTriple().getOS()) {
     case llvm::Triple::Linux:
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 3e11f6ae183b6b..2c758567faa1f6 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -120,11 +120,11 @@
 
 // RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC
-// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
+// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32"
 
 // RUN: %clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPCLE
-// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
+// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32"
 
 // RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC64-FREEBSD
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index be426de2fa80a8..d54176a7878b5a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
     return Res;
   }
 
-  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) {
+  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64()) {
     // Mips64 with o32 ABI did not add "-i128:128".
     // Add "-i128:128"
     std::string I64 = "-i64:64";
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9eaa02bd7f80b9..a027049b9e9d47 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -188,12 +188,9 @@ static std::string getDataLayoutString(const Triple &T) {
   // documentation are wrong; these are correct (i.e. "what gcc does").
   Ret += "-i64:64";
 
-  // Alignment for 128 bit integers.
-  Ret += "-i128:128";
-
   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
   if (is64Bit)
-    Ret += "-n32:64";
+    Ret += "-i128:128-n32:64";
   else
     Ret += "-n32";
 
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 40b96dab94b9ff..531e559ea7309c 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_op_ignore:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stwu 1, -176(1)
+; AIX32-NEXT:    stwu 1, -160(1)
 ; AIX32-NEXT:    lwz 3, L..C0(2) # @sc
-; AIX32-NEXT:    stw 0, 184(1)
+; AIX32-NEXT:    stw 0, 168(1)
 ; AIX32-NEXT:    rlwinm 4, 3, 3, 27, 28
-; AIX32-NEXT:    stw 15, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 26, 152(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 28, 160(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 15, 92(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 26, 136(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 28, 144(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    li 15, 1
 ; AIX32-NEXT:    rlwinm 28, 3, 0, 0, 29
 ; AIX32-NEXT:    li 3, 255
 ; AIX32-NEXT:    xori 26, 4, 24
-; AIX32-NEXT:    stw 16, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 17, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 18, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 19, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 20, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 21, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 22, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 23, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 24, 144(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 25, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 27, 156(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 29, 164(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 30, 168(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 31, 172(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 16, 96(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 17, 100(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 18, 104(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 19, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 20, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 21, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 22, 120(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 23, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 24, 128(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 25, 132(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 27, 140(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 29, 148(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 30, 152(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 31, 156(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 29, 15, 26
 ; AIX32-NEXT:    slw 3, 3, 26
@@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    bl .__atomic_fetch_xor_8[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    lwz 31, L..C8(2) # @u128
-; AIX32-NEXT:    addi 30, 1, 80
-; AIX32-NEXT:    addi 29, 1, 64
+; AIX32-NEXT:    addi 30, 1, 72
+; AIX32-NEXT:    addi 29, 1, 56
 ; AIX32-NEXT:    lwz 5, 12(31)
 ; AIX32-NEXT:    lwz 4, 8(31)
 ; AIX32-NEXT:    lwz 6, 4(31)
@@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:  L..BB0_49: # %atomicrmw.start2
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
-; AIX32-NEXT:    stw 7, 80(1)
-; AIX32-NEXT:    stw 7, 64(1)
+; AIX32-NEXT:    stw 7, 72(1)
+; AIX32-NEXT:    stw 7, 56(1)
 ; AIX32-NEXT:    li 7, 5
-; AIX32-NEXT:    stw 3, 76(1)
+; AIX32-NEXT:    stw 3, 68(1)
 ; AIX32-NEXT:    li 3, 16
 ; AIX32-NEXT:    li 8, 5
-; AIX32-NEXT:    stw 6, 84(1)
-; AIX32-NEXT:    stw 4, 88(1)
-; AIX32-NEXT:    stw 5, 92(1)
-; AIX32-NEXT:    stw 4, 72(1)
-; AIX32-NEXT:    stw 6, 68(1)
+; AIX32-NEXT:    stw 6, 76(1)
+; AIX32-NEXT:    stw 4, 80(1)
+; AIX32-NEXT:    stw 5, 84(1)
+; AIX32-NEXT:    stw 4, 64(1)
+; AIX32-NEXT:    stw 6, 60(1)
 ; AIX32-NEXT:    mr 4, 31
 ; AIX32-NEXT:    mr 5, 30
 ; AIX32-NEXT:    mr 6, 29
 ; AIX32-NEXT:    bl .__atomic_compare_exchange[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 5, 92(1)
-; AIX32-NEXT:    lwz 4, 88(1)
-; AIX32-NEXT:    lwz 6, 84(1)
-; AIX32-NEXT:    lwz 7, 80(1)
+; AIX32-NEXT:    lwz 5, 84(1)
+; AIX32-NEXT:    lwz 4, 80(1)
+; AIX32-NEXT:    lwz 6, 76(1)
+; AIX32-NEXT:    lwz 7, 72(1)
 ; AIX32-NEXT:    cmplwi 3, 0
 ; AIX32-NEXT:    beq 0, L..BB0_49
 ; AIX32-NEXT:  # %bb.50: # %atomicrmw.end1
 ; AIX32-NEXT:    lwz 31, L..C9(2) # @s128
-; AIX32-NEXT:    addi 30, 1, 80
-; AIX32-NEXT:    addi 29, 1, 64
+; AIX32-NEXT:    addi 30, 1, 72
+; AIX32-NEXT:    addi 29, 1, 56
 ; AIX32-NEXT:    lwz 5, 12(31)
 ; AIX32-NEXT:    lwz 4, 8(31)
 ; AIX32-NEXT:    lwz 6, 4(31)
@@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:  L..BB0_51: # %atomicrmw.start
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
-; AIX32-NEXT:    stw 7, 80(1)
-; AIX32-NEXT:    stw 7, 64(1)
+; AIX32-NEXT:    stw 7, 72(1)
+; AIX32-NEXT:    stw 7, 56(1)
 ; AIX32-NEXT:    li 7, 5
-; AIX32-NEXT:    stw 3, 76(1)
+; AIX32-NEXT:    stw 3, 68(1)
 ; AIX32-NEXT:    li 3, 16
 ; AIX32-NEXT:    li 8, 5
-; AIX32-NEXT:    stw 6, 84(1)
-; AIX32-NEXT:    stw 4, 88(1)
-; AIX32-NEXT:    stw 5, 92(1)
-; AIX32-NEXT:    stw 4, 72(1)
-; AIX32-NEXT:    stw 6, 68(1)
+; AIX32-NEXT:    stw 6, 76(1)
+; AIX32-NEXT:    stw 4, 80(1)
+; AIX32-NEXT:    stw 5, 84(1)
+; AIX32-NEXT:    stw 4, 64(1)
+; AIX32-NEXT:    stw 6, 60(1)
 ; AIX32-NEXT:    mr 4, 31
 ; AIX32-NEXT:    mr 5, 30
 ; AIX32-NEXT:    mr 6, 29
 ; AIX32-NEXT:    bl .__atomic_compare_exchange[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 5, 92(1)
-; AIX32-NEXT:    lwz 4, 88(1)
-; AIX32-NEXT:    lwz 6, 84(1)
-; AIX32-NEXT:    lwz 7, 80(1)
+; AIX32-NEXT:    lwz 5, 84(1)
+; AIX32-NEXT:    lwz 4, 80(1)
+; AIX32-NEXT:    lwz 6, 76(1)
+; AIX32-NEXT:    lwz 7, 72(1)
 ; AIX32-NEXT:    cmplwi 3, 0
 ; AIX32-NEXT:    beq 0, L..BB0_51
 ; AIX32-NEXT:  # %bb.52: # %atomicrmw.end
@@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    li 6, 5
 ; AIX32-NEXT:    bl .__atomic_fetch_and_8[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz 31, 172(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 30, 168(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 29, 164(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 28, 160(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 27, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 26, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 25, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 24, 144(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 23, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 22, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 21, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 20, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 19, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 18, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 17, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 16, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT:    lwz 15, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT:    addi 1, 1, 176
+; AIX32-NEXT:    lwz 31, 156(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 30, 152(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 29, 148(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 28, 144(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 27, 140(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 26, 136(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 25, 132(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 24, 128(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 23, 124(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 22, 120(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 21, 116(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 20, 112(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 19, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 18, 104(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 17, 100(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 16, 96(1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz 15, 92(1) # 4-byte Folded Reload
+; AIX32-NEXT:    addi 1, 1, 160
 ; AIX32-NEXT:    lwz 0, 8(1)
 ; AIX32-NEXT:    mtlr 0
 ; AIX32-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
index 19f86f9d1af6ff..72de456cba395b 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -16,41 +16,39 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    addi 7, 1, 16
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwz 9, 4(4)
 ; CHECK-NEXT:    lwz 8, 0(4)
+; CHECK-NEXT:    lwz 9, 4(4)
 ; CHECK-NEXT:    lwz 10, 8(4)
 ; CHECK-NEXT:    lwz 11, 12(4)
 ; CHECK-NEXT:    lwz 12, 12(5)
-; CHECK-NEXT:    stw 9, 20(1)
-; CHECK-NEXT:    mr 9, 7
 ; CHECK-NEXT:    stw 6, 44(1)
-; CHECK-NEXT:    rlwimi 9, 12, 29, 28, 29
 ; CHECK-NEXT:    stw 6, 40(1)
 ; CHECK-NEXT:    stw 6, 36(1)
 ; CHECK-NEXT:    stw 6, 32(1)
 ; CHECK-NEXT:    stw 11, 28(1)
 ; CHECK-NEXT:    stw 10, 24(1)
 ; CHECK-NEXT:    clrlwi 10, 12, 27
+; CHECK-NEXT:    stw 9, 20(1)
 ; CHECK-NEXT:    stw 8, 16(1)
-; CHECK-NEXT:    rlwinm 12, 12, 29, 28, 29
-; CHECK-NEXT:    lwz 8, 8(9)
-; CHECK-NEXT:    subfic 0, 10, 32
-; CHECK-NEXT:    lwz 11, 4(9)
-; CHECK-NEXT:    lwz 9, 12(9)
-; CHECK-NEXT:    srw 30, 8, 0
-; CHECK-NEXT:    lwzx 12, 7, 12
-; CHECK-NEXT:    slw 29, 11, 10
-; CHECK-NEXT:    srw 11, 11, 0
-; CHECK-NEXT:    srw 0, 9, 0
+; CHECK-NEXT:    rlwinm 8, 12, 29, 28, 29
+; CHECK-NEXT:    lwzux 9, 8, 7
+; CHECK-NEXT:    subfic 12, 10, 32
+; CHECK-NEXT:    lwz 11, 8(8)
+; CHECK-NEXT:    slw 9, 9, 10
+; CHECK-NEXT:    lwz 0, 4(8)
+; CHECK-NEXT:    lwz 8, 12(8)
+; CHECK-NEXT:    srw 30, 11, 12
+; CHECK-NEXT:    slw 29, 0, 10
+; CHECK-NEXT:    srw 0, 0, 12
+; CHECK-NEXT:    srw 12, 8, 12
+; CHECK-NEXT:    slw 11, 11, 10
 ; CHECK-NEXT:    slw 8, 8, 10
-; CHECK-NEXT:    slw 12, 12, 10
-; CHECK-NEXT:    or 8, 8, 0
+; CHECK-NEXT:    stw 8, 12(3)
+; CHECK-NEXT:    or 8, 11, 12
 ; CHECK-NEXT:    stw 8, 8(3)
-; CHECK-NEXT:    or 8, 12, 11
-; CHECK-NEXT:    slw 9, 9, 10
+; CHECK-NEXT:    or 8, 9, 0
 ; CHECK-NEXT:    stw 8, 0(3)
 ; CHECK-NEXT:    or 8, 29, 30
-; CHECK-NEXT:    stw 9, 12(3)
 ; CHECK-NEXT:    stw 8, 4(3)
 ; CHECK-NEXT:    bdnz .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %for.end
@@ -79,11 +77,11 @@ for.end:                                          ; preds = %for.body
 define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-LABEL: foo2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -64(1)
-; CHECK-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stwu 1, -48(1)
+; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    li 6, 2048
 ; CHECK-NEXT:    mtctr 6
-; CHECK-NEXT:    addi 6, 1, 32
+; CHECK-NEXT:    addi 6, 1, 24
 ; CHECK-NEXT:  .LBB1_1: # %for.body
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwz 7, 0(4)
@@ -91,18 +89,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    lwz 11, 12(5)
 ; CHECK-NEXT:    lwz 9, 8(4)
 ; CHECK-NEXT:    lwz 10, 12(4)
-; CHECK-NEXT:    stw 8, 36(1)
+; CHECK-NEXT:    stw 8, 28(1)
 ; CHECK-NEXT:    rlwinm 8, 11, 29, 28, 29
-; CHECK-NEXT:    stw 7, 32(1)
+; CHECK-NEXT:    stw 7, 24(1)
 ; CHECK-NEXT:    srawi 7, 7, 31
-; CHECK-NEXT:    stw 10, 44(1)
+; CHECK-NEXT:    stw 10, 36(1)
 ; CHECK-NEXT:    clrlwi 10, 11, 27
-; CHECK-NEXT:    stw 9, 40(1)
+; CHECK-NEXT:    stw 9, 32(1)
 ; CHECK-NEXT:    subfic 12, 10, 32
-; CHECK-NEXT:    stw 7, 28(1)
-; CHECK-NEXT:    stw 7, 24(1)
 ; CHECK-NEXT:    stw 7, 20(1)
 ; CHECK-NEXT:    stw 7, 16(1)
+; CHECK-NEXT:    stw 7, 12(1)
+; CHECK-NEXT:    stw 7, 8(1)
 ; CHECK-NEXT:    sub 7, 6, 8
 ; CHECK-NEXT:    lwz 8, 4(7)
 ; CHECK-NEXT:    lwz 9, 0(7)
@@ -124,8 +122,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
 ; CHECK-NEXT:    stw 7, 4(3)
 ; CHECK-NEXT:    bdnz .LBB1_1
 ; CHECK-NEXT:  # %bb.2: # %for.end
-; CHECK-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
-; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    blr
 entry:
   br label %for.body
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
index ba21fed9f5abd4..6264b9f22876cc 100644
--- a/llvm/test/CodeGen/PowerPC/pr59074.ll
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -38,27 +38,26 @@ define void @pr59074(ptr %0) {
 ; LE32-NEXT:    stw 7, 40(1)
 ; LE32-NEXT:    stw 7, 36(1)
 ; LE32-NEXT:    stw 8, 16(1)
-; LE32-NEXT:    rlwinm 7, 4, 29, 28, 29
+; LE32-NEXT:    rlwinm 9, 4, 29, 28, 29
 ; LE32-NEXT:    stxvd2x 0, 0, 5
-; LE32-NEXT:    lwzx 5, 6, 7
-; LE32-NEXT:    rlwimi 6, 4, 29, 28, 29
 ; LE32-NEXT:    clrlwi 4, 4, 27
-; LE32-NEXT:    lwz 7, 8(6)
-; LE32-NEXT:    lwz 8, 4(6)
-; LE32-NEXT:    lwz 6, 12(6)
+; LE32-NEXT:    lwzux 5, 9, 6
+; LE32-NEXT:    lwz 6, 8(9)
+; LE32-NEXT:    lwz 7, 4(9)
+; LE32-NEXT:    lwz 8, 12(9)
 ; LE32-NEXT:    xori 9, 4, 31
 ; LE32-NEXT:    subfic 11, 4, 32
 ; LE32-NEXT:    srw 5, 5, 4
-; LE32-NEXT:    slwi 10, 7, 1
-; LE32-NEXT:    srw 12, 8, 4
-; LE32-NEXT:    slw 8, 8, 11
-; LE32-NEXT:    srw 7, 7, 4
-; LE32-NEXT:    slw 11, 6, 11
-; LE32-NEXT:    srw 4, 6, 4
+; LE32-NEXT:    slwi 10, 6, 1
+; LE32-NEXT:    srw 6, 6, 4
 ; LE32-NEXT:    slw 9, 10, 9
-; LE32-NEXT:    or 5, 8, 5
-; LE32-NEXT:    or 6, 11, 7
-; LE32-NEXT:    or 7, 12, 9
+; LE32-NEXT:    srw 10, 7, 4
+; LE32-NEXT:    slw 7, 7, 11
+; LE32-NEXT:    slw 11, 8, 11
+; LE32-NEXT:    srw 4, 8, 4
+; LE32-NEXT:    or 5, 7, 5
+; LE32-NEXT:    or 6, 11, 6
+; LE32-NEXT:    or 7, 10, 9
 ; LE32-NEXT:    stw 4, 12(3)
 ; LE32-NEXT:    stw 6, 8(3)
 ; LE32-NEXT:    stw 5, 0(3)
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index 6bd5cd23dcec8c..12976e838f3ca6 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -405,26 +405,25 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 9, 24(1)
 ; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
 ; LE-32BIT-NEXT:    stw 8, 20(1)
-; LE-32BIT-NEXT:    subfic 9, 4, 32
+; LE-32BIT-NEXT:    subfic 8, 4, 32
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzx 7, 3, 6
-; LE-32BIT-NEXT:    rlwimi 3, 6, 0, 28, 29
-; LE-32BIT-NEXT:    lwz 6, 8(3)
-; LE-32BIT-NEXT:    lwz 8, 4(3)
-; LE-32BIT-NEXT:    slw 7, 7, 4
-; LE-32BIT-NEXT:    lwz 3, 12(3)
-; LE-32BIT-NEXT:    srw 10, 6, 9
-; LE-32BIT-NEXT:    slw 11, 8, 4
-; LE-32BIT-NEXT:    srw 8, 8, 9
-; LE-32BIT-NEXT:    srw 9, 3, 9
-; LE-32BIT-NEXT:    slw 6, 6, 4
+; LE-32BIT-NEXT:    lwzux 3, 6, 3
+; LE-32BIT-NEXT:    lwz 9, 4(6)
 ; LE-32BIT-NEXT:    slw 3, 3, 4
-; LE-32BIT-NEXT:    stw 3, 12(5)
-; LE-32BIT-NEXT:    or 3, 6, 9
-; LE-32BIT-NEXT:    stw 3, 8(5)
-; LE-32BIT-NEXT:    or 3, 7, 8
+; LE-32BIT-NEXT:    lwz 7, 8(6)
+; LE-32BIT-NEXT:    lwz 6, 12(6)
+; LE-32BIT-NEXT:    slw 11, 9, 4
+; LE-32BIT-NEXT:    srw 9, 9, 8
+; LE-32BIT-NEXT:    srw 10, 7, 8
+; LE-32BIT-NEXT:    srw 8, 6, 8
+; LE-32BIT-NEXT:    slw 7, 7, 4
+; LE-32BIT-NEXT:    slw 4, 6, 4
+; LE-32BIT-NEXT:    or 3, 3, 9
+; LE-32BIT-NEXT:    stw 4, 12(5)
+; LE-32BIT-NEXT:    or 4, 7, 8
 ; LE-32BIT-NEXT:    stw 3, 0(5)
 ; LE-32BIT-NEXT:    or 3, 11, 10
+; LE-32BIT-NEXT:    stw 4, 8(5)
 ; LE-32BIT-NEXT:    stw 3, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr
@@ -484,23 +483,22 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
 ; LE-32BIT-NEXT:    lwz 4, 12(4)
 ; LE-32BIT-NEXT:    stw 6, 44(1)
 ; LE-32BIT-NEXT:    stw 6, 40(1)
+; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
 ; LE-32BIT-NEXT:    stw 6, 36(1)
 ; LE-32BIT-NEXT:    stw 6, 32(1)
-; LE-32BIT-NEXT:    rlwinm 6, 4, 2, 28, 29
 ; LE-32BIT-NEXT:    stw 3, 28(1)
 ; LE-32BIT-NEXT:    addi 3, 1, 16
 ; LE-32BIT-NEXT:    stw 9, 24(1)
 ; LE-32BIT-NEXT:    stw 8, 20(1)
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzx 6, 3, 6
-; LE-32BIT-NEXT:    rlwimi 3, 4, 2, 28, 29
-; LE-32BIT-NEXT:    lwz 4, 4(3)
-; LE-32BIT-NEXT:    lwz 7, 12(3)
-; LE-32BIT-NEXT:    lwz 3, 8(3)
-; LE-32BIT-NEXT:    stw 6, 0(5)
-; LE-32BIT-NEXT:    stw 3, 8(5)
+; LE-32BIT-NEXT:    lwzux 3, 4, 3
+; LE-32BIT-NEXT:    lwz 6, 4(4)
+; LE-32BIT-NEXT:    lwz 7, 12(4)
+; LE-32BIT-NEXT:    lwz 4, 8(4)
+; LE-32BIT-NEXT:    stw 3, 0(5)
+; LE-32BIT-NEXT:    stw 4, 8(5)
 ; LE-32BIT-NEXT:    stw 7, 12(5)
-; LE-32BIT-NEXT:    stw 4, 4(5)
+; LE-32BIT-NEXT:    stw 6, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr
   %src = load i128, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index 5fe7fca5b6e41d..e1731ddfd92875 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -304,28 +304,27 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 3, 28(1)
 ; LE-32BIT-NEXT:    addi 3, 1, 16
 ; LE-32BIT-NEXT:    stw 9, 24(1)
+; LE-32BIT-NEXT:    clrlwi 4, 4, 27
 ; LE-32BIT-NEXT:    stw 8, 20(1)
+; LE-32BIT-NEXT:    subfic 8, 4, 32
 ; LE-32BIT-NEXT:    stw 7, 16(1)
-; LE-32BIT-NEXT:    lwzx 6, 3, 6
-; LE-32BIT-NEXT:    rlwimi 3, 4, 29, 28, 29
-; LE-32BIT-NEXT:    lwz 7, 8(3)
-; LE-32BIT-NEXT:    clrlwi 4, 4, 27
-; LE-32BIT-NEXT:    lwz 8, 4(3)
-; LE-32BIT-NEXT:    subfic 9, 4, 32
-; LE-32BIT-NEXT:    lwz 3, 12(3)
-; LE-32BIT-NEXT:    srw 10, 7, 9
-; LE-32BIT-NEXT:    slw 11, 8, 4
-; LE-32BIT-NEXT:    srw 8, 8, 9
-; LE-32BIT-NEXT:    srw 9, 3, 9
-; LE-32BIT-NEXT:    slw 7, 7, 4
+; LE-32BIT-NEXT:    lwzux 3, 6, 3
+; LE-32BIT-NEXT:    lwz 9, 4(6)
 ; LE-32BIT-NEXT:    slw 3, 3, 4
-; LE-32BIT-NEXT:    slw 6, 6, 4
-; LE-32BIT-NEXT:    stw 3, 12(5)
-; LE-32BIT-NEXT:    or 3, 7, 9
-; LE-32BIT-NEXT:    stw 3, 8(5)
-; LE-32BIT-NEXT:    or 3, 6, 8
+; LE-32BIT-NEXT:    lwz 7, 8(6)
+; LE-32BIT-NEXT:    lwz 6, 12(6)
+; LE-32BIT-NEXT:    slw 11, 9, 4
+; LE-32BIT-NEXT:    srw 9, 9, 8
+; LE-32BIT-NEXT:    srw 10, 7, 8
+; LE-32BIT-NEXT:    srw 8, 6, 8
+; LE-32BIT-NEXT:    slw 7, 7, 4
+; LE-32BIT-NEXT:    slw 4, 6, 4
+; LE-32BIT-NEXT:    or 3, 3, 9
+; LE-32BIT-NEXT:    stw 4, 12(5)
+; LE-32BIT-NEXT:    or 4, 7, 8
 ; LE-32BIT-NEXT:    stw 3, 0(5)
 ; LE-32BIT-NEXT:    or 3, 11, 10
+; LE-32BIT-NEXT:    stw 4, 8(5)
 ; LE-32BIT-NEXT:    stw 3, 4(5)
 ; LE-32BIT-NEXT:    addi 1, 1, 48
 ; LE-32BIT-NEXT:    blr

>From 577d588bae567a6517e18d52d947228a86a833c0 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 15:16:33 +0000
Subject: [PATCH 6/7] add additional tests for DataLayoutUpgrade and move
 existing to proper location

---
 .../Bitcode/DataLayoutUpgradeTest.cpp         | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 67dccb3849f93c..7a74a5882286f9 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -92,6 +92,16 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
                 "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64", "mips64el"),
             "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64");
 
+  // Check that PowerPC64 targets add -i128:128.
+  EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"),
+            "e-m:e-i64:64-i128:128-n32:64");
+  EXPECT_EQ(UpgradeDataLayoutString(
+              "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
+            "E-m:e-Fn32-i64:64-i128:128-n32:64");
+  EXPECT_EQ(UpgradeDataLayoutString(
+              "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
+            "E-m:a-Fi64-i64:64-i128:128-n32:64");
+
   // Check that SPIR && SPIRV targets add -G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir64"), "e-p:32:32-G1");
@@ -108,8 +118,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
       "-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
       "-n8:16:32:64-S128",
       "x86_64-unknown-linux-gnu");
-  std::string DL2 = UpgradeDataLayoutString("e-m:e-i64:64-n32:64",
-                                            "powerpc64le-unknown-linux-gnu");
   std::string DL3 = UpgradeDataLayoutString(
       "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32",
       "aarch64--");
@@ -118,7 +126,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
       "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128"
       "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
       "-f80:128:128-n8:16:32:64-S128");
-  EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64");
   EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:"
                  "64-S128-Fn32");
 
@@ -153,6 +160,14 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
   EXPECT_EQ(UpgradeDataLayoutString("G2", "spir64"), "G2");
   EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv32"), "G2");
   EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv64"), "G2");
+
+  // Check that PowerPC32 targets don't add -i128:128.
+  EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32", "powerpcle-linux"),
+            "e-m:e-i64:64-n32");
+  EXPECT_EQ(UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32", "powerpc-linux"),
+            "E-m:e-Fn32-i64:64-n32");
+  EXPECT_EQ(UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32", "powerpc-aix"),
+            "E-m:a-Fi64-i64:64-n32");
 }
 
 TEST(DataLayoutUpgradeTest, EmptyDataLayout) {

>From 5eb545c3649fa0e28a9b898a884f1ac3c54cf48a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 11:07:21 -0500
Subject: [PATCH 7/7] clang-format code

---
 llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 7a74a5882286f9..00bb963d39cedb 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -95,12 +95,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   // Check that PowerPC64 targets add -i128:128.
   EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"),
             "e-m:e-i64:64-i128:128-n32:64");
-  EXPECT_EQ(UpgradeDataLayoutString(
-              "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
-            "E-m:e-Fn32-i64:64-i128:128-n32:64");
-  EXPECT_EQ(UpgradeDataLayoutString(
-              "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
-            "E-m:a-Fi64-i64:64-i128:128-n32:64");
+  EXPECT_EQ(
+      UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
+      "E-m:e-Fn32-i64:64-i128:128-n32:64");
+  EXPECT_EQ(
+      UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
+      "E-m:a-Fi64-i64:64-i128:128-n32:64");
 
   // Check that SPIR && SPIRV targets add -G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");



More information about the cfe-commits mailing list