[clang] [llvm] [PowerPC] Update data layout alignment of i128 to 16 (PR #118004)
Lei Huang via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 28 08:07:36 PST 2024
https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/118004
>From ae5beae74fcd7717bf40519c80a9d920625bb137 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 18:44:38 +0000
Subject: [PATCH 1/7] update llc tc affected
---
.../CostModel/PowerPC/load-to-trunc.ll | 4 +-
llvm/test/CodeGen/PowerPC/ctrloop-sh.ll | 58 ++++++++++---------
llvm/test/CodeGen/PowerPC/pr59074.ll | 29 +++++-----
...lar-shift-by-byte-multiple-legalization.ll | 48 +++++++--------
.../PowerPC/wide-scalar-shift-legalization.ll | 33 ++++++-----
.../AtomicExpand/PowerPC/cmpxchg.ll | 6 +-
6 files changed, 92 insertions(+), 86 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
index 57a6e98cfb4ee6..b78d121ff4f389 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll
@@ -7,7 +7,7 @@
; Check that cost is 1 for unusual load to register sized load.
define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
@@ -18,7 +18,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
%out = load i128, ptr %ptr
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
index 72de456cba395b..19f86f9d1af6ff 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -16,39 +16,41 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: addi 7, 1, 16
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwz 8, 0(4)
; CHECK-NEXT: lwz 9, 4(4)
+; CHECK-NEXT: lwz 8, 0(4)
; CHECK-NEXT: lwz 10, 8(4)
; CHECK-NEXT: lwz 11, 12(4)
; CHECK-NEXT: lwz 12, 12(5)
+; CHECK-NEXT: stw 9, 20(1)
+; CHECK-NEXT: mr 9, 7
; CHECK-NEXT: stw 6, 44(1)
+; CHECK-NEXT: rlwimi 9, 12, 29, 28, 29
; CHECK-NEXT: stw 6, 40(1)
; CHECK-NEXT: stw 6, 36(1)
; CHECK-NEXT: stw 6, 32(1)
; CHECK-NEXT: stw 11, 28(1)
; CHECK-NEXT: stw 10, 24(1)
; CHECK-NEXT: clrlwi 10, 12, 27
-; CHECK-NEXT: stw 9, 20(1)
; CHECK-NEXT: stw 8, 16(1)
-; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29
-; CHECK-NEXT: lwzux 9, 8, 7
-; CHECK-NEXT: subfic 12, 10, 32
-; CHECK-NEXT: lwz 11, 8(8)
-; CHECK-NEXT: slw 9, 9, 10
-; CHECK-NEXT: lwz 0, 4(8)
-; CHECK-NEXT: lwz 8, 12(8)
-; CHECK-NEXT: srw 30, 11, 12
-; CHECK-NEXT: slw 29, 0, 10
-; CHECK-NEXT: srw 0, 0, 12
-; CHECK-NEXT: srw 12, 8, 12
-; CHECK-NEXT: slw 11, 11, 10
+; CHECK-NEXT: rlwinm 12, 12, 29, 28, 29
+; CHECK-NEXT: lwz 8, 8(9)
+; CHECK-NEXT: subfic 0, 10, 32
+; CHECK-NEXT: lwz 11, 4(9)
+; CHECK-NEXT: lwz 9, 12(9)
+; CHECK-NEXT: srw 30, 8, 0
+; CHECK-NEXT: lwzx 12, 7, 12
+; CHECK-NEXT: slw 29, 11, 10
+; CHECK-NEXT: srw 11, 11, 0
+; CHECK-NEXT: srw 0, 9, 0
; CHECK-NEXT: slw 8, 8, 10
-; CHECK-NEXT: stw 8, 12(3)
-; CHECK-NEXT: or 8, 11, 12
+; CHECK-NEXT: slw 12, 12, 10
+; CHECK-NEXT: or 8, 8, 0
; CHECK-NEXT: stw 8, 8(3)
-; CHECK-NEXT: or 8, 9, 0
+; CHECK-NEXT: or 8, 12, 11
+; CHECK-NEXT: slw 9, 9, 10
; CHECK-NEXT: stw 8, 0(3)
; CHECK-NEXT: or 8, 29, 30
+; CHECK-NEXT: stw 9, 12(3)
; CHECK-NEXT: stw 8, 4(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.end
@@ -77,11 +79,11 @@ for.end: ; preds = %for.body
define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stwu 1, -48(1)
-; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT: stwu 1, -64(1)
+; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill
; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: mtctr 6
-; CHECK-NEXT: addi 6, 1, 24
+; CHECK-NEXT: addi 6, 1, 32
; CHECK-NEXT: .LBB1_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz 7, 0(4)
@@ -89,18 +91,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: lwz 11, 12(5)
; CHECK-NEXT: lwz 9, 8(4)
; CHECK-NEXT: lwz 10, 12(4)
-; CHECK-NEXT: stw 8, 28(1)
+; CHECK-NEXT: stw 8, 36(1)
; CHECK-NEXT: rlwinm 8, 11, 29, 28, 29
-; CHECK-NEXT: stw 7, 24(1)
+; CHECK-NEXT: stw 7, 32(1)
; CHECK-NEXT: srawi 7, 7, 31
-; CHECK-NEXT: stw 10, 36(1)
+; CHECK-NEXT: stw 10, 44(1)
; CHECK-NEXT: clrlwi 10, 11, 27
-; CHECK-NEXT: stw 9, 32(1)
+; CHECK-NEXT: stw 9, 40(1)
; CHECK-NEXT: subfic 12, 10, 32
+; CHECK-NEXT: stw 7, 28(1)
+; CHECK-NEXT: stw 7, 24(1)
; CHECK-NEXT: stw 7, 20(1)
; CHECK-NEXT: stw 7, 16(1)
-; CHECK-NEXT: stw 7, 12(1)
-; CHECK-NEXT: stw 7, 8(1)
; CHECK-NEXT: sub 7, 6, 8
; CHECK-NEXT: lwz 8, 4(7)
; CHECK-NEXT: lwz 9, 0(7)
@@ -122,8 +124,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: stw 7, 4(3)
; CHECK-NEXT: bdnz .LBB1_1
; CHECK-NEXT: # %bb.2: # %for.end
-; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
-; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: blr
entry:
br label %for.body
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
index 6264b9f22876cc..ba21fed9f5abd4 100644
--- a/llvm/test/CodeGen/PowerPC/pr59074.ll
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -38,26 +38,27 @@ define void @pr59074(ptr %0) {
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
-; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
+; LE32-NEXT: rlwinm 7, 4, 29, 28, 29
; LE32-NEXT: stxvd2x 0, 0, 5
+; LE32-NEXT: lwzx 5, 6, 7
+; LE32-NEXT: rlwimi 6, 4, 29, 28, 29
; LE32-NEXT: clrlwi 4, 4, 27
-; LE32-NEXT: lwzux 5, 9, 6
-; LE32-NEXT: lwz 6, 8(9)
-; LE32-NEXT: lwz 7, 4(9)
-; LE32-NEXT: lwz 8, 12(9)
+; LE32-NEXT: lwz 7, 8(6)
+; LE32-NEXT: lwz 8, 4(6)
+; LE32-NEXT: lwz 6, 12(6)
; LE32-NEXT: xori 9, 4, 31
; LE32-NEXT: subfic 11, 4, 32
; LE32-NEXT: srw 5, 5, 4
-; LE32-NEXT: slwi 10, 6, 1
-; LE32-NEXT: srw 6, 6, 4
+; LE32-NEXT: slwi 10, 7, 1
+; LE32-NEXT: srw 12, 8, 4
+; LE32-NEXT: slw 8, 8, 11
+; LE32-NEXT: srw 7, 7, 4
+; LE32-NEXT: slw 11, 6, 11
+; LE32-NEXT: srw 4, 6, 4
; LE32-NEXT: slw 9, 10, 9
-; LE32-NEXT: srw 10, 7, 4
-; LE32-NEXT: slw 7, 7, 11
-; LE32-NEXT: slw 11, 8, 11
-; LE32-NEXT: srw 4, 8, 4
-; LE32-NEXT: or 5, 7, 5
-; LE32-NEXT: or 6, 11, 6
-; LE32-NEXT: or 7, 10, 9
+; LE32-NEXT: or 5, 8, 5
+; LE32-NEXT: or 6, 11, 7
+; LE32-NEXT: or 7, 12, 9
; LE32-NEXT: stw 4, 12(3)
; LE32-NEXT: stw 6, 8(3)
; LE32-NEXT: stw 5, 0(3)
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index 12976e838f3ca6..6bd5cd23dcec8c 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -405,25 +405,26 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-32BIT-NEXT: stw 9, 24(1)
; LE-32BIT-NEXT: rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT: stw 8, 20(1)
-; LE-32BIT-NEXT: subfic 8, 4, 32
+; LE-32BIT-NEXT: subfic 9, 4, 32
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzux 3, 6, 3
-; LE-32BIT-NEXT: lwz 9, 4(6)
-; LE-32BIT-NEXT: slw 3, 3, 4
-; LE-32BIT-NEXT: lwz 7, 8(6)
-; LE-32BIT-NEXT: lwz 6, 12(6)
-; LE-32BIT-NEXT: slw 11, 9, 4
-; LE-32BIT-NEXT: srw 9, 9, 8
-; LE-32BIT-NEXT: srw 10, 7, 8
-; LE-32BIT-NEXT: srw 8, 6, 8
+; LE-32BIT-NEXT: lwzx 7, 3, 6
+; LE-32BIT-NEXT: rlwimi 3, 6, 0, 28, 29
+; LE-32BIT-NEXT: lwz 6, 8(3)
+; LE-32BIT-NEXT: lwz 8, 4(3)
; LE-32BIT-NEXT: slw 7, 7, 4
-; LE-32BIT-NEXT: slw 4, 6, 4
-; LE-32BIT-NEXT: or 3, 3, 9
-; LE-32BIT-NEXT: stw 4, 12(5)
-; LE-32BIT-NEXT: or 4, 7, 8
+; LE-32BIT-NEXT: lwz 3, 12(3)
+; LE-32BIT-NEXT: srw 10, 6, 9
+; LE-32BIT-NEXT: slw 11, 8, 4
+; LE-32BIT-NEXT: srw 8, 8, 9
+; LE-32BIT-NEXT: srw 9, 3, 9
+; LE-32BIT-NEXT: slw 6, 6, 4
+; LE-32BIT-NEXT: slw 3, 3, 4
+; LE-32BIT-NEXT: stw 3, 12(5)
+; LE-32BIT-NEXT: or 3, 6, 9
+; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: or 3, 7, 8
; LE-32BIT-NEXT: stw 3, 0(5)
; LE-32BIT-NEXT: or 3, 11, 10
-; LE-32BIT-NEXT: stw 4, 8(5)
; LE-32BIT-NEXT: stw 3, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
@@ -483,22 +484,23 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; LE-32BIT-NEXT: lwz 4, 12(4)
; LE-32BIT-NEXT: stw 6, 44(1)
; LE-32BIT-NEXT: stw 6, 40(1)
-; LE-32BIT-NEXT: rlwinm 4, 4, 2, 28, 29
; LE-32BIT-NEXT: stw 6, 36(1)
; LE-32BIT-NEXT: stw 6, 32(1)
+; LE-32BIT-NEXT: rlwinm 6, 4, 2, 28, 29
; LE-32BIT-NEXT: stw 3, 28(1)
; LE-32BIT-NEXT: addi 3, 1, 16
; LE-32BIT-NEXT: stw 9, 24(1)
; LE-32BIT-NEXT: stw 8, 20(1)
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzux 3, 4, 3
-; LE-32BIT-NEXT: lwz 6, 4(4)
-; LE-32BIT-NEXT: lwz 7, 12(4)
-; LE-32BIT-NEXT: lwz 4, 8(4)
-; LE-32BIT-NEXT: stw 3, 0(5)
-; LE-32BIT-NEXT: stw 4, 8(5)
+; LE-32BIT-NEXT: lwzx 6, 3, 6
+; LE-32BIT-NEXT: rlwimi 3, 4, 2, 28, 29
+; LE-32BIT-NEXT: lwz 4, 4(3)
+; LE-32BIT-NEXT: lwz 7, 12(3)
+; LE-32BIT-NEXT: lwz 3, 8(3)
+; LE-32BIT-NEXT: stw 6, 0(5)
+; LE-32BIT-NEXT: stw 3, 8(5)
; LE-32BIT-NEXT: stw 7, 12(5)
-; LE-32BIT-NEXT: stw 6, 4(5)
+; LE-32BIT-NEXT: stw 4, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
%src = load i128, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index e1731ddfd92875..5fe7fca5b6e41d 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -304,27 +304,28 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-32BIT-NEXT: stw 3, 28(1)
; LE-32BIT-NEXT: addi 3, 1, 16
; LE-32BIT-NEXT: stw 9, 24(1)
-; LE-32BIT-NEXT: clrlwi 4, 4, 27
; LE-32BIT-NEXT: stw 8, 20(1)
-; LE-32BIT-NEXT: subfic 8, 4, 32
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzux 3, 6, 3
-; LE-32BIT-NEXT: lwz 9, 4(6)
-; LE-32BIT-NEXT: slw 3, 3, 4
-; LE-32BIT-NEXT: lwz 7, 8(6)
-; LE-32BIT-NEXT: lwz 6, 12(6)
-; LE-32BIT-NEXT: slw 11, 9, 4
-; LE-32BIT-NEXT: srw 9, 9, 8
-; LE-32BIT-NEXT: srw 10, 7, 8
-; LE-32BIT-NEXT: srw 8, 6, 8
+; LE-32BIT-NEXT: lwzx 6, 3, 6
+; LE-32BIT-NEXT: rlwimi 3, 4, 29, 28, 29
+; LE-32BIT-NEXT: lwz 7, 8(3)
+; LE-32BIT-NEXT: clrlwi 4, 4, 27
+; LE-32BIT-NEXT: lwz 8, 4(3)
+; LE-32BIT-NEXT: subfic 9, 4, 32
+; LE-32BIT-NEXT: lwz 3, 12(3)
+; LE-32BIT-NEXT: srw 10, 7, 9
+; LE-32BIT-NEXT: slw 11, 8, 4
+; LE-32BIT-NEXT: srw 8, 8, 9
+; LE-32BIT-NEXT: srw 9, 3, 9
; LE-32BIT-NEXT: slw 7, 7, 4
-; LE-32BIT-NEXT: slw 4, 6, 4
-; LE-32BIT-NEXT: or 3, 3, 9
-; LE-32BIT-NEXT: stw 4, 12(5)
-; LE-32BIT-NEXT: or 4, 7, 8
+; LE-32BIT-NEXT: slw 3, 3, 4
+; LE-32BIT-NEXT: slw 6, 6, 4
+; LE-32BIT-NEXT: stw 3, 12(5)
+; LE-32BIT-NEXT: or 3, 7, 9
+; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: or 3, 6, 8
; LE-32BIT-NEXT: stw 3, 0(5)
; LE-32BIT-NEXT: or 3, 11, 10
-; LE-32BIT-NEXT: stw 4, 8(5)
; LE-32BIT-NEXT: stw 3, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
index b94023b97a2950..cc51a00db415ca 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -30,11 +30,11 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) {
;
; PWR7-LABEL: @test_cmpxchg_seq_cst(
; PWR7-NEXT: entry:
-; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 8
+; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 16
; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]])
-; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8
+; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 16
; PWR7-NEXT: [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5)
-; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8
+; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16
; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]])
; PWR7-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0
; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1
>From 6c23894650b039de4f3e142ce7f90611243bff33 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:16:29 +0000
Subject: [PATCH 2/7] additional tc updates
---
clang/test/CodeGen/target-data.c | 22 +--
llvm/test/CodeGen/PowerPC/all-atomics.ll | 130 +++++++++---------
.../Bitcode/DataLayoutUpgradeTest.cpp | 2 +-
3 files changed, 77 insertions(+), 77 deletions(-)
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index cb89fad941c832..3e11f6ae183b6b 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -88,7 +88,7 @@
// RUN: %clang_cc1 -triple powerpc64-lv2 -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PS3
-// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-n32:64"
+// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64"
// RUN: %clang_cc1 -triple i686-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=I686-NACL
@@ -120,43 +120,43 @@
// RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC
-// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32"
+// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
// RUN: %clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPCLE
-// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32"
+// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
// RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC64-FREEBSD
-// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-n32:64"
+// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-i128:128-n32:64"
// RUN: %clang_cc1 -triple powerpc64le-freebsd -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC64LE-FREEBSD
-// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-n32:64"
+// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC64-LINUX
-// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu future %s | \
// RUN: FileCheck %s -check-prefix=PPC64-FUTURE
-// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu pwr10 %s | \
// RUN: FileCheck %s -check-prefix=PPC64-P10
-// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC64LE-LINUX
-// PPC64LE-LINUX: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-LINUX: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu future %s | \
// RUN: FileCheck %s -check-prefix=PPC64LE-FUTURE
-// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu pwr10 %s | \
// RUN: FileCheck %s -check-prefix=PPC64LE-P10
-// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=NVPTX
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 531e559ea7309c..40b96dab94b9ff 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-LABEL: test_op_ignore:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -160(1)
+; AIX32-NEXT: stwu 1, -176(1)
; AIX32-NEXT: lwz 3, L..C0(2) # @sc
-; AIX32-NEXT: stw 0, 168(1)
+; AIX32-NEXT: stw 0, 184(1)
; AIX32-NEXT: rlwinm 4, 3, 3, 27, 28
-; AIX32-NEXT: stw 15, 92(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 26, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 28, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 15, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 26, 152(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 28, 160(1) # 4-byte Folded Spill
; AIX32-NEXT: li 15, 1
; AIX32-NEXT: rlwinm 28, 3, 0, 0, 29
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: xori 26, 4, 24
-; AIX32-NEXT: stw 16, 96(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 17, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 104(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 21, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 22, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 24, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 25, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 27, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 29, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 152(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 16, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 120(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 128(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 132(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 136(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 140(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 148(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 156(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 29, 164(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 168(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 172(1) # 4-byte Folded Spill
; AIX32-NEXT: sync
; AIX32-NEXT: slw 29, 15, 26
; AIX32-NEXT: slw 3, 3, 26
@@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: bl .__atomic_fetch_xor_8[PR]
; AIX32-NEXT: nop
; AIX32-NEXT: lwz 31, L..C8(2) # @u128
-; AIX32-NEXT: addi 30, 1, 72
-; AIX32-NEXT: addi 29, 1, 56
+; AIX32-NEXT: addi 30, 1, 80
+; AIX32-NEXT: addi 29, 1, 64
; AIX32-NEXT: lwz 5, 12(31)
; AIX32-NEXT: lwz 4, 8(31)
; AIX32-NEXT: lwz 6, 4(31)
@@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: L..BB0_49: # %atomicrmw.start2
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 72(1)
-; AIX32-NEXT: stw 7, 56(1)
+; AIX32-NEXT: stw 7, 80(1)
+; AIX32-NEXT: stw 7, 64(1)
; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: stw 3, 76(1)
; AIX32-NEXT: li 3, 16
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 76(1)
-; AIX32-NEXT: stw 4, 80(1)
-; AIX32-NEXT: stw 5, 84(1)
-; AIX32-NEXT: stw 4, 64(1)
-; AIX32-NEXT: stw 6, 60(1)
+; AIX32-NEXT: stw 6, 84(1)
+; AIX32-NEXT: stw 4, 88(1)
+; AIX32-NEXT: stw 5, 92(1)
+; AIX32-NEXT: stw 4, 72(1)
+; AIX32-NEXT: stw 6, 68(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 84(1)
-; AIX32-NEXT: lwz 4, 80(1)
-; AIX32-NEXT: lwz 6, 76(1)
-; AIX32-NEXT: lwz 7, 72(1)
+; AIX32-NEXT: lwz 5, 92(1)
+; AIX32-NEXT: lwz 4, 88(1)
+; AIX32-NEXT: lwz 6, 84(1)
+; AIX32-NEXT: lwz 7, 80(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_49
; AIX32-NEXT: # %bb.50: # %atomicrmw.end1
; AIX32-NEXT: lwz 31, L..C9(2) # @s128
-; AIX32-NEXT: addi 30, 1, 72
-; AIX32-NEXT: addi 29, 1, 56
+; AIX32-NEXT: addi 30, 1, 80
+; AIX32-NEXT: addi 29, 1, 64
; AIX32-NEXT: lwz 5, 12(31)
; AIX32-NEXT: lwz 4, 8(31)
; AIX32-NEXT: lwz 6, 4(31)
@@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: L..BB0_51: # %atomicrmw.start
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 72(1)
-; AIX32-NEXT: stw 7, 56(1)
+; AIX32-NEXT: stw 7, 80(1)
+; AIX32-NEXT: stw 7, 64(1)
; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: stw 3, 76(1)
; AIX32-NEXT: li 3, 16
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 76(1)
-; AIX32-NEXT: stw 4, 80(1)
-; AIX32-NEXT: stw 5, 84(1)
-; AIX32-NEXT: stw 4, 64(1)
-; AIX32-NEXT: stw 6, 60(1)
+; AIX32-NEXT: stw 6, 84(1)
+; AIX32-NEXT: stw 4, 88(1)
+; AIX32-NEXT: stw 5, 92(1)
+; AIX32-NEXT: stw 4, 72(1)
+; AIX32-NEXT: stw 6, 68(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 84(1)
-; AIX32-NEXT: lwz 4, 80(1)
-; AIX32-NEXT: lwz 6, 76(1)
-; AIX32-NEXT: lwz 7, 72(1)
+; AIX32-NEXT: lwz 5, 92(1)
+; AIX32-NEXT: lwz 4, 88(1)
+; AIX32-NEXT: lwz 6, 84(1)
+; AIX32-NEXT: lwz 7, 80(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_51
; AIX32-NEXT: # %bb.52: # %atomicrmw.end
@@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: li 6, 5
; AIX32-NEXT: bl .__atomic_fetch_and_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 31, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 30, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 144(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 16, 96(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 15, 92(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 160
+; AIX32-NEXT: lwz 31, 172(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 168(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 164(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 160(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 156(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 152(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 148(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 144(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 140(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 136(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 21, 132(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 128(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 124(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 120(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 17, 116(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 16, 112(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 15, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 176
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 795646b22b945a..67dccb3849f93c 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -118,7 +118,7 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128"
"-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
"-f80:128:128-n8:16:32:64-S128");
- EXPECT_EQ(DL2, "e-m:e-i64:64-n32:64");
+ EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64");
EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:"
"64-S128-Fn32");
>From b09ee68faaa7f0bfd5499d1e786346c17905adb2 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:19:49 +0000
Subject: [PATCH 3/7] [PowerPC] Update data layout alignment of i128 to 16
---
clang/lib/Basic/Targets/OSTargets.h | 2 +-
clang/lib/Basic/Targets/PPC.h | 12 ++++++------
llvm/lib/IR/AutoUpgrade.cpp | 2 +-
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 3 +++
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 75f53e96ce28f6..c0351f26e9bee6 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -473,7 +473,7 @@ class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo<Target> {
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->SizeType = TargetInfo::UnsignedInt;
- this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-n32:64");
+ this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64");
}
};
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index b0833d30550af4..966647e98c624d 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -403,11 +403,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {
PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: PPCTargetInfo(Triple, Opts) {
if (Triple.isOSAIX())
- resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32");
+ resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32");
else if (Triple.getArch() == llvm::Triple::ppcle)
- resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32");
+ resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
else
- resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32");
+ resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
switch (getTriple().getOS()) {
case llvm::Triple::Linux:
@@ -462,12 +462,12 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
if (Triple.isOSAIX()) {
// TODO: Set appropriate ABI for AIX platform.
- DataLayout = "E-m:a-Fi64-i64:64-n32:64";
+ DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
LongDoubleWidth = 64;
LongDoubleAlign = DoubleAlign = 32;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
} else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
- DataLayout = "e-m:e-Fn32-i64:64-n32:64";
+ DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
ABI = "elfv2";
} else {
DataLayout = "E-m:e";
@@ -478,7 +478,7 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
ABI = "elfv1";
DataLayout += "-Fi64";
}
- DataLayout += "-i64:64-n32:64";
+ DataLayout += "-i64:64-i128:128-n32:64";
}
if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) {
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e73538da282e99..be426de2fa80a8 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
return Res;
}
- if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m"))) {
+ if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) {
// Mips64 with o32 ABI did not add "-i128:128".
// Add "-i128:128"
std::string I64 = "-i64:64";
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2b64ab9aa6973d..9eaa02bd7f80b9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -188,6 +188,9 @@ static std::string getDataLayoutString(const Triple &T) {
// documentation are wrong; these are correct (i.e. "what gcc does").
Ret += "-i64:64";
+ // Alignment for 128 bit integers.
+ Ret += "-i128:128";
+
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
if (is64Bit)
Ret += "-n32:64";
>From 6628f981a61dd1614e9e135155c81b12aa978298 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 27 Nov 2024 22:37:30 +0000
Subject: [PATCH 4/7] add tc to check alignment of int types
---
llvm/test/CodeGen/PowerPC/data-align.ll | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/data-align.ll
diff --git a/llvm/test/CodeGen/PowerPC/data-align.ll b/llvm/test/CodeGen/PowerPC/data-align.ll
new file mode 100644
index 00000000000000..bfedec139369c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/data-align.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux | FileCheck %s
+
+; CHECK: .set .Li8,
+; CHECK-NEXT: .size .Li8, 1
+ at i8 = private constant i8 42
+
+; CHECK: .set .Li16,
+; CHECK-NEXT: .size .Li16, 2
+ at i16 = private constant i16 42
+
+; CHECK: .set .Li32,
+; CHECK-NEXT: .size .Li32, 4
+ at i32 = private constant i32 42
+
+; CHECK: .set .Li64,
+; CHECK-NEXT: .size .Li64, 8
+ at i64 = private constant i64 42
+
+; CHECK: .set .Li128,
+; CHECK-NEXT: .size .Li128, 16
+ at i128 = private constant i128 42
+
>From e1e0a886a9701041e7493b90d598cb5c8a4a78cc Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 13:48:12 +0000
Subject: [PATCH 5/7] update alignment for 32bit only
---
clang/lib/Basic/Targets/PPC.h | 6 +-
clang/test/CodeGen/target-data.c | 4 +-
llvm/lib/IR/AutoUpgrade.cpp | 2 +-
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5 +-
llvm/test/CodeGen/PowerPC/all-atomics.ll | 130 +++++++++---------
llvm/test/CodeGen/PowerPC/ctrloop-sh.ll | 58 ++++----
llvm/test/CodeGen/PowerPC/pr59074.ll | 29 ++--
...lar-shift-by-byte-multiple-legalization.ll | 48 ++++---
.../PowerPC/wide-scalar-shift-legalization.ll | 33 +++--
9 files changed, 153 insertions(+), 162 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 966647e98c624d..3cd0fcad172939 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -403,11 +403,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo {
PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: PPCTargetInfo(Triple, Opts) {
if (Triple.isOSAIX())
- resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32");
+ resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32");
else if (Triple.getArch() == llvm::Triple::ppcle)
- resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
+ resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32");
else
- resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32");
+ resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32");
switch (getTriple().getOS()) {
case llvm::Triple::Linux:
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 3e11f6ae183b6b..2c758567faa1f6 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -120,11 +120,11 @@
// RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC
-// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
+// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32"
// RUN: %clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPCLE
-// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"
+// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32"
// RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=PPC64-FREEBSD
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index be426de2fa80a8..d54176a7878b5a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
return Res;
}
- if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) {
+ if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64()) {
// Mips64 with o32 ABI did not add "-i128:128".
// Add "-i128:128"
std::string I64 = "-i64:64";
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9eaa02bd7f80b9..a027049b9e9d47 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -188,12 +188,9 @@ static std::string getDataLayoutString(const Triple &T) {
// documentation are wrong; these are correct (i.e. "what gcc does").
Ret += "-i64:64";
- // Alignment for 128 bit integers.
- Ret += "-i128:128";
-
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
if (is64Bit)
- Ret += "-n32:64";
+ Ret += "-i128:128-n32:64";
else
Ret += "-n32";
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 40b96dab94b9ff..531e559ea7309c 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-LABEL: test_op_ignore:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -176(1)
+; AIX32-NEXT: stwu 1, -160(1)
; AIX32-NEXT: lwz 3, L..C0(2) # @sc
-; AIX32-NEXT: stw 0, 184(1)
+; AIX32-NEXT: stw 0, 168(1)
; AIX32-NEXT: rlwinm 4, 3, 3, 27, 28
-; AIX32-NEXT: stw 15, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 26, 152(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 28, 160(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 15, 92(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 26, 136(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 28, 144(1) # 4-byte Folded Spill
; AIX32-NEXT: li 15, 1
; AIX32-NEXT: rlwinm 28, 3, 0, 0, 29
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: xori 26, 4, 24
-; AIX32-NEXT: stw 16, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 17, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 128(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 21, 132(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 22, 136(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 140(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 24, 144(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 25, 148(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 27, 156(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 29, 164(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 168(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 172(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 16, 96(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 100(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 104(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 120(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 128(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 132(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 27, 140(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 29, 148(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 30, 152(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 156(1) # 4-byte Folded Spill
; AIX32-NEXT: sync
; AIX32-NEXT: slw 29, 15, 26
; AIX32-NEXT: slw 3, 3, 26
@@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: bl .__atomic_fetch_xor_8[PR]
; AIX32-NEXT: nop
; AIX32-NEXT: lwz 31, L..C8(2) # @u128
-; AIX32-NEXT: addi 30, 1, 80
-; AIX32-NEXT: addi 29, 1, 64
+; AIX32-NEXT: addi 30, 1, 72
+; AIX32-NEXT: addi 29, 1, 56
; AIX32-NEXT: lwz 5, 12(31)
; AIX32-NEXT: lwz 4, 8(31)
; AIX32-NEXT: lwz 6, 4(31)
@@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: L..BB0_49: # %atomicrmw.start2
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 80(1)
-; AIX32-NEXT: stw 7, 64(1)
+; AIX32-NEXT: stw 7, 72(1)
+; AIX32-NEXT: stw 7, 56(1)
; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 76(1)
+; AIX32-NEXT: stw 3, 68(1)
; AIX32-NEXT: li 3, 16
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 84(1)
-; AIX32-NEXT: stw 4, 88(1)
-; AIX32-NEXT: stw 5, 92(1)
-; AIX32-NEXT: stw 4, 72(1)
-; AIX32-NEXT: stw 6, 68(1)
+; AIX32-NEXT: stw 6, 76(1)
+; AIX32-NEXT: stw 4, 80(1)
+; AIX32-NEXT: stw 5, 84(1)
+; AIX32-NEXT: stw 4, 64(1)
+; AIX32-NEXT: stw 6, 60(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 92(1)
-; AIX32-NEXT: lwz 4, 88(1)
-; AIX32-NEXT: lwz 6, 84(1)
-; AIX32-NEXT: lwz 7, 80(1)
+; AIX32-NEXT: lwz 5, 84(1)
+; AIX32-NEXT: lwz 4, 80(1)
+; AIX32-NEXT: lwz 6, 76(1)
+; AIX32-NEXT: lwz 7, 72(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_49
; AIX32-NEXT: # %bb.50: # %atomicrmw.end1
; AIX32-NEXT: lwz 31, L..C9(2) # @s128
-; AIX32-NEXT: addi 30, 1, 80
-; AIX32-NEXT: addi 29, 1, 64
+; AIX32-NEXT: addi 30, 1, 72
+; AIX32-NEXT: addi 29, 1, 56
; AIX32-NEXT: lwz 5, 12(31)
; AIX32-NEXT: lwz 4, 8(31)
; AIX32-NEXT: lwz 6, 4(31)
@@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: L..BB0_51: # %atomicrmw.start
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
-; AIX32-NEXT: stw 7, 80(1)
-; AIX32-NEXT: stw 7, 64(1)
+; AIX32-NEXT: stw 7, 72(1)
+; AIX32-NEXT: stw 7, 56(1)
; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: stw 3, 76(1)
+; AIX32-NEXT: stw 3, 68(1)
; AIX32-NEXT: li 3, 16
; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 6, 84(1)
-; AIX32-NEXT: stw 4, 88(1)
-; AIX32-NEXT: stw 5, 92(1)
-; AIX32-NEXT: stw 4, 72(1)
-; AIX32-NEXT: stw 6, 68(1)
+; AIX32-NEXT: stw 6, 76(1)
+; AIX32-NEXT: stw 4, 80(1)
+; AIX32-NEXT: stw 5, 84(1)
+; AIX32-NEXT: stw 4, 64(1)
+; AIX32-NEXT: stw 6, 60(1)
; AIX32-NEXT: mr 4, 31
; AIX32-NEXT: mr 5, 30
; AIX32-NEXT: mr 6, 29
; AIX32-NEXT: bl .__atomic_compare_exchange[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 5, 92(1)
-; AIX32-NEXT: lwz 4, 88(1)
-; AIX32-NEXT: lwz 6, 84(1)
-; AIX32-NEXT: lwz 7, 80(1)
+; AIX32-NEXT: lwz 5, 84(1)
+; AIX32-NEXT: lwz 4, 80(1)
+; AIX32-NEXT: lwz 6, 76(1)
+; AIX32-NEXT: lwz 7, 72(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_51
; AIX32-NEXT: # %bb.52: # %atomicrmw.end
@@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: li 6, 5
; AIX32-NEXT: bl .__atomic_fetch_and_8[PR]
; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 31, 172(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 30, 168(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 164(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 160(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 156(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 152(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 148(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 144(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 140(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 136(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 132(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 128(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 16, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 15, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 176
+; AIX32-NEXT: lwz 31, 156(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 152(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 148(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 144(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 140(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 136(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 132(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 128(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 124(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 120(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 21, 116(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 112(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 104(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 17, 100(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 16, 96(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 15, 92(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 160
; AIX32-NEXT: lwz 0, 8(1)
; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
index 19f86f9d1af6ff..72de456cba395b 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -16,41 +16,39 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: addi 7, 1, 16
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwz 9, 4(4)
; CHECK-NEXT: lwz 8, 0(4)
+; CHECK-NEXT: lwz 9, 4(4)
; CHECK-NEXT: lwz 10, 8(4)
; CHECK-NEXT: lwz 11, 12(4)
; CHECK-NEXT: lwz 12, 12(5)
-; CHECK-NEXT: stw 9, 20(1)
-; CHECK-NEXT: mr 9, 7
; CHECK-NEXT: stw 6, 44(1)
-; CHECK-NEXT: rlwimi 9, 12, 29, 28, 29
; CHECK-NEXT: stw 6, 40(1)
; CHECK-NEXT: stw 6, 36(1)
; CHECK-NEXT: stw 6, 32(1)
; CHECK-NEXT: stw 11, 28(1)
; CHECK-NEXT: stw 10, 24(1)
; CHECK-NEXT: clrlwi 10, 12, 27
+; CHECK-NEXT: stw 9, 20(1)
; CHECK-NEXT: stw 8, 16(1)
-; CHECK-NEXT: rlwinm 12, 12, 29, 28, 29
-; CHECK-NEXT: lwz 8, 8(9)
-; CHECK-NEXT: subfic 0, 10, 32
-; CHECK-NEXT: lwz 11, 4(9)
-; CHECK-NEXT: lwz 9, 12(9)
-; CHECK-NEXT: srw 30, 8, 0
-; CHECK-NEXT: lwzx 12, 7, 12
-; CHECK-NEXT: slw 29, 11, 10
-; CHECK-NEXT: srw 11, 11, 0
-; CHECK-NEXT: srw 0, 9, 0
+; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29
+; CHECK-NEXT: lwzux 9, 8, 7
+; CHECK-NEXT: subfic 12, 10, 32
+; CHECK-NEXT: lwz 11, 8(8)
+; CHECK-NEXT: slw 9, 9, 10
+; CHECK-NEXT: lwz 0, 4(8)
+; CHECK-NEXT: lwz 8, 12(8)
+; CHECK-NEXT: srw 30, 11, 12
+; CHECK-NEXT: slw 29, 0, 10
+; CHECK-NEXT: srw 0, 0, 12
+; CHECK-NEXT: srw 12, 8, 12
+; CHECK-NEXT: slw 11, 11, 10
; CHECK-NEXT: slw 8, 8, 10
-; CHECK-NEXT: slw 12, 12, 10
-; CHECK-NEXT: or 8, 8, 0
+; CHECK-NEXT: stw 8, 12(3)
+; CHECK-NEXT: or 8, 11, 12
; CHECK-NEXT: stw 8, 8(3)
-; CHECK-NEXT: or 8, 12, 11
-; CHECK-NEXT: slw 9, 9, 10
+; CHECK-NEXT: or 8, 9, 0
; CHECK-NEXT: stw 8, 0(3)
; CHECK-NEXT: or 8, 29, 30
-; CHECK-NEXT: stw 9, 12(3)
; CHECK-NEXT: stw 8, 4(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.end
@@ -79,11 +77,11 @@ for.end: ; preds = %for.body
define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stwu 1, -64(1)
-; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill
+; CHECK-NEXT: stwu 1, -48(1)
+; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: mtctr 6
-; CHECK-NEXT: addi 6, 1, 32
+; CHECK-NEXT: addi 6, 1, 24
; CHECK-NEXT: .LBB1_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz 7, 0(4)
@@ -91,18 +89,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: lwz 11, 12(5)
; CHECK-NEXT: lwz 9, 8(4)
; CHECK-NEXT: lwz 10, 12(4)
-; CHECK-NEXT: stw 8, 36(1)
+; CHECK-NEXT: stw 8, 28(1)
; CHECK-NEXT: rlwinm 8, 11, 29, 28, 29
-; CHECK-NEXT: stw 7, 32(1)
+; CHECK-NEXT: stw 7, 24(1)
; CHECK-NEXT: srawi 7, 7, 31
-; CHECK-NEXT: stw 10, 44(1)
+; CHECK-NEXT: stw 10, 36(1)
; CHECK-NEXT: clrlwi 10, 11, 27
-; CHECK-NEXT: stw 9, 40(1)
+; CHECK-NEXT: stw 9, 32(1)
; CHECK-NEXT: subfic 12, 10, 32
-; CHECK-NEXT: stw 7, 28(1)
-; CHECK-NEXT: stw 7, 24(1)
; CHECK-NEXT: stw 7, 20(1)
; CHECK-NEXT: stw 7, 16(1)
+; CHECK-NEXT: stw 7, 12(1)
+; CHECK-NEXT: stw 7, 8(1)
; CHECK-NEXT: sub 7, 6, 8
; CHECK-NEXT: lwz 8, 4(7)
; CHECK-NEXT: lwz 9, 0(7)
@@ -124,8 +122,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 {
; CHECK-NEXT: stw 7, 4(3)
; CHECK-NEXT: bdnz .LBB1_1
; CHECK-NEXT: # %bb.2: # %for.end
-; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
-; CHECK-NEXT: addi 1, 1, 64
+; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: blr
entry:
br label %for.body
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
index ba21fed9f5abd4..6264b9f22876cc 100644
--- a/llvm/test/CodeGen/PowerPC/pr59074.ll
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -38,27 +38,26 @@ define void @pr59074(ptr %0) {
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
-; LE32-NEXT: rlwinm 7, 4, 29, 28, 29
+; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
; LE32-NEXT: stxvd2x 0, 0, 5
-; LE32-NEXT: lwzx 5, 6, 7
-; LE32-NEXT: rlwimi 6, 4, 29, 28, 29
; LE32-NEXT: clrlwi 4, 4, 27
-; LE32-NEXT: lwz 7, 8(6)
-; LE32-NEXT: lwz 8, 4(6)
-; LE32-NEXT: lwz 6, 12(6)
+; LE32-NEXT: lwzux 5, 9, 6
+; LE32-NEXT: lwz 6, 8(9)
+; LE32-NEXT: lwz 7, 4(9)
+; LE32-NEXT: lwz 8, 12(9)
; LE32-NEXT: xori 9, 4, 31
; LE32-NEXT: subfic 11, 4, 32
; LE32-NEXT: srw 5, 5, 4
-; LE32-NEXT: slwi 10, 7, 1
-; LE32-NEXT: srw 12, 8, 4
-; LE32-NEXT: slw 8, 8, 11
-; LE32-NEXT: srw 7, 7, 4
-; LE32-NEXT: slw 11, 6, 11
-; LE32-NEXT: srw 4, 6, 4
+; LE32-NEXT: slwi 10, 6, 1
+; LE32-NEXT: srw 6, 6, 4
; LE32-NEXT: slw 9, 10, 9
-; LE32-NEXT: or 5, 8, 5
-; LE32-NEXT: or 6, 11, 7
-; LE32-NEXT: or 7, 12, 9
+; LE32-NEXT: srw 10, 7, 4
+; LE32-NEXT: slw 7, 7, 11
+; LE32-NEXT: slw 11, 8, 11
+; LE32-NEXT: srw 4, 8, 4
+; LE32-NEXT: or 5, 7, 5
+; LE32-NEXT: or 6, 11, 6
+; LE32-NEXT: or 7, 10, 9
; LE32-NEXT: stw 4, 12(3)
; LE32-NEXT: stw 6, 8(3)
; LE32-NEXT: stw 5, 0(3)
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index 6bd5cd23dcec8c..12976e838f3ca6 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -405,26 +405,25 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-32BIT-NEXT: stw 9, 24(1)
; LE-32BIT-NEXT: rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT: stw 8, 20(1)
-; LE-32BIT-NEXT: subfic 9, 4, 32
+; LE-32BIT-NEXT: subfic 8, 4, 32
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzx 7, 3, 6
-; LE-32BIT-NEXT: rlwimi 3, 6, 0, 28, 29
-; LE-32BIT-NEXT: lwz 6, 8(3)
-; LE-32BIT-NEXT: lwz 8, 4(3)
-; LE-32BIT-NEXT: slw 7, 7, 4
-; LE-32BIT-NEXT: lwz 3, 12(3)
-; LE-32BIT-NEXT: srw 10, 6, 9
-; LE-32BIT-NEXT: slw 11, 8, 4
-; LE-32BIT-NEXT: srw 8, 8, 9
-; LE-32BIT-NEXT: srw 9, 3, 9
-; LE-32BIT-NEXT: slw 6, 6, 4
+; LE-32BIT-NEXT: lwzux 3, 6, 3
+; LE-32BIT-NEXT: lwz 9, 4(6)
; LE-32BIT-NEXT: slw 3, 3, 4
-; LE-32BIT-NEXT: stw 3, 12(5)
-; LE-32BIT-NEXT: or 3, 6, 9
-; LE-32BIT-NEXT: stw 3, 8(5)
-; LE-32BIT-NEXT: or 3, 7, 8
+; LE-32BIT-NEXT: lwz 7, 8(6)
+; LE-32BIT-NEXT: lwz 6, 12(6)
+; LE-32BIT-NEXT: slw 11, 9, 4
+; LE-32BIT-NEXT: srw 9, 9, 8
+; LE-32BIT-NEXT: srw 10, 7, 8
+; LE-32BIT-NEXT: srw 8, 6, 8
+; LE-32BIT-NEXT: slw 7, 7, 4
+; LE-32BIT-NEXT: slw 4, 6, 4
+; LE-32BIT-NEXT: or 3, 3, 9
+; LE-32BIT-NEXT: stw 4, 12(5)
+; LE-32BIT-NEXT: or 4, 7, 8
; LE-32BIT-NEXT: stw 3, 0(5)
; LE-32BIT-NEXT: or 3, 11, 10
+; LE-32BIT-NEXT: stw 4, 8(5)
; LE-32BIT-NEXT: stw 3, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
@@ -484,23 +483,22 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; LE-32BIT-NEXT: lwz 4, 12(4)
; LE-32BIT-NEXT: stw 6, 44(1)
; LE-32BIT-NEXT: stw 6, 40(1)
+; LE-32BIT-NEXT: rlwinm 4, 4, 2, 28, 29
; LE-32BIT-NEXT: stw 6, 36(1)
; LE-32BIT-NEXT: stw 6, 32(1)
-; LE-32BIT-NEXT: rlwinm 6, 4, 2, 28, 29
; LE-32BIT-NEXT: stw 3, 28(1)
; LE-32BIT-NEXT: addi 3, 1, 16
; LE-32BIT-NEXT: stw 9, 24(1)
; LE-32BIT-NEXT: stw 8, 20(1)
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzx 6, 3, 6
-; LE-32BIT-NEXT: rlwimi 3, 4, 2, 28, 29
-; LE-32BIT-NEXT: lwz 4, 4(3)
-; LE-32BIT-NEXT: lwz 7, 12(3)
-; LE-32BIT-NEXT: lwz 3, 8(3)
-; LE-32BIT-NEXT: stw 6, 0(5)
-; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: lwzux 3, 4, 3
+; LE-32BIT-NEXT: lwz 6, 4(4)
+; LE-32BIT-NEXT: lwz 7, 12(4)
+; LE-32BIT-NEXT: lwz 4, 8(4)
+; LE-32BIT-NEXT: stw 3, 0(5)
+; LE-32BIT-NEXT: stw 4, 8(5)
; LE-32BIT-NEXT: stw 7, 12(5)
-; LE-32BIT-NEXT: stw 4, 4(5)
+; LE-32BIT-NEXT: stw 6, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
%src = load i128, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index 5fe7fca5b6e41d..e1731ddfd92875 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -304,28 +304,27 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; LE-32BIT-NEXT: stw 3, 28(1)
; LE-32BIT-NEXT: addi 3, 1, 16
; LE-32BIT-NEXT: stw 9, 24(1)
+; LE-32BIT-NEXT: clrlwi 4, 4, 27
; LE-32BIT-NEXT: stw 8, 20(1)
+; LE-32BIT-NEXT: subfic 8, 4, 32
; LE-32BIT-NEXT: stw 7, 16(1)
-; LE-32BIT-NEXT: lwzx 6, 3, 6
-; LE-32BIT-NEXT: rlwimi 3, 4, 29, 28, 29
-; LE-32BIT-NEXT: lwz 7, 8(3)
-; LE-32BIT-NEXT: clrlwi 4, 4, 27
-; LE-32BIT-NEXT: lwz 8, 4(3)
-; LE-32BIT-NEXT: subfic 9, 4, 32
-; LE-32BIT-NEXT: lwz 3, 12(3)
-; LE-32BIT-NEXT: srw 10, 7, 9
-; LE-32BIT-NEXT: slw 11, 8, 4
-; LE-32BIT-NEXT: srw 8, 8, 9
-; LE-32BIT-NEXT: srw 9, 3, 9
-; LE-32BIT-NEXT: slw 7, 7, 4
+; LE-32BIT-NEXT: lwzux 3, 6, 3
+; LE-32BIT-NEXT: lwz 9, 4(6)
; LE-32BIT-NEXT: slw 3, 3, 4
-; LE-32BIT-NEXT: slw 6, 6, 4
-; LE-32BIT-NEXT: stw 3, 12(5)
-; LE-32BIT-NEXT: or 3, 7, 9
-; LE-32BIT-NEXT: stw 3, 8(5)
-; LE-32BIT-NEXT: or 3, 6, 8
+; LE-32BIT-NEXT: lwz 7, 8(6)
+; LE-32BIT-NEXT: lwz 6, 12(6)
+; LE-32BIT-NEXT: slw 11, 9, 4
+; LE-32BIT-NEXT: srw 9, 9, 8
+; LE-32BIT-NEXT: srw 10, 7, 8
+; LE-32BIT-NEXT: srw 8, 6, 8
+; LE-32BIT-NEXT: slw 7, 7, 4
+; LE-32BIT-NEXT: slw 4, 6, 4
+; LE-32BIT-NEXT: or 3, 3, 9
+; LE-32BIT-NEXT: stw 4, 12(5)
+; LE-32BIT-NEXT: or 4, 7, 8
; LE-32BIT-NEXT: stw 3, 0(5)
; LE-32BIT-NEXT: or 3, 11, 10
+; LE-32BIT-NEXT: stw 4, 8(5)
; LE-32BIT-NEXT: stw 3, 4(5)
; LE-32BIT-NEXT: addi 1, 1, 48
; LE-32BIT-NEXT: blr
>From 577d588bae567a6517e18d52d947228a86a833c0 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 15:16:33 +0000
Subject: [PATCH 6/7] add additional tests for DataLayoutUpgrade and move
existing to proper location
---
.../Bitcode/DataLayoutUpgradeTest.cpp | 21 ++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 67dccb3849f93c..7a74a5882286f9 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -92,6 +92,16 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
"e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64", "mips64el"),
"e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64");
+ // Check that PowerPC64 targets add -i128:128.
+ EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"),
+ "e-m:e-i64:64-i128:128-n32:64");
+ EXPECT_EQ(UpgradeDataLayoutString(
+ "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
+ "E-m:e-Fn32-i64:64-i128:128-n32:64");
+ EXPECT_EQ(UpgradeDataLayoutString(
+ "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
+ "E-m:a-Fi64-i64:64-i128:128-n32:64");
+
// Check that SPIR && SPIRV targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir64"), "e-p:32:32-G1");
@@ -108,8 +118,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
"-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
"-n8:16:32:64-S128",
"x86_64-unknown-linux-gnu");
- std::string DL2 = UpgradeDataLayoutString("e-m:e-i64:64-n32:64",
- "powerpc64le-unknown-linux-gnu");
std::string DL3 = UpgradeDataLayoutString(
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32",
"aarch64--");
@@ -118,7 +126,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128"
"-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
"-f80:128:128-n8:16:32:64-S128");
- EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64");
EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:"
"64-S128-Fn32");
@@ -153,6 +160,14 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
EXPECT_EQ(UpgradeDataLayoutString("G2", "spir64"), "G2");
EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv32"), "G2");
EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv64"), "G2");
+
+ // Check that PowerPC32 targets don't add -i128:128.
+ EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32", "powerpcle-linux"),
+ "e-m:e-i64:64-n32");
+ EXPECT_EQ(UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32", "powerpc-linux"),
+ "E-m:e-Fn32-i64:64-n32");
+ EXPECT_EQ(UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32", "powerpc-aix"),
+ "E-m:a-Fi64-i64:64-n32");
}
TEST(DataLayoutUpgradeTest, EmptyDataLayout) {
>From 5eb545c3649fa0e28a9b898a884f1ac3c54cf48a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 28 Nov 2024 11:07:21 -0500
Subject: [PATCH 7/7] clang-format code
---
llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 7a74a5882286f9..00bb963d39cedb 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -95,12 +95,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
// Check that PowerPC64 targets add -i128:128.
EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"),
"e-m:e-i64:64-i128:128-n32:64");
- EXPECT_EQ(UpgradeDataLayoutString(
- "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
- "E-m:e-Fn32-i64:64-i128:128-n32:64");
- EXPECT_EQ(UpgradeDataLayoutString(
- "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
- "E-m:a-Fi64-i64:64-i128:128-n32:64");
+ EXPECT_EQ(
+ UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"),
+ "E-m:e-Fn32-i64:64-i128:128-n32:64");
+ EXPECT_EQ(
+ UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"),
+ "E-m:a-Fi64-i64:64-i128:128-n32:64");
// Check that SPIR && SPIRV targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");
More information about the llvm-commits
mailing list