[llvm] [PowerPC] Update to run VSX FMA Mutation pass before Register Coalescer for `-schedule-ppc-vsx-fma-mutation-early` (PR #111696)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 13:20:05 PDT 2024
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/111696
>From 3059a3fd0e02f9536c4111af36edb2bdb3bf27cd Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 9 Oct 2024 11:12:39 -0400
Subject: [PATCH 1/4] move the head of
---
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 3 +-
llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 77 ++++++++++++++++++++
2 files changed, 79 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 7d0455942923dd..a9e8d038ffd8bd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -572,7 +572,8 @@ void PPCPassConfig::addMachineSSAOptimization() {
void PPCPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOptLevel::None) {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
- insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ insertPass(VSXFMAMutateEarly ? &TwoAddressInstructionPassID
+ : &MachineSchedulerID,
&PPCVSXFMAMutateID);
}
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
new file mode 100644
index 00000000000000..fa86fe7664e41c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -0,0 +1,77 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early < %s | \
+; RUN: FileCheck --check-prefix=CHECK-M %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names < %s | \
+; RUN: FileCheck --check-prefix=CHECK-A %s
+
+target triple = "powerpc64-ibm-aix7.2.0.0"
+define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %n, align 4
+ %cmp11 = icmp sgt i32 %0, 0
+ br i1 %cmp11, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %0 to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %1 = shl nsw i64 %indvars.iv, 2
+ %add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
+ %add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
+ %2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
+ %add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
+ store <4 x float> %2, ptr %add.ptr6, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+; CHECK-M: .csect ..text..[PR],5{{[[:space:]].*}}.vsexp:
+; CHECK-M-NEXT: # %bb.0: # %entry
+; CHECK-M-NEXT: lwz r5, 0(r5)
+; CHECK-M-NEXT: cmpwi r5, 1
+; CHECK-M-NEXT: bltlr cr0
+; CHECK-M-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-M-NEXT: xxspltiw vs0, 1069066811
+; CHECK-M-NEXT: xxspltiw vs1, 1170469888
+; CHECK-M-NEXT: mtctr r5
+; CHECK-M-NEXT: li r5, 0
+; CHECK-M-NEXT: .align 5
+; CHECK-M-NEXT: L..BB0_2: # %for.body
+; CHECK-M-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-M-NEXT: lxvx vs2, r4, r5
+; CHECK-M-NEXT: xvmaddmsp vs2, vs0, vs1
+; CHECK-M-NEXT: stxvx vs2, r3, r5
+; CHECK-M-NEXT: addi r5, r5, 16
+; CHECK-M-NEXT: bdnz L..BB0_2
+; CHECK-M-NEXT: # %bb.3: # %for.end
+; CHECK-M-NEXT: blr
+
+; CHECK-A: .csect ..text..[PR],5{{[[:space:]].*}}.vsexp:
+; CHECK-A-NEXT: # %bb.0: # %entry
+; CHECK-A-NEXT: lwz r5, 0(r5)
+; CHECK-A-NEXT: cmpwi r5, 1
+; CHECK-A-NEXT: bltlr cr0
+; CHECK-A-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-A-NEXT: xxspltiw vs0, 1069066811
+; CHECK-A-NEXT: mtctr r5
+; CHECK-A-NEXT: li r5, 0
+; CHECK-A-NEXT: .align 5
+; CHECK-A-NEXT: L..BB0_2: # %for.body
+; CHECK-A-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-A-NEXT: lxvx vs1, r4, r5
+; CHECK-A-NEXT: xxspltiw vs2, 1170469888
+; CHECK-A-NEXT: xvmaddasp vs2, vs1, vs0
+; CHECK-A-NEXT: stxvx vs2, r3, r5
+; CHECK-A-NEXT: addi r5, r5, 16
+; CHECK-A-NEXT: bdnz L..BB0_2
+; CHECK-A-NEXT: # %bb.3: # %for.end
+; CHECK-A-NEXT: blr
>From 7788c3a1872078ca9ea5280adddd7abb963c7297 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 9 Oct 2024 11:31:49 -0400
Subject: [PATCH 2/4] minor change
---
llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 4b4e47e9532a87..74ddfb327cd2f9 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -87,8 +87,8 @@ namespace {
if (AltOpc == -1)
continue;
- // This pass is run after register coalescing, and so we're looking for
- // a situation like this:
+ // This pass is run after the Two-Address instruction pass, and so we're
+ // looking for a situation like this:
// ...
// %5 = COPY %9; VSLRC:%5,%9
// %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
>From 917af9b11d6d996191ae3d410a9777c4c364bfa2 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Oct 2024 10:21:34 -0400
Subject: [PATCH 3/4] minor change: simplify the test case
---
llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
index fa86fe7664e41c..8d9589f7b1e98c 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -5,17 +5,17 @@
; RUN: FileCheck --check-prefix=CHECK-A %s
target triple = "powerpc64-ibm-aix7.2.0.0"
-define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
+define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
entry:
%0 = load i32, ptr %n, align 4
%cmp11 = icmp sgt i32 %0, 0
br i1 %cmp11, label %for.body.preheader, label %for.end
-for.body.preheader: ; preds = %entry
+for.body.preheader:
%wide.trip.count = zext i32 %0 to i64
br label %for.body
-for.body: ; preds = %for.body.preheader, %for.body
+for.body:
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%1 = shl nsw i64 %indvars.iv, 2
%add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
@@ -27,7 +27,7 @@ for.body: ; preds = %for.body.preheader,
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end, label %for.body
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
>From b3f61209680b570a593f4a90fcf3c8ca51cdf218 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Oct 2024 16:19:52 -0400
Subject: [PATCH 4/4] add comment of test case
---
llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
index 8d9589f7b1e98c..bb2cfa1b0844fa 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -1,10 +1,11 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early < %s | \
-; RUN: FileCheck --check-prefix=CHECK-M %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
+; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
+; RUN: -mtriple powerpc64-ibm-aix7.2.0.0 < %s | FileCheck --check-prefix=CHECK-M %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names < %s | \
-; RUN: FileCheck --check-prefix=CHECK-A %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
+; RUN: -ppc-asm-full-reg-names -mtriple powerpc64-ibm-aix7.2.0.0 < %s | \
+; RUN: FileCheck --check-prefix=CHECK-A %s
-target triple = "powerpc64-ibm-aix7.2.0.0"
define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
entry:
%0 = load i32, ptr %n, align 4
More information about the llvm-commits
mailing list