[llvm] [PowerPC] Update to run VSX FMA Mutation pass before Register Coalescer for `-schedule-ppc-vsx-fma-mutation-early` (PR #111696)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 10 13:20:05 PDT 2024


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/111696

>From 3059a3fd0e02f9536c4111af36edb2bdb3bf27cd Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 9 Oct 2024 11:12:39 -0400
Subject: [PATCH 1/4] move the VSX FMA mutation pass ahead of the register coalescer

---
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp |  3 +-
 llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 77 ++++++++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 7d0455942923dd..a9e8d038ffd8bd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -572,7 +572,8 @@ void PPCPassConfig::addMachineSSAOptimization() {
 void PPCPassConfig::addPreRegAlloc() {
   if (getOptLevel() != CodeGenOptLevel::None) {
     initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
-    insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+    insertPass(VSXFMAMutateEarly ? &TwoAddressInstructionPassID
+                                 : &MachineSchedulerID,
                &PPCVSXFMAMutateID);
   }
 
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
new file mode 100644
index 00000000000000..fa86fe7664e41c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -0,0 +1,77 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early < %s | \
+; RUN:  FileCheck --check-prefix=CHECK-M %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names < %s | \
+; RUN:  FileCheck --check-prefix=CHECK-A %s
+
+target triple = "powerpc64-ibm-aix7.2.0.0"
+define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
+entry:
+  %0 = load i32, ptr %n, align 4
+  %cmp11 = icmp sgt i32 %0, 0
+  br i1 %cmp11, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %0 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %1 = shl nsw i64 %indvars.iv, 2
+  %add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
+  %add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
+  %2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
+  %add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
+  store <4 x float> %2, ptr %add.ptr6, align 1 
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 
+
+; CHECK-M:              .csect ..text..[PR],5{{[[:space:]].*}}.vsexp: 
+; CHECK-M-NEXT: # %bb.0:                                # %entry
+; CHECK-M-NEXT:         lwz r5, 0(r5)
+; CHECK-M-NEXT:         cmpwi   r5, 1
+; CHECK-M-NEXT:         bltlr   cr0
+; CHECK-M-NEXT: # %bb.1:                                # %for.body.preheader
+; CHECK-M-NEXT:         xxspltiw vs0, 1069066811
+; CHECK-M-NEXT:         xxspltiw vs1, 1170469888
+; CHECK-M-NEXT:         mtctr r5
+; CHECK-M-NEXT:         li r5, 0
+; CHECK-M-NEXT:         .align  5
+; CHECK-M-NEXT: L..BB0_2:                               # %for.body
+; CHECK-M-NEXT:                                         # =>This Inner Loop Header: Depth=1
+; CHECK-M-NEXT:         lxvx vs2, r4, r5
+; CHECK-M-NEXT:         xvmaddmsp vs2, vs0, vs1
+; CHECK-M-NEXT:         stxvx vs2, r3, r5
+; CHECK-M-NEXT:         addi r5, r5, 16
+; CHECK-M-NEXT:         bdnz L..BB0_2
+; CHECK-M-NEXT: # %bb.3:                                # %for.end
+; CHECK-M-NEXT:         blr
+
+; CHECK-A:              .csect ..text..[PR],5{{[[:space:]].*}}.vsexp:
+; CHECK-A-NEXT: # %bb.0:                                # %entry
+; CHECK-A-NEXT:         lwz r5, 0(r5)
+; CHECK-A-NEXT:         cmpwi   r5, 1
+; CHECK-A-NEXT:         bltlr   cr0
+; CHECK-A-NEXT: # %bb.1:                                # %for.body.preheader
+; CHECK-A-NEXT:         xxspltiw vs0, 1069066811
+; CHECK-A-NEXT:         mtctr r5
+; CHECK-A-NEXT:         li r5, 0
+; CHECK-A-NEXT:         .align  5
+; CHECK-A-NEXT: L..BB0_2:                               # %for.body
+; CHECK-A-NEXT:                                         # =>This Inner Loop Header: Depth=1
+; CHECK-A-NEXT:         lxvx vs1, r4, r5
+; CHECK-A-NEXT:         xxspltiw vs2, 1170469888
+; CHECK-A-NEXT:         xvmaddasp vs2, vs1, vs0
+; CHECK-A-NEXT:         stxvx vs2, r3, r5
+; CHECK-A-NEXT:         addi r5, r5, 16
+; CHECK-A-NEXT:         bdnz L..BB0_2
+; CHECK-A-NEXT: # %bb.3:                                # %for.end
+; CHECK-A-NEXT:         blr

>From 7788c3a1872078ca9ea5280adddd7abb963c7297 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 9 Oct 2024 11:31:49 -0400
Subject: [PATCH 2/4] minor change

---
 llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 4b4e47e9532a87..74ddfb327cd2f9 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -87,8 +87,8 @@ namespace {
         if (AltOpc == -1)
           continue;
 
-        // This pass is run after register coalescing, and so we're looking for
-        // a situation like this:
+        // This pass is run after the Two-Address instruction pass, and so we're
+        // looking for a situation like this:
         //   ...
         //   %5 = COPY %9; VSLRC:%5,%9
         //   %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,

>From 917af9b11d6d996191ae3d410a9777c4c364bfa2 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Oct 2024 10:21:34 -0400
Subject: [PATCH 3/4] minor change: simplify the test case

---
 llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
index fa86fe7664e41c..8d9589f7b1e98c 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -5,17 +5,17 @@
 ; RUN:  FileCheck --check-prefix=CHECK-A %s
 
 target triple = "powerpc64-ibm-aix7.2.0.0"
-define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
+define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
 entry:
   %0 = load i32, ptr %n, align 4
   %cmp11 = icmp sgt i32 %0, 0
   br i1 %cmp11, label %for.body.preheader, label %for.end
 
-for.body.preheader:                               ; preds = %entry
+for.body.preheader:
   %wide.trip.count = zext i32 %0 to i64
   br label %for.body
 
-for.body:                                         ; preds = %for.body.preheader, %for.body
+for.body:
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
   %1 = shl nsw i64 %indvars.iv, 2
   %add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
@@ -27,7 +27,7 @@ for.body:                                         ; preds = %for.body.preheader,
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.end, label %for.body
 
-for.end:                                          ; preds = %for.body, %entry
+for.end:
   ret void
 }
 

>From b3f61209680b570a593f4a90fcf3c8ca51cdf218 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Oct 2024 16:19:52 -0400
Subject: [PATCH 4/4] add comment of test case

---
 llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
index 8d9589f7b1e98c..bb2cfa1b0844fa 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -1,10 +1,11 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early < %s | \
-; RUN:  FileCheck --check-prefix=CHECK-M %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
+; RUN:   -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
+; RUN:    -mtriple powerpc64-ibm-aix7.2.0.0 < %s | FileCheck --check-prefix=CHECK-M %s
 
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names < %s | \
-; RUN:  FileCheck --check-prefix=CHECK-A %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
+; RUN:   -ppc-asm-full-reg-names -mtriple powerpc64-ibm-aix7.2.0.0 < %s | \
+; RUN:   FileCheck --check-prefix=CHECK-A %s
 
-target triple = "powerpc64-ibm-aix7.2.0.0"
 define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
 entry:
   %0 = load i32, ptr %n, align 4



More information about the llvm-commits mailing list