[llvm] [clang-tools-extra] [clang] [PowerPC] Implement fence builtin (PR #76495)

Qiu Chaofan via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 9 23:31:07 PST 2024


https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/76495

>From aaa11bc775b9aa3a0398ba2bbca4087e99f04243 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Dec 2023 16:54:25 +0800
Subject: [PATCH 1/4] [PowerPC] Implement fence builtin

---
 clang/include/clang/Basic/BuiltinsPPC.def             |  3 +++
 clang/lib/Basic/Targets/PPC.cpp                       |  1 +
 llvm/include/llvm/IR/IntrinsicsPowerPC.td             |  5 +++++
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp              |  7 ++++++-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td               |  4 ++++
 .../CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll    | 11 +++++++++++
 6 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index a35488ed3dfa56..829c60defe17c6 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -944,6 +944,9 @@ TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
 // Set the floating point rounding mode
 BUILTIN(__builtin_setrnd, "di", "")
 
+// Barrier for instruction motion
+BUILTIN(__builtin_ppc_fence, "v", "")
+
 // Get content from current FPSCR
 BUILTIN(__builtin_readflm, "d", "")
 
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 045c273f03c7a0..41935abfb65d3b 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -212,6 +212,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
   Builder.defineMacro("__darn_32", "__builtin_darn_32");
   Builder.defineMacro("__darn_raw", "__builtin_darn_raw");
   Builder.defineMacro("__dcbf", "__builtin_dcbf");
+  Builder.defineMacro("__fence", "__builtin_ppc_fence");
   Builder.defineMacro("__fmadd", "__builtin_fma");
   Builder.defineMacro("__fmadds", "__builtin_fmaf");
   Builder.defineMacro("__abs", "__builtin_abs");
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3ede2a3736bf30..6d1e8eb47405dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -29,6 +29,11 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
     [IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
   def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
 
+  // Emit a pseudo instruction that acts as a fence against instruction motion
+  def int_ppc_fence : ClangBuiltin<"__builtin_ppc_fence">,
+                      DefaultAttrsIntrinsic<[], [],
+                                            [IntrNoMerge, IntrHasSideEffects]>;
+
   // Get content from current FPSCR register
   def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">,
                         DefaultAttrsIntrinsic<[llvm_double_ty], [],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index aaced58defe603..af55c6cf337120 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2155,11 +2155,16 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
 bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                         const MachineBasicBlock *MBB,
                                         const MachineFunction &MF) const {
+  switch (MI.getOpcode()) {
+  default: break;
   // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
   // across them, since some FP operations may change content of FPSCR.
   // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
-  if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+  case PPC::MFFS:
+  case PPC::MTFSF:
+  case PPC::FENCE:
     return true;
+  }
   return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b1601739fd4569..c0344dfbf3a728 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1328,6 +1328,9 @@ def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
                     "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
 }
 
+let isBarrier = 1, hasSideEffects = 1, Defs = [RM] in
+def FENCE : PPCEmitTimePseudo<(outs), (ins), "#FENCE", []>;
+
 let Defs = [LR] in
   def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
@@ -3187,6 +3190,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
+def : Pat<(int_ppc_fence), (FENCE)>;
 def : Pat<(int_ppc_readflm), (MFFS)>;
 def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
index 2c9fd2034f887c..555de90c56c364 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
@@ -29,3 +29,14 @@ entry:
   ret void
 }
 declare void @llvm.ppc.iospace.sync()
+
+define dso_local void @test_builtin_ppc_fence() {
+; CHECK-LABEL: test_builtin_ppc_fence:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #FENCE
+; CHECK-NEXT:    blr
+entry:
+  call void @llvm.ppc.fence()
+  ret void
+}
+declare void @llvm.ppc.fence()

>From 6f31c61891777cd80f339f9e6278762c054710c8 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Dec 2023 17:26:50 +0800
Subject: [PATCH 2/4] Add test

---
 .../PowerPC/builtins-ppc-xlcompat-sync.c      | 24 ++++++++++
 .../PowerPC/builtins-ppc-xlcompat-msync.ll    | 11 -----
 llvm/test/CodeGen/PowerPC/fence.ll            | 45 +++++++++++++++++++
 3 files changed, 69 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fence.ll

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
index 9187bb855dac22..a5cc97161c56ac 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
@@ -194,6 +194,18 @@ void test_dcbz() {
   __dcbz(c);
 }
 
+// CHECK-LABEL: @test_fence(
+// CHECK:         call void @llvm.ppc.fence()
+// CHECK-NEXT:    ret void
+//
+// CHECK-32-LABEL: @test_fence(
+// CHECK-32:         call void @llvm.ppc.fence()
+// CHECK-32-NEXT:    ret void
+//
+void test_fence() {
+  __fence();
+}
+
 // CHECK-LABEL: @test_builtin_ppc_popcntb(
 // CHECK:    [[TMP0:%.*]] = load i64, ptr @a, align 8
 // CHECK-NEXT:    [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]])
@@ -375,3 +387,15 @@ void test_builtin_ppc_dcbtst() {
 void test_builtin_ppc_dcbz() {
   __builtin_ppc_dcbz(c);
 }
+
+// CHECK-LABEL: @test_builtin_ppc_fence(
+// CHECK:         call void @llvm.ppc.fence()
+// CHECK-NEXT:    ret void
+//
+// CHECK-32-LABEL: @test_builtin_ppc_fence(
+// CHECK-32:         call void @llvm.ppc.fence()
+// CHECK-32-NEXT:    ret void
+//
+void test_builtin_ppc_fence() {
+  __builtin_ppc_fence();
+}
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
index 555de90c56c364..2c9fd2034f887c 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll
@@ -29,14 +29,3 @@ entry:
   ret void
 }
 declare void @llvm.ppc.iospace.sync()
-
-define dso_local void @test_builtin_ppc_fence() {
-; CHECK-LABEL: test_builtin_ppc_fence:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    #FENCE
-; CHECK-NEXT:    blr
-entry:
-  call void @llvm.ppc.fence()
-  ret void
-}
-declare void @llvm.ppc.fence()
diff --git a/llvm/test/CodeGen/PowerPC/fence.ll b/llvm/test/CodeGen/PowerPC/fence.ll
new file mode 100644
index 00000000000000..da14e8be0d4288
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fence.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN:   2>&1 | FileCheck %s --check-prefix=LOG
+
+define dso_local void @test_builtin_ppc_fence() {
+; CHECK-LABEL: test_builtin_ppc_fence:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #FENCE
+; CHECK-NEXT:    blr
+entry:
+  call void @llvm.ppc.fence()
+  ret void
+}
+declare void @llvm.ppc.fence()
+
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+define double @motion(double %a, double %b, double %c, double %d) {
+entry:
+  %0 = fdiv double %a, %b
+  %1 = fdiv double %b, %d
+  call void @llvm.ppc.fence()
+  %2 = fdiv double %c, %d
+  %3 = fdiv double %a, %c
+  call void @llvm.ppc.fence()
+  %4 = fadd double %0, %1
+  %5 = fadd double %2, %3
+  %6 = fsub double %4, %5
+  ret double %6
+}

>From c38003f9d247fd4a51b1858d57086b7fb326e3b9 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Dec 2023 17:27:31 +0800
Subject: [PATCH 3/4] Address format

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index af55c6cf337120..538e0e6b3d420c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2156,7 +2156,8 @@ bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                         const MachineBasicBlock *MBB,
                                         const MachineFunction &MF) const {
   switch (MI.getOpcode()) {
-  default: break;
+  default:
+    break;
   // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
   // across them, since some FP operations may change content of FPSCR.
   // TODO: Model FPSCR in PPC instruction definitions and remove the workaround

>From f4f39ed88c843b181d434f135e07c54f2f2a88a4 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Wed, 10 Jan 2024 15:30:48 +0800
Subject: [PATCH 4/4] Add comments to fence

---
 clang/include/clang/Basic/BuiltinsPPC.def | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 829c60defe17c6..88ae0ce940852e 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -110,6 +110,10 @@ BUILTIN(__builtin_ppc_fctiw, "dd", "")
 BUILTIN(__builtin_ppc_fctiwz, "dd", "")
 BUILTIN(__builtin_ppc_fctudz, "dd", "")
 BUILTIN(__builtin_ppc_fctuwz, "dd", "")
+
+// The fence builtin prevents any instruction from being moved across it.
+BUILTIN(__builtin_ppc_fence, "v", "")
+
 BUILTIN(__builtin_ppc_swdiv_nochk, "ddd", "")
 BUILTIN(__builtin_ppc_swdivs_nochk, "fff", "")
 BUILTIN(__builtin_ppc_alignx, "vIivC*", "nc")
@@ -944,9 +948,6 @@ TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
 // Set the floating point rounding mode
 BUILTIN(__builtin_setrnd, "di", "")
 
-// Barrier for instruction motion
-BUILTIN(__builtin_ppc_fence, "v", "")
-
 // Get content from current FPSCR
 BUILTIN(__builtin_readflm, "d", "")
 



More information about the cfe-commits mailing list