[llvm] 118d423 - [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 07:46:44 PST 2024


Author: Koakuma
Date: 2024-01-16T22:46:39+07:00
New Revision: 118d4234ac6d0893974d324cdb738a1e0255c08d

URL: https://github.com/llvm/llvm-project/commit/118d4234ac6d0893974d324cdb738a1e0255c08d
DIFF: https://github.com/llvm/llvm-project/commit/118d4234ac6d0893974d324cdb738a1e0255c08d.diff

LOG: [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target

On 64-bit target, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids
polluting the processor's predictor state.

The old behavior of using a fake CALL is still done when tuning for
classic UltraSPARC processors, since RDPC is much slower there.

A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT
loads, and about 7% speedup on INSERT/UPDATE loads.

Reviewed By: @s-barannikov

Github PR: https://github.com/llvm/llvm-project/pull/78280

Added: 
    llvm/test/CodeGen/SPARC/getpcx-call.ll
    llvm/test/CodeGen/SPARC/getpcx-rdpc.ll

Modified: 
    llvm/lib/Target/Sparc/Sparc.td
    llvm/lib/Target/Sparc/SparcAsmPrinter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 1a71cfed3128f06..7b1033956524330 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
 def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                               "Use software emulation for floating point">;
 
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget tuning features.
+//
+
+def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
+                                    "rd %pc, %XX is slow", [FeatureV9]>;
+
 //==== Features added predmoninantly for LEON subtarget support
 include "LeonFeatures.td"
 
@@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant {
 // SPARC processors supported.
 //===----------------------------------------------------------------------===//
 
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+class Proc<string Name, list<SubtargetFeature> Features,
+           list<SubtargetFeature> TuneFeatures = []>
+ : Processor<Name, NoItineraries, Features, TuneFeatures>;
 
 def : Proc<"generic",         []>;
 def : Proc<"v7",              [FeatureSoftMulDiv, FeatureNoFSMULD]>;
@@ -118,9 +126,11 @@ def : Proc<"ma2480",          [FeatureLeon, LeonCASA]>;
 def : Proc<"ma2485",          [FeatureLeon, LeonCASA]>;
 def : Proc<"ma2x8x",          [FeatureLeon, LeonCASA]>;
 def : Proc<"v9",              [FeatureV9]>;
-def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated, FeatureVIS],
+                              [TuneSlowRDPC]>;
 def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated, FeatureVIS,
-                               FeatureVIS2]>;
+                               FeatureVIS2],
+                              [TuneSlowRDPC]>;
 def : Proc<"niagara",         [FeatureV9, FeatureV8Deprecated, FeatureVIS,
                                FeatureVIS2]>;
 def : Proc<"niagara2",        [FeatureV9, FeatureV8Deprecated, UsePopc,

diff  --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index cca624e09267962..215a8ea8319046f 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -13,6 +13,7 @@
 
 #include "MCTargetDesc/SparcInstPrinter.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "MCTargetDesc/SparcTargetStreamer.h"
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
@@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer,
   OutStreamer.emitInstruction(CallInst, STI);
 }
 
+static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD,
+                     const MCSubtargetInfo &STI) {
+  MCInst RDPCInst;
+  RDPCInst.setOpcode(SP::RDASR);
+  RDPCInst.addOperand(RD);
+  RDPCInst.addOperand(MCOperand::createReg(SP::ASR5));
+  OutStreamer.emitInstruction(RDPCInst, STI);
+}
+
 static void EmitSETHI(MCStreamer &OutStreamer,
                       MCOperand &Imm, MCOperand &RD,
                       const MCSubtargetInfo &STI)
@@ -226,7 +236,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
   MCOperand RegO7   = MCOperand::createReg(SP::O7);
 
   // <StartLabel>:
-  //   call <EndLabel>
+  //   <GET-PC> // This will be either `call <EndLabel>` or `rd %pc, %o7`.
   // <SethiLabel>:
   //     sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO>
   // <EndLabel>:
@@ -234,8 +244,17 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
   //   add <MO>, %o7, <MO>
 
   OutStreamer->emitLabel(StartLabel);
-  MCOperand Callee =  createPCXCallOP(EndLabel, OutContext);
-  EmitCall(*OutStreamer, Callee, STI);
+  if (!STI.getTargetTriple().isSPARC64() ||
+      STI.hasFeature(Sparc::TuneSlowRDPC)) {
+    MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
+    EmitCall(*OutStreamer, Callee, STI);
+  } else {
+    // TODO find out whether it is possible to store PC
+    // in other registers, to enable leaf function optimization.
+    // (On the other hand, approx. over 97.8% of GETPCXes happen
+    // in non-leaf functions, so would this be worth the effort?)
+    EmitRDPC(*OutStreamer, RegO7, STI);
+  }
   OutStreamer->emitLabel(SethiLabel);
   MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
                                        GOTLabel, StartLabel, SethiLabel,

diff  --git a/llvm/test/CodeGen/SPARC/getpcx-call.ll b/llvm/test/CodeGen/SPARC/getpcx-call.ll
new file mode 100644
index 000000000000000..72d7b5a0bc2f5b4
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/getpcx-call.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
+; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
+;; with a fake `call`.
+;; All other SPARC64 targets implement it with `rd %pc, %o7`.
+;; Need to do the tests in separate files because apparently `tune-cpu`
+;; attribute applies to the entire file at once.
+
+@value = external global i32
+
+define i32 @testCall() nounwind #0 {
+; SPARC-LABEL: testCall:
+; SPARC:       ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:  .Ltmp0:
+; SPARC-NEXT:    call .Ltmp1
+; SPARC-NEXT:  .Ltmp2:
+; SPARC-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT:  .Ltmp1:
+; SPARC-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT:    add %i0, %o7, %i0
+; SPARC-NEXT:    sethi %hi(value), %i1
+; SPARC-NEXT:    add %i1, %lo(value), %i1
+; SPARC-NEXT:    ld [%i0+%i1], %i0
+; SPARC-NEXT:    ld [%i0], %i0
+; SPARC-NEXT:    ret
+; SPARC-NEXT:    restore
+;
+; SPARC64-LABEL: testCall:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    save %sp, -128, %sp
+; SPARC64-NEXT:  .Ltmp0:
+; SPARC64-NEXT:    call .Ltmp1
+; SPARC64-NEXT:  .Ltmp2:
+; SPARC64-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT:  .Ltmp1:
+; SPARC64-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT:    add %i0, %o7, %i0
+; SPARC64-NEXT:    sethi %hi(value), %i1
+; SPARC64-NEXT:    add %i1, %lo(value), %i1
+; SPARC64-NEXT:    ldx [%i0+%i1], %i0
+; SPARC64-NEXT:    ld [%i0], %i0
+; SPARC64-NEXT:    ret
+; SPARC64-NEXT:    restore
+  %1 = load i32, ptr @value
+  ret i32 %1
+}
+
+attributes #0 = { "tune-cpu"="ultrasparc" }

diff  --git a/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll
new file mode 100644
index 000000000000000..286750a014e82db
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
+; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
+;; with a fake `call`.
+;; All other SPARC64 targets implement it with `rd %pc, %o7`.
+;; Need to do the tests in separate files because apparently `tune-cpu`
+;; attribute applies to the entire file at once.
+
+@value = external global i32
+
+define i32 @testRdpc() nounwind #0 {
+; SPARC-LABEL: testRdpc:
+; SPARC:       ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:  .Ltmp0:
+; SPARC-NEXT:    call .Ltmp1
+; SPARC-NEXT:  .Ltmp2:
+; SPARC-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT:  .Ltmp1:
+; SPARC-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT:    add %i0, %o7, %i0
+; SPARC-NEXT:    sethi %hi(value), %i1
+; SPARC-NEXT:    add %i1, %lo(value), %i1
+; SPARC-NEXT:    ld [%i0+%i1], %i0
+; SPARC-NEXT:    ld [%i0], %i0
+; SPARC-NEXT:    ret
+; SPARC-NEXT:    restore
+;
+; SPARC64-LABEL: testRdpc:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    save %sp, -128, %sp
+; SPARC64-NEXT:  .Ltmp0:
+; SPARC64-NEXT:    rd %pc, %o7
+; SPARC64-NEXT:  .Ltmp2:
+; SPARC64-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT:  .Ltmp1:
+; SPARC64-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT:    add %i0, %o7, %i0
+; SPARC64-NEXT:    sethi %hi(value), %i1
+; SPARC64-NEXT:    add %i1, %lo(value), %i1
+; SPARC64-NEXT:    ldx [%i0+%i1], %i0
+; SPARC64-NEXT:    ld [%i0], %i0
+; SPARC64-NEXT:    ret
+; SPARC64-NEXT:    restore
+  %1 = load i32, ptr @value
+  ret i32 %1
+}
+
+attributes #0 = { "tune-cpu"="niagara" }


        


More information about the llvm-commits mailing list