[llvm] [GlobalISel] Micro-optimize getConstantVRegValWithLookThrough (PR #91969)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 07:09:24 PDT 2024


https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/91969

I was benchmarking the MatchTable when I found that `getConstantVRegValWithLookThrough` took a non-negligible amount of time, about 7.5% of all of `AArch64PreLegalizerCombinerImpl::tryCombineAll`.

I decided to take a closer look to see if I could squeeze some performance out of it, and I landed on a few changes that:
 - Avoid copying APint unnecessarily, especially returning std::optional<APInt> can be expensive when a out parameter also works.
 - Avoid indirect call by using templated function pointers instead of function_ref/std::function

Both of those changes seem to speedup this function by about 50%, but my benchmarking (`perf record`) seems inconsistent (so take measurements with a grain of salt), I saw as high as 4.5% and as low as 2% for this function on the exact same input after the changes, but it never got close again to 7% in a few runs so this looks like a stable improvement.

>From af84fe718c40c1b7171179ffb450173079ca8aaa Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 13 May 2024 15:44:46 +0200
Subject: [PATCH] [GlobalISel] Micro-optimize getConstantVRegValWithLookThrough

I was benchmarking the MatchTable when I found that getConstantVRegValWithLookThrough took a non-negligible amount of time, about 7.5% of all of AArch64PreLegalizerCombinerImpl::tryCombineAll.

I decided to take a closer look to see if I could squeeze some performance out of it, and I landed on a few changes that:
 - Avoid copying APint unnecessarily, especially returning std::optional<APInt> can be expensive when a out parameter also works.
 - Avoid indirect call by using templated function pointers instead of function_ref/std::function

Both of those changes seem to speedup this function by about 50%, but my benchmarking (`perf record`) seems inconsistent (so take measurements with a grain of salt), I saw as high as 4.5% and as low as 2% for this function on the exact same input after the changes, but it never got close again to 7% in a few runs so this looks like a stable improvement.
---
 llvm/lib/CodeGen/GlobalISel/Utils.cpp | 76 ++++++++++++++++-----------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 4e3781cb4e9d5..cd5dc0e01ed0e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -313,13 +313,22 @@ llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
 
 namespace {
 
-typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
-typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
-
-std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
-    Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
-    GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
-    bool LookThroughAnyExt = false) {
+// This function is used in many places, and as such, it has some
+// micro-optimizations to try and make it as fast as it can be.
+//
+// - We use template arguments to avoid an indirect call caused by passing a
+// function_ref/std::function
+// - GetAPCstValue does not return std::optional<APInt> as that's expensive.
+// Instead it returns true/false and places the result in a pre-constructed
+// APInt.
+//
+// Please change this function carefully and benchmark your changes.
+template <bool (*IsConstantOpcode)(const MachineInstr *),
+          bool (*GetAPCstValue)(const MachineInstr *MI, APInt &)>
+std::optional<ValueAndVReg>
+getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
+                                  bool LookThroughInstrs = true,
+                                  bool LookThroughAnyExt = false) {
   SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
   MachineInstr *MI;
 
@@ -353,26 +362,25 @@ std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
   if (!MI || !IsConstantOpcode(MI))
     return std::nullopt;
 
-  std::optional<APInt> MaybeVal = getAPCstValue(MI);
-  if (!MaybeVal)
+  APInt Val;
+  if (!GetAPCstValue(MI, Val))
     return std::nullopt;
-  APInt &Val = *MaybeVal;
-  for (auto [Opcode, Size] : reverse(SeenOpcodes)) {
-    switch (Opcode) {
+  for (auto &Pair : reverse(SeenOpcodes)) {
+    switch (Pair.first) {
     case TargetOpcode::G_TRUNC:
-      Val = Val.trunc(Size);
+      Val = Val.trunc(Pair.second);
       break;
     case TargetOpcode::G_ANYEXT:
     case TargetOpcode::G_SEXT:
-      Val = Val.sext(Size);
+      Val = Val.sext(Pair.second);
       break;
     case TargetOpcode::G_ZEXT:
-      Val = Val.zext(Size);
+      Val = Val.zext(Pair.second);
       break;
     }
   }
 
-  return ValueAndVReg{Val, VReg};
+  return ValueAndVReg{std::move(Val), VReg};
 }
 
 bool isIConstant(const MachineInstr *MI) {
@@ -394,42 +402,46 @@ bool isAnyConstant(const MachineInstr *MI) {
   return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
 }
 
-std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+bool getCImmAsAPInt(const MachineInstr *MI, APInt &Result) {
   const MachineOperand &CstVal = MI->getOperand(1);
-  if (CstVal.isCImm())
-    return CstVal.getCImm()->getValue();
-  return std::nullopt;
+  if (!CstVal.isCImm())
+    return false;
+  Result = CstVal.getCImm()->getValue();
+  return true;
 }
 
-std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+bool getCImmOrFPImmAsAPInt(const MachineInstr *MI, APInt &Result) {
   const MachineOperand &CstVal = MI->getOperand(1);
   if (CstVal.isCImm())
-    return CstVal.getCImm()->getValue();
-  if (CstVal.isFPImm())
-    return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
-  return std::nullopt;
+    Result = CstVal.getCImm()->getValue();
+  else if (CstVal.isFPImm())
+    Result = CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+  else
+    return false;
+  return true;
 }
 
 } // end anonymous namespace
 
 std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
-  return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
-                                           getCImmAsAPInt, LookThroughInstrs);
+  return getConstantVRegValWithLookThrough<isIConstant, getCImmAsAPInt>(
+      VReg, MRI, LookThroughInstrs);
 }
 
 std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
     bool LookThroughAnyExt) {
-  return getConstantVRegValWithLookThrough(
-      VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
-      LookThroughAnyExt);
+  return getConstantVRegValWithLookThrough<isAnyConstant,
+                                           getCImmOrFPImmAsAPInt>(
+      VReg, MRI, LookThroughInstrs, LookThroughAnyExt);
 }
 
 std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
-  auto Reg = getConstantVRegValWithLookThrough(
-      VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+  auto Reg =
+      getConstantVRegValWithLookThrough<isFConstant, getCImmOrFPImmAsAPInt>(
+          VReg, MRI, LookThroughInstrs);
   if (!Reg)
     return std::nullopt;
   return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),



More information about the llvm-commits mailing list