[llvm] 922fafa - [GlobalISel] Micro-optimize getConstantVRegValWithLookThrough (#91969)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 23:26:05 PDT 2024


Author: Pierre van Houtryve
Date: 2024-05-14T08:26:00+02:00
New Revision: 922fafaff83319e33e8a890a692df073d3ce55c9

URL: https://github.com/llvm/llvm-project/commit/922fafaff83319e33e8a890a692df073d3ce55c9
DIFF: https://github.com/llvm/llvm-project/commit/922fafaff83319e33e8a890a692df073d3ce55c9.diff

LOG: [GlobalISel] Micro-optimize getConstantVRegValWithLookThrough (#91969)

I was benchmarking the MatchTable when I found that
`getConstantVRegValWithLookThrough` took a non-negligible amount of
time, about 7.5% of all of
`AArch64PreLegalizerCombinerImpl::tryCombineAll`.

I decided to take a closer look to see if I could squeeze some
performance out of it, and I landed on a few changes that:
- Avoid copying APint unnecessarily, especially returning
std::optional<APInt> can be expensive when a out parameter also works.
- Avoid indirect call by using templated function pointers instead of
function_ref/std::function

Both of those changes seem to speedup this function by about 50%, but my
benchmarking (`perf record`) seems inconsistent (so take measurements
with a grain of salt), I saw as high as 4.5% and as low as 2% for this
function on the exact same input after the changes, but it never got
close again to 7% in a few runs so this looks like a stable improvement.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/Utils.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 4e3781cb4e9d5..cd5dc0e01ed0e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -313,13 +313,22 @@ llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
 
 namespace {
 
-typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
-typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
-
-std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
-    Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
-    GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
-    bool LookThroughAnyExt = false) {
+// This function is used in many places, and as such, it has some
+// micro-optimizations to try and make it as fast as it can be.
+//
+// - We use template arguments to avoid an indirect call caused by passing a
+// function_ref/std::function
+// - GetAPCstValue does not return std::optional<APInt> as that's expensive.
+// Instead it returns true/false and places the result in a pre-constructed
+// APInt.
+//
+// Please change this function carefully and benchmark your changes.
+template <bool (*IsConstantOpcode)(const MachineInstr *),
+          bool (*GetAPCstValue)(const MachineInstr *MI, APInt &)>
+std::optional<ValueAndVReg>
+getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
+                                  bool LookThroughInstrs = true,
+                                  bool LookThroughAnyExt = false) {
   SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
   MachineInstr *MI;
 
@@ -353,26 +362,25 @@ std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
   if (!MI || !IsConstantOpcode(MI))
     return std::nullopt;
 
-  std::optional<APInt> MaybeVal = getAPCstValue(MI);
-  if (!MaybeVal)
+  APInt Val;
+  if (!GetAPCstValue(MI, Val))
     return std::nullopt;
-  APInt &Val = *MaybeVal;
-  for (auto [Opcode, Size] : reverse(SeenOpcodes)) {
-    switch (Opcode) {
+  for (auto &Pair : reverse(SeenOpcodes)) {
+    switch (Pair.first) {
     case TargetOpcode::G_TRUNC:
-      Val = Val.trunc(Size);
+      Val = Val.trunc(Pair.second);
       break;
     case TargetOpcode::G_ANYEXT:
     case TargetOpcode::G_SEXT:
-      Val = Val.sext(Size);
+      Val = Val.sext(Pair.second);
       break;
     case TargetOpcode::G_ZEXT:
-      Val = Val.zext(Size);
+      Val = Val.zext(Pair.second);
       break;
     }
   }
 
-  return ValueAndVReg{Val, VReg};
+  return ValueAndVReg{std::move(Val), VReg};
 }
 
 bool isIConstant(const MachineInstr *MI) {
@@ -394,42 +402,46 @@ bool isAnyConstant(const MachineInstr *MI) {
   return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
 }
 
-std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+bool getCImmAsAPInt(const MachineInstr *MI, APInt &Result) {
   const MachineOperand &CstVal = MI->getOperand(1);
-  if (CstVal.isCImm())
-    return CstVal.getCImm()->getValue();
-  return std::nullopt;
+  if (!CstVal.isCImm())
+    return false;
+  Result = CstVal.getCImm()->getValue();
+  return true;
 }
 
-std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+bool getCImmOrFPImmAsAPInt(const MachineInstr *MI, APInt &Result) {
   const MachineOperand &CstVal = MI->getOperand(1);
   if (CstVal.isCImm())
-    return CstVal.getCImm()->getValue();
-  if (CstVal.isFPImm())
-    return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
-  return std::nullopt;
+    Result = CstVal.getCImm()->getValue();
+  else if (CstVal.isFPImm())
+    Result = CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+  else
+    return false;
+  return true;
 }
 
 } // end anonymous namespace
 
 std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
-  return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
-                                           getCImmAsAPInt, LookThroughInstrs);
+  return getConstantVRegValWithLookThrough<isIConstant, getCImmAsAPInt>(
+      VReg, MRI, LookThroughInstrs);
 }
 
 std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
     bool LookThroughAnyExt) {
-  return getConstantVRegValWithLookThrough(
-      VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
-      LookThroughAnyExt);
+  return getConstantVRegValWithLookThrough<isAnyConstant,
+                                           getCImmOrFPImmAsAPInt>(
+      VReg, MRI, LookThroughInstrs, LookThroughAnyExt);
 }
 
 std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
-  auto Reg = getConstantVRegValWithLookThrough(
-      VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+  auto Reg =
+      getConstantVRegValWithLookThrough<isFConstant, getCImmOrFPImmAsAPInt>(
+          VReg, MRI, LookThroughInstrs);
   if (!Reg)
     return std::nullopt;
   return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),


        


More information about the llvm-commits mailing list