[llvm] 52f3470 - [Hexagon] Make A2_tfrsi not cheap for operands exceeding 16 bits

Tue Jan 4 15:48:03 PST 2022

Author: SANTANU DAS
Date: 2022-01-04T15:46:26-08:00
New Revision: 52f347010a8c43118293feab05e09a4df52ba04f

URL: https://github.com/llvm/llvm-project/commit/52f347010a8c43118293feab05e09a4df52ba04f
DIFF: https://github.com/llvm/llvm-project/commit/52f347010a8c43118293feab05e09a4df52ba04f.diff

LOG: [Hexagon] Make A2_tfrsi not cheap for operands exceeding 16 bits

This patch helps reduce code size by avoiding the generation of
back-to-back A2_tfrsi instructions. It is enabled only at -Os/-Oz.

Added: 
    llvm/test/CodeGen/Hexagon/notcheap.ll

Modified: 
    llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
    llvm/lib/Target/Hexagon/HexagonInstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fdf480722a2f..1aedff9a2cc3 100644

--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -146,6 +146,31 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
   return Count;
 }
 
+// Report whether MI is as cheap as a move. When optimizing for size, an
+// A2_tfrsi whose operand has to be constant-extended is not considered
+// cheap, since it then occupies two slots in a packet.
+bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
+  // Apply the A2_tfrsi refinement only at Os/Oz.
+  if (!MI.getMF()->getFunction().hasOptSize())
+    return MI.isAsCheapAsAMove();
+
+  if (MI.getOpcode() == Hexagon::A2_tfrsi) {
+    // Bind by reference; the operand is only inspected, so no copy is needed.
+    const MachineOperand &Op = MI.getOperand(1);
+    // If the instruction has a global address as operand, it is not cheap
+    // since the operand will be constant-extended.
+    if (Op.isGlobal())
+      return false;
+    // If the instruction has an immediate operand that does not fit in a
+    // signed 16-bit value, it will be constant-extended and hence is not
+    // cheap.
+    if (Op.isImm() && !isInt<16>(Op.getImm()))
+      return false;
+  }
+  // Otherwise defer to MachineInstr's own isAsCheapAsAMove() answer.
+  return MI.isAsCheapAsAMove();
+}
+
 /// Find the hardware loop instruction used to set-up the specified loop.
 /// On Hexagon, we have two instructions used to set-up the hardware loop
 /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions

diff  --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 830f04d9eac3..05cdf6c98643 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -335,6 +335,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
   bool isTailCall(const MachineInstr &MI) const override;
+  bool isAsCheapAsAMove(const MachineInstr &MI) const override;
 
   /// HexagonInstrInfo specifics.
 

diff  --git a/llvm/test/CodeGen/Hexagon/notcheap.ll b/llvm/test/CodeGen/Hexagon/notcheap.ll
new file mode 100644
index 000000000000..1731666dfdd6
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/notcheap.ll
@@ -0,0 +1,45 @@
+; RUN: llc  -march=hexagon < %s | FileCheck %s
+; Check if only one transfer immediate instruction is generated for init.end block.
+; Since the transfer immediate of address operand is declared as not cheap, it
+; should generate only one transfer immediate, rather than two of them.
+
+; CHECK-LABEL: .LBB{{.*}}
+; CHECK: r0 = ##_ZZ3foovE1x
+; CHECK-NOT: r{{[1-9]*}} = ##_ZZ3foovE1x
+; CHECK:  memw(r0+#0) += #1
+; CHECK: r{{.*}} = dealloc_return
+
+%struct.FooBaz = type { i32 }
+@_ZZ3foovE1x = internal global %struct.FooBaz zeroinitializer, align 4
+@_ZGVZ3foovE1x = internal global i64 0, section ".bss._ZGVZ3foovE1x", align 8
+@__dso_handle = external dso_local global i8
+
+define dso_local i32* @_Z3foov() local_unnamed_addr optsize {
+entry:
+  %0 = load atomic i8, i8* bitcast (i64* @_ZGVZ3foovE1x to i8*) acquire, align 8
+  %guard.uninitialized = icmp eq i8 %0, 0
+  br i1 %guard.uninitialized, label %init.check, label %init.end
+
+init.check:                                       ; preds = %entry
+  %1 = tail call i32 @__cxa_guard_acquire(i64* nonnull @_ZGVZ3foovE1x)
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %init.end, label %init
+
+init:                                             ; preds = %init.check
+  tail call void @_ZN6FooBazC1Ev(%struct.FooBaz* nonnull @_ZZ3foovE1x)
+  %2 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.FooBaz*)* @_ZN6FooBazD1Ev to void (i8*)*), i8* bitcast (%struct.FooBaz* @_ZZ3foovE1x to i8*), i8* nonnull @__dso_handle)
+  tail call void @__cxa_guard_release(i64* nonnull @_ZGVZ3foovE1x)
+  br label %init.end
+
+init.end:                                         ; preds = %init, %init.check, %entry
+  %3 = load i32, i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0), align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0), align 4
+  ret i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0)
+}
+
+declare dso_local i32 @__cxa_guard_acquire(i64*) local_unnamed_addr
+declare dso_local void @_ZN6FooBazC1Ev(%struct.FooBaz*) unnamed_addr
+declare dso_local void @_ZN6FooBazD1Ev(%struct.FooBaz*) unnamed_addr
+declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr
+declare dso_local void @__cxa_guard_release(i64*) local_unnamed_addr