[llvm] [clang][LoongArch] Align global symbol by size (PR #101309)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 02:22:29 PDT 2024


https://github.com/heiher updated https://github.com/llvm/llvm-project/pull/101309

From 37f06dda98eed9495f44e787fdaeb162ab7baec3 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Thu, 1 Aug 2024 16:45:48 +0800
Subject: [PATCH] [LoongArch] Align stack objects passed to memory intrinsics

Memcpy, and other memory intrinsics, typically try to use wider load/store
instructions if the source and destination addresses are aligned. In
CodeGenPrepare, look for calls to memory intrinsics and, if the object is on
the stack, align it to a 4-byte (32-bit) or 8-byte (64-bit) boundary if it is
large enough that we expect memcpy to use wider load/store instructions to
copy it.

Fixes #101295
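
For illustration, a minimal IR sketch of the kind of case this change targets
(the function name and buffer size are made up, not taken from the patch): an
under-aligned stack buffer filled by llvm.memcpy. With this change,
CodeGenPrepare may raise the alloca's alignment so the LoongArch backend can
lower the copy with word/doubleword loads and stores instead of byte-by-byte
accesses.

  ; Hypothetical example: a 32-byte stack buffer copied from %src.
  ; Raising the alloca alignment (to 4 on LA32, 8 on LA64) lets the
  ; inlined memcpy use wider loads and stores.
  define void @copy_example(ptr %src) {
    %buf = alloca [32 x i8], align 1
    call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr %src, i64 32, i1 false)
    ret void
  }

  declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1 immarg)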
---
 .../LoongArch/LoongArchISelLowering.cpp       |  21 +++
 .../Target/LoongArch/LoongArchISelLowering.h  |   3 +
 .../LoongArch/unaligned-memcpy-inline.ll      | 132 ++----------------
 3 files changed, 39 insertions(+), 117 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d80509cf39849..bdbb7ab0f5139 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Debug.h"
@@ -6162,3 +6163,23 @@ bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
     return false;
   return true;
 }
+
+// Memcpy, and other memory intrinsics, typically try to use wider load/store
+// if the source/dest is aligned and the copy size is large enough. We therefore
+// want to align such objects passed to memory intrinsics.
+bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
+                                                     unsigned &MinSize,
+                                                     Align &PrefAlign) const {
+  if (!isa<MemIntrinsic>(CI))
+    return false;
+
+  if (Subtarget.is64Bit()) {
+    MinSize = 8;
+    PrefAlign = Align(8);
+  } else {
+    MinSize = 4;
+    PrefAlign = Align(4);
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e0..d834a5d8587fd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -257,6 +257,9 @@ class LoongArchTargetLowering : public TargetLowering {
   bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
   bool shouldExtendTypeInLibCall(EVT Type) const override;
 
+  bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+                              Align &PrefAlign) const override;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
index efe8dc779449b..ad48778d2d0ba 100644
--- a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
+++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
@@ -103,65 +103,17 @@ define void @t3() {
 ; LA32-NEXT:    .cfi_def_cfa_offset 64
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
 ; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.L.str)
-; LA32-NEXT:    ld.b $a1, $a0, 21
-; LA32-NEXT:    ld.bu $a2, $a0, 20
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    ld.h $a1, $a0, 20
+; LA32-NEXT:    ld.w $a2, $a0, 16
 ; LA32-NEXT:    st.h $a1, $sp, 20
-; LA32-NEXT:    ld.bu $a1, $a0, 17
-; LA32-NEXT:    ld.bu $a2, $a0, 16
-; LA32-NEXT:    ld.bu $a3, $a0, 18
-; LA32-NEXT:    ld.bu $a4, $a0, 19
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a4, 24
-; LA32-NEXT:    or $a2, $a3, $a2
-; LA32-NEXT:    or $a1, $a2, $a1
-; LA32-NEXT:    st.w $a1, $sp, 16
-; LA32-NEXT:    ld.bu $a1, $a0, 13
-; LA32-NEXT:    ld.bu $a2, $a0, 12
-; LA32-NEXT:    ld.bu $a3, $a0, 14
-; LA32-NEXT:    ld.bu $a4, $a0, 15
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a4, 24
-; LA32-NEXT:    or $a2, $a3, $a2
-; LA32-NEXT:    or $a1, $a2, $a1
+; LA32-NEXT:    st.w $a2, $sp, 16
+; LA32-NEXT:    ld.w $a1, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a3, $a0, 4
+; LA32-NEXT:    ld.w $a0, $a0, 0
 ; LA32-NEXT:    st.w $a1, $sp, 12
-; LA32-NEXT:    ld.bu $a1, $a0, 9
-; LA32-NEXT:    ld.bu $a2, $a0, 8
-; LA32-NEXT:    ld.bu $a3, $a0, 10
-; LA32-NEXT:    ld.bu $a4, $a0, 11
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a4, 24
-; LA32-NEXT:    or $a2, $a3, $a2
-; LA32-NEXT:    or $a1, $a2, $a1
-; LA32-NEXT:    st.w $a1, $sp, 8
-; LA32-NEXT:    ld.bu $a1, $a0, 5
-; LA32-NEXT:    ld.bu $a2, $a0, 4
-; LA32-NEXT:    ld.bu $a3, $a0, 6
-; LA32-NEXT:    ld.bu $a4, $a0, 7
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a4, 24
-; LA32-NEXT:    or $a2, $a3, $a2
-; LA32-NEXT:    or $a1, $a2, $a1
-; LA32-NEXT:    st.w $a1, $sp, 4
-; LA32-NEXT:    ld.bu $a1, $a0, 1
-; LA32-NEXT:    ld.bu $a2, $a0, 0
-; LA32-NEXT:    ld.bu $a3, $a0, 2
-; LA32-NEXT:    ld.bu $a0, $a0, 3
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    or $a0, $a0, $a2
-; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    st.w $a2, $sp, 8
+; LA32-NEXT:    st.w $a3, $sp, 4
 ; LA32-NEXT:    st.w $a0, $sp, 0
 ; LA32-NEXT:    addi.w $sp, $sp, 64
 ; LA32-NEXT:    ret
@@ -172,67 +124,13 @@ define void @t3() {
 ; LA64-NEXT:    .cfi_def_cfa_offset 64
 ; LA64-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
 ; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(.L.str)
-; LA64-NEXT:    ld.b $a1, $a0, 21
-; LA64-NEXT:    ld.bu $a2, $a0, 20
-; LA64-NEXT:    slli.d $a1, $a1, 8
-; LA64-NEXT:    or $a1, $a1, $a2
+; LA64-NEXT:    ld.h $a1, $a0, 20
+; LA64-NEXT:    ld.w $a2, $a0, 16
+; LA64-NEXT:    ld.d $a3, $a0, 8
+; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    st.h $a1, $sp, 20
-; LA64-NEXT:    ld.bu $a1, $a0, 17
-; LA64-NEXT:    ld.bu $a2, $a0, 16
-; LA64-NEXT:    ld.bu $a3, $a0, 18
-; LA64-NEXT:    ld.b $a4, $a0, 19
-; LA64-NEXT:    slli.d $a1, $a1, 8
-; LA64-NEXT:    or $a1, $a1, $a2
-; LA64-NEXT:    slli.d $a2, $a3, 16
-; LA64-NEXT:    slli.d $a3, $a4, 24
-; LA64-NEXT:    or $a2, $a3, $a2
-; LA64-NEXT:    or $a1, $a2, $a1
-; LA64-NEXT:    st.w $a1, $sp, 16
-; LA64-NEXT:    ld.bu $a1, $a0, 9
-; LA64-NEXT:    ld.bu $a2, $a0, 8
-; LA64-NEXT:    ld.bu $a3, $a0, 10
-; LA64-NEXT:    ld.bu $a4, $a0, 11
-; LA64-NEXT:    slli.d $a1, $a1, 8
-; LA64-NEXT:    or $a1, $a1, $a2
-; LA64-NEXT:    slli.d $a2, $a3, 16
-; LA64-NEXT:    slli.d $a3, $a4, 24
-; LA64-NEXT:    or $a2, $a3, $a2
-; LA64-NEXT:    or $a1, $a2, $a1
-; LA64-NEXT:    ld.bu $a2, $a0, 13
-; LA64-NEXT:    ld.bu $a3, $a0, 12
-; LA64-NEXT:    ld.bu $a4, $a0, 14
-; LA64-NEXT:    ld.bu $a5, $a0, 15
-; LA64-NEXT:    slli.d $a2, $a2, 8
-; LA64-NEXT:    or $a2, $a2, $a3
-; LA64-NEXT:    slli.d $a3, $a4, 16
-; LA64-NEXT:    slli.d $a4, $a5, 24
-; LA64-NEXT:    or $a3, $a4, $a3
-; LA64-NEXT:    or $a2, $a3, $a2
-; LA64-NEXT:    slli.d $a2, $a2, 32
-; LA64-NEXT:    or $a1, $a2, $a1
-; LA64-NEXT:    st.d $a1, $sp, 8
-; LA64-NEXT:    ld.bu $a1, $a0, 1
-; LA64-NEXT:    ld.bu $a2, $a0, 0
-; LA64-NEXT:    ld.bu $a3, $a0, 2
-; LA64-NEXT:    ld.bu $a4, $a0, 3
-; LA64-NEXT:    slli.d $a1, $a1, 8
-; LA64-NEXT:    or $a1, $a1, $a2
-; LA64-NEXT:    slli.d $a2, $a3, 16
-; LA64-NEXT:    slli.d $a3, $a4, 24
-; LA64-NEXT:    or $a2, $a3, $a2
-; LA64-NEXT:    or $a1, $a2, $a1
-; LA64-NEXT:    ld.bu $a2, $a0, 5
-; LA64-NEXT:    ld.bu $a3, $a0, 4
-; LA64-NEXT:    ld.bu $a4, $a0, 6
-; LA64-NEXT:    ld.bu $a0, $a0, 7
-; LA64-NEXT:    slli.d $a2, $a2, 8
-; LA64-NEXT:    or $a2, $a2, $a3
-; LA64-NEXT:    slli.d $a3, $a4, 16
-; LA64-NEXT:    slli.d $a0, $a0, 24
-; LA64-NEXT:    or $a0, $a0, $a3
-; LA64-NEXT:    or $a0, $a0, $a2
-; LA64-NEXT:    slli.d $a0, $a0, 32
-; LA64-NEXT:    or $a0, $a0, $a1
+; LA64-NEXT:    st.w $a2, $sp, 16
+; LA64-NEXT:    st.d $a3, $sp, 8
 ; LA64-NEXT:    st.d $a0, $sp, 0
 ; LA64-NEXT:    addi.d $sp, $sp, 64
 ; LA64-NEXT:    ret


