[lld] [ELF] Making cdsort default for function reordering (PR #68638)

Mon Oct 9 17:49:30 PDT 2023

https://github.com/spupyrev updated https://github.com/llvm/llvm-project/pull/68638

>From f5ccfd8402563c250653cee6f8c439336f53f3ea Mon Sep 17 00:00:00 2001
From: spupyrev <spupyrev at fb.com>
Date: Mon, 9 Oct 2023 14:11:21 -0700
Subject: [PATCH] [ELF] Making cdsort default for function reordering

---
 lld/ELF/Driver.cpp                            |  2 +-
 lld/docs/ld.lld.1                             |  4 +--
 lld/test/ELF/cgprofile-bad-clusters.s         |  2 +-
 lld/test/ELF/cgprofile-icf.s                  |  4 +--
 lld/test/ELF/cgprofile-print.s                |  5 +---
 lld/test/ELF/cgprofile-rela.test              |  2 +-
 lld/test/ELF/cgprofile-reproduce.s            |  5 +---
 lld/test/ELF/cgprofile-txt.s                  |  6 ++--
 .../SparseTensor/IR/SparseTensorAttrDefs.td   | 16 +++++-----
 .../SparseTensor/roundtrip_encoding.mlir      | 30 +++++++++----------
 10 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 6272276e94b2d35..e2100d00d54ede6 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1095,7 +1095,7 @@ static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) {
 }
 
 static CGProfileSortKind getCGProfileSortKind(opt::InputArgList &args) {
-  StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "hfsort");
+  StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "cdsort");
   if (s == "hfsort")
     return CGProfileSortKind::Hfsort;
   if (s == "cdsort")
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 2e46fc18132f3e0..12b17dd37796d13 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -128,9 +128,9 @@ may be:
 .It Cm none
 Ignore call graph profile.
 .It Cm hfsort
-Use hfsort (default).
+Use hfsort.
 .It Cm cdsort
-Use cdsort.
+Use cdsort (default).
 .El
 .Pp
 .It Fl -color-diagnostics Ns = Ns Ar value
diff --git a/lld/test/ELF/cgprofile-bad-clusters.s b/lld/test/ELF/cgprofile-bad-clusters.s
index c162e981acdd633..88e68bfb7b2c0a1 100644
--- a/lld/test/ELF/cgprofile-bad-clusters.s
+++ b/lld/test/ELF/cgprofile-bad-clusters.s
@@ -10,7 +10,7 @@
 # RUN: echo "F G 6" >> %t.call_graph
 # RUN: echo "G H 5" >> %t.call_graph
 # RUN: echo "H I 4" >> %t.call_graph
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2
 # RUN: llvm-readobj --symbols %t2 | FileCheck %s
 
     .section    .text.A,"ax",@progbits
diff --git a/lld/test/ELF/cgprofile-icf.s b/lld/test/ELF/cgprofile-icf.s
index a9de5613917cbf1..e28630d0eb30bf0 100644
--- a/lld/test/ELF/cgprofile-icf.s
+++ b/lld/test/ELF/cgprofile-icf.s
@@ -5,9 +5,9 @@
 # RUN: echo "A B 100" > %t.call_graph
 # RUN: echo "A C 40" >> %t.call_graph
 # RUN: echo "C D 61" >> %t.call_graph
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t.out -icf=all
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t.out -icf=all
 # RUN: llvm-readobj --symbols %t.out | FileCheck %s
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2.out
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2.out
 # RUN: llvm-readobj --symbols %t2.out | FileCheck %s --check-prefix=NOICF
 
     .section    .text.D,"ax",@progbits
diff --git a/lld/test/ELF/cgprofile-print.s b/lld/test/ELF/cgprofile-print.s
index b103ef5109effbb..5f76e355371ef41 100644
--- a/lld/test/ELF/cgprofile-print.s
+++ b/lld/test/ELF/cgprofile-print.s
@@ -5,7 +5,7 @@
 # RUN: echo "B C 50" >> %t.call_graph
 # RUN: echo "C D 40" >> %t.call_graph
 # RUN: echo "D B 10" >> %t.call_graph
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --print-symbol-order=%t3
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --call-graph-profile-sort=cdsort --print-symbol-order=%t3
 # RUN: FileCheck %s --input-file %t3
 
 # CHECK: B
@@ -32,6 +32,3 @@ C:
 .globl  D
 D:
  nop
-
-
-
diff --git a/lld/test/ELF/cgprofile-rela.test b/lld/test/ELF/cgprofile-rela.test
index 189f169e65481ee..141dfd4c65b1ea1 100644
--- a/lld/test/ELF/cgprofile-rela.test
+++ b/lld/test/ELF/cgprofile-rela.test
@@ -3,7 +3,7 @@
 # REQUIRES: x86
 
 # RUN: yaml2obj %s -o %t.o
-# RUN: ld.lld %t.o -o %t
+# RUN: ld.lld --call-graph-profile-sort=hfsort %t.o -o %t
 # RUN: llvm-nm --no-sort %t | FileCheck %s
 # RUN: ld.lld --no-call-graph-profile-sort %t.o -o %t
 # RUN: llvm-nm --no-sort %t | FileCheck %s --check-prefix=NO-CG
diff --git a/lld/test/ELF/cgprofile-reproduce.s b/lld/test/ELF/cgprofile-reproduce.s
index b9cb269e4580d78..1b1b36151da99d1 100644
--- a/lld/test/ELF/cgprofile-reproduce.s
+++ b/lld/test/ELF/cgprofile-reproduce.s
@@ -5,7 +5,7 @@
 # RUN: echo "B C 50" >> %t.call_graph
 # RUN: echo "C D 40" >> %t.call_graph
 # RUN: echo "D B 10" >> %t.call_graph
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --print-symbol-order=%t3
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --call-graph-profile-sort=hfsort --print-symbol-order=%t3
 # RUN: ld.lld -e A %t --symbol-ordering-file %t3 -o %t2
 # RUN: llvm-readobj --symbols %t2 | FileCheck %s
 
@@ -37,6 +37,3 @@ C:
 .globl  D
 D:
  nop
-
-
-
diff --git a/lld/test/ELF/cgprofile-txt.s b/lld/test/ELF/cgprofile-txt.s
index c9194bbbc43cbe0..cf5b17627cfb63c 100644
--- a/lld/test/ELF/cgprofile-txt.s
+++ b/lld/test/ELF/cgprofile-txt.s
@@ -26,12 +26,12 @@
 # RUN: echo "TooManyPreds10 TooManyPreds 11" >> %t.call_graph
 # RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2
 # RUN: llvm-readobj --symbols %t2 | FileCheck %s
-## --call-graph-profile-sort=hfsort is the default.
-# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b
-# RUN: cmp %t2 %t2b
 
 # RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=cdsort -o %t2
 # RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CDSORT
+## --call-graph-profile-sort=cdsort is the default.
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b
+# RUN: cmp %t2 %t2b
 
 # RUN: not ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=sort \
 # RUN:   -o /dev/null 2>&1 | FileCheck %s --check-prefix=UNKNOWN
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index cacc8176c678241..afd978c1c57ebd4 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -133,9 +133,11 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     level-expressions collectively define an affine map from dimension-coordinates to
     level-coordinates. The dimension-expressions collectively define the inverse map,
     which only needs to be provided for elaborate cases where it cannot be inferred
-    automatically. Within the sparse storage format, we refer to indices that are
-    stored explicitly as **coordinates** and offsets into the storage format as
-    **positions**.
+    automatically.
+    
+    Each dimension could also have an optional `SparseTensorDimSliceAttr`.
+    Within the sparse storage format, we refer to indices that are stored explicitly
+    as **coordinates** and offsets into the storage format as **positions**.
 
     The supported level-formats are the following:
 
@@ -176,9 +178,6 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
       coordinate over all levels).  The choices are `8`, `16`, `32`,
       `64`, or, the default, `0` to indicate a native bitwidth.
 
-    - An optional array of `SparseTensorDimSliceAttr`, which specifies
-      how the sparse tensor is partitioned on each dimension.
-
     Examples:
 
     ```mlir
@@ -228,7 +227,8 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     // Same block sparse row storage (2x3 blocks) but this time
     // also with a redundant reverse mapping, which can be inferred.
     #BSR_explicit = #sparse_tensor.encoding<{
-      map = ( i = ib * 2 + ii,
+      map = { ib, jb, ii, jj }
+            ( i = ib * 2 + ii,
               j = jb * 3 + jj) ->
       ( ib = i floordiv 2 : dense,
         jb = j floordiv 3 : compressed,
@@ -265,7 +265,7 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
              j : #sparse_tensor<slice(0, 8, ?)>) ->
             (i : dense, j : compressed)
     }>
-    ... tensor<?x?xf64, #CSC_SLICE> ...
+    ... tensor<?x?xf64, #CSR_SLICE> ...
 
     ```
   }];
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
index c4ef50bee01ea2c..ae3805d8b774176 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
@@ -84,18 +84,18 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>)
 
 // -----
 
-#BCSR = #sparse_tensor.encoding<{
+#BSR = #sparse_tensor.encoding<{
    map = ( i, j ) ->
-      ( i floordiv 2 : compressed,
+      ( i floordiv 2 : dense,
         j floordiv 3 : compressed,
         i mod 2      : dense,
         j mod 3      : dense
       )
 }>
 
-// CHECK-LABEL: func private @sparse_bcsr(
-// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
-func.func private @sparse_bcsr(tensor<10x60xf64, #BCSR>)
+// CHECK-LABEL: func private @sparse_bsr(
+// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+func.func private @sparse_bsr(tensor<10x60xf64, #BSR>)
 
 
 // -----
@@ -143,39 +143,39 @@ func.func private @sparse_2_out_of_4(tensor<?x?xf64, #NV_24>)
 
 // -----
 
-#BCSR = #sparse_tensor.encoding<{
+#BSR = #sparse_tensor.encoding<{
   map = ( i, j ) ->
-  ( i floordiv 2 : compressed,
+  ( i floordiv 2 : dense,
     j floordiv 3 : compressed,
     i mod 2      : dense,
     j mod 3      : dense
   )
 }>
 
-// CHECK-LABEL: func private @BCSR(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
-func.func private @BCSR(%arg0: tensor<?x?xf64, #BCSR>) {
+// CHECK-LABEL: func private @BSR(
+// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+func.func private @BSR(%arg0: tensor<?x?xf64, #BSR>) {
   return
 }
 
 // -----
 
-#BCSR_explicit = #sparse_tensor.encoding<{
+#BSR_explicit = #sparse_tensor.encoding<{
   map =
   {il, jl, ii, jj}
   ( i = il * 2 + ii,
     j = jl * 3 + jj
   ) ->
-  ( il = i floordiv 2 : compressed,
+  ( il = i floordiv 2 : dense,
     jl = j floordiv 3 : compressed,
     ii = i mod 2      : dense,
     jj = j mod 3      : dense
   )
 }>
 
-// CHECK-LABEL: func private @BCSR_explicit(
-// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : compressed, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
-func.func private @BCSR_explicit(%arg0: tensor<?x?xf64, #BCSR_explicit>) {
+// CHECK-LABEL: func private @BSR_explicit(
+// CHECK-SAME: tensor<?x?xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>>
+func.func private @BSR_explicit(%arg0: tensor<?x?xf64, #BSR_explicit>) {
   return
 }