[clang] 974e2e6 - [RISCV] Adjust RV64I data layout by using n32:64 in layout string

Fri Oct 28 08:27:19 PDT 2022

Author: Craig Topper
Date: 2022-10-28T08:27:03-07:00
New Revision: 974e2e690b4024c2677dde26cc76ec31e0047c1d

URL: https://github.com/llvm/llvm-project/commit/974e2e690b4024c2677dde26cc76ec31e0047c1d
DIFF: https://github.com/llvm/llvm-project/commit/974e2e690b4024c2677dde26cc76ec31e0047c1d.diff

LOG: [RISCV] Adjust RV64I data layout by using n32:64 in layout string

Although the i32 type is illegal in the backend, RV64I has pretty good support for i32 types by using W instructions.

By adding n32 to the DataLayout string, middle end optimizations will consider i32 to be a native type. One known effect of this is enabling LoopStrengthReduce on loops with i32 induction variables. This can be beneficial because C/C++ code often has loops with i32 induction variables due to the use of `int` or `unsigned int`.

If this patch exposes performance issues, those are better addressed by tuning LSR or other passes.

Reviewed By: asb, frasercrmck

Differential Revision: https://reviews.llvm.org/D116735

Added: 
    

Modified: 
    clang/lib/Basic/Targets/RISCV.h
    llvm/docs/ReleaseNotes.rst
    llvm/lib/IR/AutoUpgrade.cpp
    llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
    llvm/test/CodeGen/RISCV/aext-to-sext.ll
    llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
    llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
    llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 868b1bd5ad04..a80fbbd7a095 100644

--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
       : RISCVTargetInfo(Triple, Opts) {
     LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
     IntMaxType = Int64Type = SignedLong;
-    resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
+    resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
   }
 
   bool setABI(const std::string &Name) override {

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 1515fe47be37..9205d24346e0 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -119,6 +119,9 @@ Changes to the RISC-V Backend
 
 * Support for the unratified Zbe, Zbf, Zbm, Zbp, Zbr, and Zbt extensions have
   been removed.
+* i32 is now a native type in the datalayout string. This enables
+  LoopStrengthReduce for loops with i32 induction variables, among other
+  optimizations.
 
 Changes to the WebAssembly Backend
 ----------------------------------

diff  --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d1732a7dee99..15961c32ea8b 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -4847,6 +4847,14 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
   }
 
+  if (T.isRISCV64()) {
+    // Make i32 a native type for 64-bit RISC-V.
+    auto I = DL.find("-n64-");
+    if (I != StringRef::npos)
+      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
+    return DL.str();
+  }
+
   std::string Res = DL.str();
   if (!T.isX86())
     return Res;

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2f5ea98c7d4d..d4529442df4e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -69,7 +69,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
 
 static StringRef computeDataLayout(const Triple &TT) {
   if (TT.isArch64Bit())
-    return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
+    return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
   assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
   return "e-m:e-p:32:32-i64:64-n32-S128";
 }

diff  --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index 4a533a187af6..806c495fa677 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -11,24 +11,21 @@
 define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-LABEL: quux:
 ; RV64I:       # %bb.0: # %bb
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    beq a0, a1, .LBB0_3
 ; RV64I-NEXT:  # %bb.1: # %bb2.preheader
-; RV64I-NEXT:    mv s0, a1
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    subw s0, a1, a0
 ; RV64I-NEXT:  .LBB0_2: # %bb2
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    call hoge@plt
-; RV64I-NEXT:    addiw s1, s1, 1
-; RV64I-NEXT:    bne s1, s0, .LBB0_2
+; RV64I-NEXT:    addiw s0, s0, -1
+; RV64I-NEXT:    bnez s0, .LBB0_2
 ; RV64I-NEXT:  .LBB0_3: # %bb6
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 bb:
   %tmp = icmp eq i32 %arg, %arg1

diff  --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
index 4f11dcb51b7f..12d8bae8a636 100644
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
@@ -52,24 +52,20 @@ define void @test(i32 signext %i) nounwind {
 ; RV64-LABEL: test:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    slliw a1, a0, 1
-; RV64-NEXT:    lui a4, 2
-; RV64-NEXT:    blt a4, a1, .LBB0_3
+; RV64-NEXT:    lui a3, 2
+; RV64-NEXT:    blt a3, a1, .LBB0_3
 ; RV64-NEXT:  # %bb.1: # %bb.preheader
-; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    lui a3, %hi(flags2)
-; RV64-NEXT:    addi a3, a3, %lo(flags2)
-; RV64-NEXT:    addiw a4, a4, 1
+; RV64-NEXT:    lui a2, %hi(flags2)
+; RV64-NEXT:    addi a2, a2, %lo(flags2)
+; RV64-NEXT:    addiw a3, a3, 1
 ; RV64-NEXT:  .LBB0_2: # %bb
 ; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64-NEXT:    mulw a5, a2, a0
-; RV64-NEXT:    addw a5, a5, a1
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    srli a6, a6, 32
-; RV64-NEXT:    add a6, a3, a6
-; RV64-NEXT:    sb zero, 0(a6)
-; RV64-NEXT:    addw a5, a5, a0
-; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    blt a5, a4, .LBB0_2
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    srli a4, a4, 32
+; RV64-NEXT:    add a4, a2, a4
+; RV64-NEXT:    addw a1, a1, a0
+; RV64-NEXT:    sb zero, 0(a4)
+; RV64-NEXT:    blt a1, a3, .LBB0_2
 ; RV64-NEXT:  .LBB0_3: # %return
 ; RV64-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
index ede08528198b..bc7b32d68856 100644
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
@@ -53,25 +53,24 @@ define void @test(i32 signext %row, i32 signext %N.in) nounwind {
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    blez a1, .LBB0_3
 ; RV64-NEXT:  # %bb.1: # %cond_true.preheader
-; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    li a2, 0
 ; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    lui a2, %hi(A)
-; RV64-NEXT:    addi a2, a2, %lo(A)
-; RV64-NEXT:    add a0, a2, a0
-; RV64-NEXT:    li a2, 4
-; RV64-NEXT:    li a3, 5
+; RV64-NEXT:    lui a3, %hi(A)
+; RV64-NEXT:    addi a3, a3, %lo(A)
+; RV64-NEXT:    add a0, a3, a0
+; RV64-NEXT:    addi a3, a0, 4
+; RV64-NEXT:    li a4, 4
+; RV64-NEXT:    li a5, 5
 ; RV64-NEXT:  .LBB0_2: # %cond_true
 ; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64-NEXT:    addiw a5, a4, 1
-; RV64-NEXT:    slli a6, a5, 2
+; RV64-NEXT:    sw a4, 0(a3)
+; RV64-NEXT:    addiw a6, a2, 2
+; RV64-NEXT:    slli a6, a6, 2
 ; RV64-NEXT:    add a6, a0, a6
-; RV64-NEXT:    sw a2, 0(a6)
-; RV64-NEXT:    addiw a4, a4, 2
-; RV64-NEXT:    slli a4, a4, 2
-; RV64-NEXT:    add a4, a0, a4
-; RV64-NEXT:    sw a3, 0(a4)
-; RV64-NEXT:    mv a4, a5
-; RV64-NEXT:    bne a5, a1, .LBB0_2
+; RV64-NEXT:    sw a5, 0(a6)
+; RV64-NEXT:    addiw a2, a2, 1
+; RV64-NEXT:    addi a3, a3, 4
+; RV64-NEXT:    bne a1, a2, .LBB0_2
 ; RV64-NEXT:  .LBB0_3: # %return
 ; RV64-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
index 69361c543532..9c0ea308a0e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
@@ -794,20 +794,20 @@ define void @strided_load_startval_add_with_splat(i8* noalias nocapture %0, i8*
 ; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    beq a4, a5, .LBB12_7
 ; CHECK-NEXT:  .LBB12_5:
-; CHECK-NEXT:    slli a2, a3, 2
-; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    addiw a2, a3, -1024
+; CHECK-NEXT:    add a0, a0, a3
+; CHECK-NEXT:    slli a4, a3, 2
+; CHECK-NEXT:    add a3, a4, a3
+; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:  .LBB12_6: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    lb a4, 0(a1)
-; CHECK-NEXT:    add a5, a0, a3
-; CHECK-NEXT:    lb a6, 0(a5)
-; CHECK-NEXT:    addw a4, a6, a4
-; CHECK-NEXT:    sb a4, 0(a5)
-; CHECK-NEXT:    addiw a4, a3, 1
-; CHECK-NEXT:    addi a3, a3, 1
+; CHECK-NEXT:    lb a3, 0(a1)
+; CHECK-NEXT:    lb a4, 0(a0)
+; CHECK-NEXT:    addw a3, a4, a3
+; CHECK-NEXT:    sb a3, 0(a0)
+; CHECK-NEXT:    addiw a2, a2, 1
+; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    addi a1, a1, 5
-; CHECK-NEXT:    bne a4, a2, .LBB12_6
+; CHECK-NEXT:    bnez a2, .LBB12_6
 ; CHECK-NEXT:  .LBB12_7:
 ; CHECK-NEXT:    ret
   %4 = icmp eq i32 %2, 1024

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 365f7f976498..952e94243f8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -286,14 +286,12 @@ define dso_local void @splat_load_licm(float* %0) {
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI12_0)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vlse32.v v8, (a1), zero
-; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    li a2, 1024
+; RV64-NEXT:    li a1, 1024
 ; RV64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV64-NEXT:    slli a3, a1, 2
-; RV64-NEXT:    add a3, a0, a3
-; RV64-NEXT:    addiw a1, a1, 4
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    bne a1, a2, .LBB12_1
+; RV64-NEXT:    vse32.v v8, (a0)
+; RV64-NEXT:    addiw a1, a1, -4
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    bnez a1, .LBB12_1
 ; RV64-NEXT:  # %bb.2:
 ; RV64-NEXT:    ret
   br label %2

diff  --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 0c835744e4fa..1987e102fda9 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -31,6 +31,11 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   // Check that AMDGPU targets add -G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1");
   EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), "e-p:64:64-G1");
+
+  // Check that RISCV64 upgrades -n64 to -n32:64.
+  EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128",
+                                    "riscv64"),
+            "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128");
 }
 
 TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {