[llvm] 75117fb - [RISCV] Don't advertise i32->i64 zextload as free for RV64.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 6 08:16:47 PST 2022


Author: Craig Topper
Date: 2022-01-06T08:13:42-08:00
New Revision: 75117fb340daf544320a5844786051d478e15d4d

URL: https://github.com/llvm/llvm-project/commit/75117fb340daf544320a5844786051d478e15d4d
DIFF: https://github.com/llvm/llvm-project/commit/75117fb340daf544320a5844786051d478e15d4d.diff

LOG: [RISCV] Don't advertise i32->i64 zextload as free for RV64.

The zextload hook (isZExtFree on a load) is only used to determine
whether to insert a zero_extend or an any_extend for narrow values
leaving a basic block. Returning true from this hook tends to turn
any load whose result leaves its basic block into an LWU instead of
an LW.
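For context, here is a minimal, self-contained sketch of that
decision. This is a paraphrase, not the actual SelectionDAGBuilder
code; the helper names are hypothetical.

// Models how isZExtFree biases the extend used when a narrow value
// is copied out of its defining block. Sketch only; the real logic
// lives in SelectionDAG's copy-to-vreg path.
enum class ExtendKind { Any, Sign, Zero };

// Stand-in for the RV64 isZExtFree load check. Before this patch it
// also returned true for 32-bit loads, which forced LWU; after it,
// only i8/i16 loads are treated as free to zero extend.
constexpr bool isZExtFreeLoad(unsigned MemBits) {
  return MemBits == 8 || MemBits == 16;
}

constexpr ExtendKind chooseExtend(ExtendKind Preferred, unsigned MemBits) {
  // With no preferred extend, a "free" zext wins; this is what turned
  // LW into LWU for i32 loads whose values escape the block.
  if (Preferred == ExtendKind::Any && isZExtFreeLoad(MemBits))
    return ExtendKind::Zero;
  return Preferred;
}

static_assert(chooseExtend(ExtendKind::Any, 32) == ExtendKind::Any,
              "i32 load no longer forced to zero extend (LW, not LWU)");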

Since we tend to prefer sexts for i32 compares on RV64, this can
cause extra sext.w instructions to be created in other basic blocks.

If we use LW instead of LWU, the MIR pass from D116397 has a better
chance of removing them.
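A compile-checked sketch of the idea behind that pass (not its
actual code): on RV64, LW and the 32-bit "*W" ALU ops already sign
extend their results, so a following sext.w is redundant, whereas
LWU zero extends and the same sext.w could not be removed.

enum class Op { LW, LWU, ADDW, SEXT_W, ADD };

// Does this (hypothetical) opcode produce a sign-extended 32-bit
// value in a 64-bit register?
constexpr bool producesSignExtendedWord(Op O) {
  return O == Op::LW || O == Op::ADDW || O == Op::SEXT_W;
}

// sext.w of an already sign-extended value is a no-op.
constexpr bool sextWIsRedundant(Op DefiningOp) {
  return producesSignExtendedWord(DefiningOp);
}

static_assert(sextWIsRedundant(Op::LW), "lw sign extends on RV64");
static_assert(!sextWIsRedundant(Op::LWU), "lwu zero extends");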

Another option would be to teach getPreferredExtendForValue in
FunctionLoweringInfo.cpp about our preference for sign_extend of
i32 compares. That would cause SIGN_EXTEND to be chosen for any
value used by a compare instead of relying on the isZExtFree
heuristic. It would require code to convert from llvm::Type* to
EVT/MVT, as well as querying the type legalization actions for the
promoted type, in order to call
TargetLowering::isSExtCheaperThanZExt. That seemed like a lot of
extra steps when no other target wants it, though it would avoid
leaning on the MIR pass in some cases.
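Roughly, the extra plumbing would look like the following hedged
sketch; the helper function is hypothetical and this is not the
proposed implementation.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Sketch of the steps named above: llvm::Type* -> EVT, then the
// type-legalized (promoted) type, then the isSExtCheaperThanZExt
// query.
static ISD::NodeType preferSExtForCompare(const TargetLowering &TLI,
                                          const DataLayout &DL,
                                          LLVMContext &Ctx, Type *Ty) {
  EVT VT = TLI.getValueType(DL, Ty);
  if (!VT.isSimple())
    return ISD::ANY_EXTEND;
  // Ask type legalization what VT promotes to (i32 -> i64 on RV64).
  EVT PromotedVT = TLI.getTypeToTransformTo(Ctx, VT);
  return TLI.isSExtCheaperThanZExt(VT, PromotedVT) ? ISD::SIGN_EXTEND
                                                   : ISD::ANY_EXTEND;
}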

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D116567

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/atomic-rmw.ll
    llvm/test/CodeGen/RISCV/atomic-signext.ll
    llvm/test/CodeGen/RISCV/inline-asm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 624b24187a158..bf925f317d9c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1174,10 +1174,11 @@ bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
 
 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   // Zexts are free if they can be combined with a load.
+  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
+  // poorly with type legalization of compares preferring sext.
   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
     EVT MemVT = LD->getMemoryVT();
-    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
-         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
+    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
          LD->getExtensionType() == ISD::ZEXTLOAD))
       return true;

diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 27262442d257a..4c897b29ef1ee 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -11118,7 +11118,7 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB145_2
@@ -11208,7 +11208,7 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB146_2
@@ -11298,7 +11298,7 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB147_2
@@ -11388,7 +11388,7 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB148_2
@@ -11478,7 +11478,7 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB149_2
@@ -11568,7 +11568,7 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB150_2
@@ -11658,7 +11658,7 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB151_2
@@ -11748,7 +11748,7 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB152_2
@@ -11838,7 +11838,7 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB153_2
@@ -11928,7 +11928,7 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB154_2
@@ -12018,7 +12018,7 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB155_2
@@ -12108,7 +12108,7 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB156_2
@@ -12198,7 +12198,7 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB157_2
@@ -12288,7 +12288,7 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB158_2
@@ -12378,7 +12378,7 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB159_2
@@ -12468,7 +12468,7 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB160_2
@@ -12558,7 +12558,7 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB161_2
@@ -12648,7 +12648,7 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB162_2
@@ -12738,7 +12738,7 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB163_2
@@ -12828,7 +12828,7 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB164_2

diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 609863f127e88..a5adcf5e48021 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -2549,7 +2549,7 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB32_2
@@ -2639,7 +2639,7 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB33_2
@@ -2729,7 +2729,7 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB34_2
@@ -2819,7 +2819,7 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lwu a3, 0(a0)
+; RV64I-NEXT:    lw a3, 0(a0)
 ; RV64I-NEXT:    mv s2, a1
 ; RV64I-NEXT:    sext.w s1, a1
 ; RV64I-NEXT:    j .LBB35_2

diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll
index 4a96f1dbbd270..0b04a7df2d18a 100644
--- a/llvm/test/CodeGen/RISCV/inline-asm.ll
+++ b/llvm/test/CodeGen/RISCV/inline-asm.ll
@@ -19,7 +19,7 @@ define i32 @constraint_r(i32 %a) nounwind {
 ; RV64I-LABEL: constraint_r:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, %hi(gi)
-; RV64I-NEXT:    lwu a1, %lo(gi)(a1)
+; RV64I-NEXT:    lw a1, %lo(gi)(a1)
 ; RV64I-NEXT:    #APP
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    #NO_APP

