[llvm] [SelectionDAG][RISCV] Mask constants to narrow size in TargetLowering::expandUnalignedStore. (PR #66567)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 19:20:03 PDT 2023


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/66567

If the SRL for Hi constant folds, but we don't remove those bits from Lo, we can end up with strange constant folding. I've only seen this when constants are lowered to constant pools during lowering on RISC-V.

On the first split we create two i32 truncating stores and an srl to shift the high part down. The srl gets constant folded to produce a new i32 constant, but the truncating store for the low half still uses the original i64 constant.
    
This original constant then gets lowered to a constant pool load before we revisit the stores to split them further. The constant pool load prevents constant folding of the additional srls.
    
After legalization is done, we run DAGCombiner and get some constant folding of the srls via computeKnownBits, which can peek through the constant pool load. This creates new constants that also need constant pool entries.
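
As a concrete illustration, here is a minimal standalone sketch (plain C++, not the actual DAG code; the value is the constant from the test below) of what the first split computes with and without the mask:

// Splitting an i64 store into two i32 halves, as expandUnalignedStore does.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Val = 0xfedcba9876543210ULL; // constant from the test case
  unsigned NumBits = 32;                // width of each half

  uint64_t Hi = Val >> NumBits;         // SRL constant folds to 0xfedcba98
  uint64_t LoUnmasked = Val;            // before: trunc store keeps all 64 bits
  uint64_t LoMasked = Val & ((1ULL << NumBits) - 1); // after: 0x76543210

  printf("Hi         = 0x%llx\n", (unsigned long long)Hi);
  printf("LoUnmasked = 0x%llx\n", (unsigned long long)LoUnmasked);
  printf("LoMasked   = 0x%llx\n", (unsigned long long)LoMasked);
  return 0;
}

The masked low half only carries the bits the truncating store will keep anyway, so it no longer drags the full 64-bit constant (and its constant pool entry) along.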

From 7de164c266a5c905744aa16cfc44852fe9da89b6 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 15 Sep 2023 18:53:53 -0700
Subject: [PATCH 1/2] [RISCV] Add test case to show bad codegen for unaligned
 i64 store of a large constant.

On the first split we create two i32 truncating stores and an srl to shift
the high part down. The srl gets constant folded to produce a new i32
constant, but the truncating store for the low half still uses the original
i64 constant.

This original constant then gets lowered to a constant pool load
before we revisit the stores to split them further. The constant pool
load prevents constant folding of the additional srls.

After legalization is done, we run DAGCombiner and get some constant
folding of the srls via computeKnownBits, which can peek through the
constant pool load. This creates new constants that also need constant
pool entries.
---
 .../CodeGen/RISCV/unaligned-load-store.ll     | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 429f0543b41b3db..5626c353f1cd5af 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -415,3 +415,75 @@ define void @merge_stores_i32_i64(ptr %p) {
   store i32 0, ptr %p2
   ret void
 }
+
+; FIXME: We shouldn't generate multiple constant pool entries with shifted
+; values.
+;.LCPI0_0:
+;        .quad   280223976814164                 # 0xfedcba987654
+;.LCPI0_1:
+;        .quad   71737338064426034               # 0xfedcba98765432
+;.LCPI0_2:
+;        .quad   -81985529216486896              # 0xfedcba9876543210
+define void @store_large_constant(ptr %x) {
+; RV32I-LABEL: store_large_constant:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 254
+; RV32I-NEXT:    sb a1, 7(a0)
+; RV32I-NEXT:    li a1, 220
+; RV32I-NEXT:    sb a1, 6(a0)
+; RV32I-NEXT:    li a1, 186
+; RV32I-NEXT:    sb a1, 5(a0)
+; RV32I-NEXT:    li a1, 152
+; RV32I-NEXT:    sb a1, 4(a0)
+; RV32I-NEXT:    li a1, 118
+; RV32I-NEXT:    sb a1, 3(a0)
+; RV32I-NEXT:    li a1, 84
+; RV32I-NEXT:    sb a1, 2(a0)
+; RV32I-NEXT:    li a1, 50
+; RV32I-NEXT:    sb a1, 1(a0)
+; RV32I-NEXT:    li a1, 16
+; RV32I-NEXT:    sb a1, 0(a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: store_large_constant:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 254
+; RV64I-NEXT:    sb a1, 7(a0)
+; RV64I-NEXT:    li a1, 220
+; RV64I-NEXT:    sb a1, 6(a0)
+; RV64I-NEXT:    li a1, 186
+; RV64I-NEXT:    sb a1, 5(a0)
+; RV64I-NEXT:    li a1, 152
+; RV64I-NEXT:    sb a1, 4(a0)
+; RV64I-NEXT:    li a1, 118
+; RV64I-NEXT:    lui a2, %hi(.LCPI16_0)
+; RV64I-NEXT:    ld a2, %lo(.LCPI16_0)(a2)
+; RV64I-NEXT:    lui a3, %hi(.LCPI16_1)
+; RV64I-NEXT:    ld a3, %lo(.LCPI16_1)(a3)
+; RV64I-NEXT:    lui a4, %hi(.LCPI16_2)
+; RV64I-NEXT:    ld a4, %lo(.LCPI16_2)(a4)
+; RV64I-NEXT:    sb a1, 3(a0)
+; RV64I-NEXT:    sb a2, 2(a0)
+; RV64I-NEXT:    sb a3, 1(a0)
+; RV64I-NEXT:    sb a4, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV32I-FAST-LABEL: store_large_constant:
+; RV32I-FAST:       # %bb.0:
+; RV32I-FAST-NEXT:    lui a1, 1043916
+; RV32I-FAST-NEXT:    addi a1, a1, -1384
+; RV32I-FAST-NEXT:    sw a1, 4(a0)
+; RV32I-FAST-NEXT:    lui a1, 484675
+; RV32I-FAST-NEXT:    addi a1, a1, 528
+; RV32I-FAST-NEXT:    sw a1, 0(a0)
+; RV32I-FAST-NEXT:    ret
+;
+; RV64I-FAST-LABEL: store_large_constant:
+; RV64I-FAST:       # %bb.0:
+; RV64I-FAST-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV64I-FAST-NEXT:    ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-FAST-NEXT:    sd a1, 0(a0)
+; RV64I-FAST-NEXT:    ret
+  store i64 18364758544493064720, ptr %x, align 1
+  ret void
+}

From a4a8478a239f15865f9013f215c0cdb758637d4d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 15 Sep 2023 19:12:06 -0700
Subject: [PATCH 2/2] [SelectionDAG][RISCV] Mask constants to narrow size in
 TargetLowering::expandUnalignedStore.

If the SRL for Hi constant folds, but we don't remove those bits from
Lo, we can end up with strange constant folding through DAGCombine later.
I've only seen this when constants are lowered to constant pools
during lowering on RISC-V.
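
The mask below is built with APInt::getLowBitsSet. As a minimal standalone
check of what it computes (using the test's constant; this snippet is an
aside, not part of the patch):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // Mask with the low 32 of 64 bits set: 0x00000000ffffffff.
  APInt Mask = APInt::getLowBitsSet(/*numBits=*/64, /*loBitsSet=*/32);

  APInt Val(64, 0xfedcba9876543210ULL);
  APInt Lo = Val & Mask;   // 0x76543210, cheap to materialize directly
  APInt Hi = Val.lshr(32); // 0xfedcba98, matches the constant-folded SRL

  assert(Lo == 0x76543210ULL && Hi == 0xfedcba98ULL);
  return 0;
}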
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  8 +++
 .../CodeGen/RISCV/unaligned-load-store.ll     | 69 +++++--------------
 2 files changed, 27 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bd1940994a87f0f..1a7799816711c03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9558,6 +9558,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
   SDValue ShiftAmount = DAG.getConstant(
       NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
   SDValue Lo = Val;
+  // If Val is a constant, replace the upper bits with 0. The SRL will constant
+  // fold and not use the upper bits. A smaller constant may be easier to
+  // materialize.
+  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
+    Lo = DAG.getNode(
+        ISD::AND, dl, VT, Lo,
+        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
+                        VT));
   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 
   // Store the two parts
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 5626c353f1cd5af..ce0d8fedbfb88f2 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -416,57 +416,26 @@ define void @merge_stores_i32_i64(ptr %p) {
   ret void
 }
 
-; FIXME: We shouldn't generate multiple constant pool entries with shifted
-; values.
-;.LCPI0_0:
-;        .quad   280223976814164                 # 0xfedcba987654
-;.LCPI0_1:
-;        .quad   71737338064426034               # 0xfedcba98765432
-;.LCPI0_2:
-;        .quad   -81985529216486896              # 0xfedcba9876543210
 define void @store_large_constant(ptr %x) {
-; RV32I-LABEL: store_large_constant:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 254
-; RV32I-NEXT:    sb a1, 7(a0)
-; RV32I-NEXT:    li a1, 220
-; RV32I-NEXT:    sb a1, 6(a0)
-; RV32I-NEXT:    li a1, 186
-; RV32I-NEXT:    sb a1, 5(a0)
-; RV32I-NEXT:    li a1, 152
-; RV32I-NEXT:    sb a1, 4(a0)
-; RV32I-NEXT:    li a1, 118
-; RV32I-NEXT:    sb a1, 3(a0)
-; RV32I-NEXT:    li a1, 84
-; RV32I-NEXT:    sb a1, 2(a0)
-; RV32I-NEXT:    li a1, 50
-; RV32I-NEXT:    sb a1, 1(a0)
-; RV32I-NEXT:    li a1, 16
-; RV32I-NEXT:    sb a1, 0(a0)
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: store_large_constant:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 254
-; RV64I-NEXT:    sb a1, 7(a0)
-; RV64I-NEXT:    li a1, 220
-; RV64I-NEXT:    sb a1, 6(a0)
-; RV64I-NEXT:    li a1, 186
-; RV64I-NEXT:    sb a1, 5(a0)
-; RV64I-NEXT:    li a1, 152
-; RV64I-NEXT:    sb a1, 4(a0)
-; RV64I-NEXT:    li a1, 118
-; RV64I-NEXT:    lui a2, %hi(.LCPI16_0)
-; RV64I-NEXT:    ld a2, %lo(.LCPI16_0)(a2)
-; RV64I-NEXT:    lui a3, %hi(.LCPI16_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI16_1)(a3)
-; RV64I-NEXT:    lui a4, %hi(.LCPI16_2)
-; RV64I-NEXT:    ld a4, %lo(.LCPI16_2)(a4)
-; RV64I-NEXT:    sb a1, 3(a0)
-; RV64I-NEXT:    sb a2, 2(a0)
-; RV64I-NEXT:    sb a3, 1(a0)
-; RV64I-NEXT:    sb a4, 0(a0)
-; RV64I-NEXT:    ret
+; SLOW-LABEL: store_large_constant:
+; SLOW:       # %bb.0:
+; SLOW-NEXT:    li a1, 254
+; SLOW-NEXT:    sb a1, 7(a0)
+; SLOW-NEXT:    li a1, 220
+; SLOW-NEXT:    sb a1, 6(a0)
+; SLOW-NEXT:    li a1, 186
+; SLOW-NEXT:    sb a1, 5(a0)
+; SLOW-NEXT:    li a1, 152
+; SLOW-NEXT:    sb a1, 4(a0)
+; SLOW-NEXT:    li a1, 118
+; SLOW-NEXT:    sb a1, 3(a0)
+; SLOW-NEXT:    li a1, 84
+; SLOW-NEXT:    sb a1, 2(a0)
+; SLOW-NEXT:    li a1, 50
+; SLOW-NEXT:    sb a1, 1(a0)
+; SLOW-NEXT:    li a1, 16
+; SLOW-NEXT:    sb a1, 0(a0)
+; SLOW-NEXT:    ret
 ;
 ; RV32I-FAST-LABEL: store_large_constant:
 ; RV32I-FAST:       # %bb.0:


