[llvm] [RISCV] Improve constant materialisation for stores of i8 negative constants (PR #92131)

Alex Bradbury via llvm-commits llvm-commits@lists.llvm.org
Tue May 14 07:50:12 PDT 2024


https://github.com/asb created https://github.com/llvm/llvm-project/pull/92131

This follows the same pattern as 20e62658735a1b03ecadc. Although we can't reduce the number of instructions used, if the constant can instead be materialised as a sign-extended 6-bit immediate then the 16-bit c.li instruction can be selected (saving code size). Although this _could_ be gated so it only happens when the C extension is enabled, I've opted not to: at worst it's neutral, and it doesn't seem helpful to add unnecessary divergence between the RVC and non-RVC paths.
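
To make the transform concrete, here is a minimal standalone sketch of the check being added (the helpers below are my own stand-ins for LLVM's isUInt<8>, isInt<6> and SignExtend64 templates from MathExtras.h, not the actual implementation):

    #include <cstdint>
    #include <cstdio>

    // Stand-ins for the MathExtras.h templates used in the patch.
    static bool isUInt8(int64_t X) { return X >= 0 && X <= 255; }
    static bool isInt6(int64_t X) { return X >= -32 && X <= 31; }
    static int64_t signExtend8(int64_t X) { return (int8_t)(X & 0xFF); }

    int main() {
      // `store i8 -1` reaches ISel as the constant 255. Sign-extending
      // bit 7 gives -1, which fits in a signed 6-bit immediate, so
      // `li a1, -1` (compressible to c.li) can be selected instead of
      // `li a1, 255`. In the patch this only fires when every user
      // consumes no more than the low 8 bits (hasAllBUsers).
      int64_t Imm = 255;
      if (isUInt8(Imm) && isInt6(signExtend8(Imm)))
        Imm = signExtend8(Imm);
      printf("materialise %lld\n", (long long)Imm); // prints: materialise -1
    }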

From 82dc56673e8d91b5fe449af491bac6c6de04a942 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@igalia.com>
Date: Tue, 14 May 2024 13:44:25 +0100
Subject: [PATCH 1/2] [RISCV][test] Precommit tests for byte store of -1

Although we can't reduce the number of instructions, if we selected `li
rd, -1` instead then this could be encoded in a 16-bit instruction.
---
 llvm/test/CodeGen/RISCV/imm.ll | 74 +++++++++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index c5c1657b526a6..344ed0d2b0836 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1558,6 +1558,60 @@ define i64 @imm_2reg_1() nounwind {
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
 
+; TODO: Selecting -1 would be better in this case as it can be loaded with a
+; 16-bit instruction when the compressed extension is enabled.
+define void @imm_store_i8_neg1(ptr %p) nounwind {
+; RV32I-LABEL: imm_store_i8_neg1:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 255
+; RV32I-NEXT:    sb a1, 0(a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: imm_store_i8_neg1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 255
+; RV64I-NEXT:    sb a1, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64IZBA-LABEL: imm_store_i8_neg1:
+; RV64IZBA:       # %bb.0:
+; RV64IZBA-NEXT:    li a1, 255
+; RV64IZBA-NEXT:    sb a1, 0(a0)
+; RV64IZBA-NEXT:    ret
+;
+; RV64IZBB-LABEL: imm_store_i8_neg1:
+; RV64IZBB:       # %bb.0:
+; RV64IZBB-NEXT:    li a1, 255
+; RV64IZBB-NEXT:    sb a1, 0(a0)
+; RV64IZBB-NEXT:    ret
+;
+; RV64IZBS-LABEL: imm_store_i8_neg1:
+; RV64IZBS:       # %bb.0:
+; RV64IZBS-NEXT:    li a1, 255
+; RV64IZBS-NEXT:    sb a1, 0(a0)
+; RV64IZBS-NEXT:    ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i8_neg1:
+; RV64IXTHEADBB:       # %bb.0:
+; RV64IXTHEADBB-NEXT:    li a1, 255
+; RV64IXTHEADBB-NEXT:    sb a1, 0(a0)
+; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_store_i8_neg1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a1, 255
+; RV32-REMAT-NEXT:    sb a1, 0(a0)
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_store_i8_neg1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a1, 255
+; RV64-REMAT-NEXT:    sb a1, 0(a0)
+; RV64-REMAT-NEXT:    ret
+  store i8 -1, ptr %p
+  ret void
+}
+
 define void @imm_store_i16_neg1(ptr %p) nounwind {
 ; RV32I-LABEL: imm_store_i16_neg1:
 ; RV32I:       # %bb.0:
@@ -2121,8 +2175,8 @@ define i64 @imm_70370820078523() {
 ;
 ; RV64I-POOL-LABEL: imm_70370820078523:
 ; RV64I-POOL:       # %bb.0:
-; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI37_0)
-; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI37_0)(a0)
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI38_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI38_0)(a0)
 ; RV64I-POOL-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_70370820078523:
@@ -2266,8 +2320,8 @@ define i64 @imm_neg_9223301666034697285() {
 ;
 ; RV64I-POOL-LABEL: imm_neg_9223301666034697285:
 ; RV64I-POOL:       # %bb.0:
-; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI39_0)
-; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI39_0)(a0)
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI40_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI40_0)(a0)
 ; RV64I-POOL-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_neg_9223301666034697285:
@@ -2544,8 +2598,8 @@ define i64 @imm_neg_9223354442718100411() {
 ;
 ; RV64I-POOL-LABEL: imm_neg_9223354442718100411:
 ; RV64I-POOL:       # %bb.0:
-; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI43_0)
-; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI44_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI44_0)(a0)
 ; RV64I-POOL-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_neg_9223354442718100411:
@@ -3855,8 +3909,8 @@ define i64 @imm64_same_lo_hi_optsize() nounwind optsize {
 ;
 ; RV64I-POOL-LABEL: imm64_same_lo_hi_optsize:
 ; RV64I-POOL:       # %bb.0:
-; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI64_0)
-; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI65_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
 ; RV64I-POOL-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm64_same_lo_hi_optsize:
@@ -3930,8 +3984,8 @@ define i64 @imm64_same_lo_hi_negative() nounwind {
 ;
 ; RV64I-POOL-LABEL: imm64_same_lo_hi_negative:
 ; RV64I-POOL:       # %bb.0:
-; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI65_0)
-; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
+; RV64I-POOL-NEXT:    lui a0, %hi(.LCPI66_0)
+; RV64I-POOL-NEXT:    ld a0, %lo(.LCPI66_0)(a0)
 ; RV64I-POOL-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm64_same_lo_hi_negative:

From ed69fa61e6610ccd182cc544b4c7e4d20d2354f5 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@igalia.com>
Date: Tue, 14 May 2024 14:49:50 +0100
Subject: [PATCH 2/2] [RISCV] Improve constant materialisation for stores of
 i8 negative constants

This follows the same pattern as 20e62658735a1b03ecadc. Although we
can't reduce the number of instructions used, if the constant can
instead be materialised as a sign-extended 6-bit immediate then the
16-bit c.li instruction can be used. Although this _could_ be gated so
it only happens when the C extension is enabled, I've opted not to: at
worst it's neutral, and it doesn't seem helpful to add unnecessary
divergence between the RVC and non-RVC paths.
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp    |  5 +++++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h      |  1 +
 llvm/test/CodeGen/RISCV/imm.ll                 | 18 ++++++++----------
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll     |  2 +-
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll    |  2 +-
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll    |  6 +++---
 .../rvv/fixed-vectors-reduction-formation.ll   |  8 ++++----
 .../test/CodeGen/RISCV/unaligned-load-store.ll |  2 +-
 8 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 3c4646b95715d..92014032a0f77 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -902,6 +902,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       return;
     }
     int64_t Imm = ConstNode->getSExtValue();
+    // If only the lower 8 bits are used, try to convert this to a simm6 by
+    // sign-extending bit 7. This is neutral without the C extension, and
+    // allows C.LI to be used if C is present.
+    if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
+      Imm = SignExtend64<6>(Imm);
     // If the upper XLen-16 bits are not used, try to convert this to a simm12
     // by sign extending bit 15.
     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 7d4aec2dfdc98..ece04dd7f4b75 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -121,6 +121,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
 
   bool hasAllNBitUsers(SDNode *Node, unsigned Bits,
                        const unsigned Depth = 0) const;
+  bool hasAllBUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 8); }
   bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
   bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
 
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 344ed0d2b0836..5fd25ab60db01 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1558,54 +1558,52 @@ define i64 @imm_2reg_1() nounwind {
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
 
-; TODO: Selecting -1 would be better in this case as it can be loaded with a
-; 16-bit instruction when the compressed extension is enabled.
 define void @imm_store_i8_neg1(ptr %p) nounwind {
 ; RV32I-LABEL: imm_store_i8_neg1:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 255
+; RV32I-NEXT:    li a1, -1
 ; RV32I-NEXT:    sb a1, 0(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: imm_store_i8_neg1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 255
+; RV64I-NEXT:    li a1, -1
 ; RV64I-NEXT:    sb a1, 0(a0)
 ; RV64I-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_store_i8_neg1:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a1, 255
+; RV64IZBA-NEXT:    li a1, -1
 ; RV64IZBA-NEXT:    sb a1, 0(a0)
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_store_i8_neg1:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a1, 255
+; RV64IZBB-NEXT:    li a1, -1
 ; RV64IZBB-NEXT:    sb a1, 0(a0)
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_store_i8_neg1:
 ; RV64IZBS:       # %bb.0:
-; RV64IZBS-NEXT:    li a1, 255
+; RV64IZBS-NEXT:    li a1, -1
 ; RV64IZBS-NEXT:    sb a1, 0(a0)
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm_store_i8_neg1:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a1, 255
+; RV64IXTHEADBB-NEXT:    li a1, -1
 ; RV64IXTHEADBB-NEXT:    sb a1, 0(a0)
 ; RV64IXTHEADBB-NEXT:    ret
 ;
 ; RV32-REMAT-LABEL: imm_store_i8_neg1:
 ; RV32-REMAT:       # %bb.0:
-; RV32-REMAT-NEXT:    li a1, 255
+; RV32-REMAT-NEXT:    li a1, -1
 ; RV32-REMAT-NEXT:    sb a1, 0(a0)
 ; RV32-REMAT-NEXT:    ret
 ;
 ; RV64-REMAT-LABEL: imm_store_i8_neg1:
 ; RV64-REMAT:       # %bb.0:
-; RV64-REMAT-NEXT:    li a1, 255
+; RV64-REMAT-NEXT:    li a1, -1
 ; RV64-REMAT-NEXT:    sb a1, 0(a0)
 ; RV64-REMAT-NEXT:    ret
   store i8 -1, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index b0f6bebea0381..8dc32d13e4a34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -17,7 +17,7 @@ define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
 ; CHECK-LABEL: shuffle_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 236
+; CHECK-NEXT:    li a0, -20
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index ed6c01aaf7fe1..592ce6fc5be0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -260,7 +260,7 @@ define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    li a0, 253
+; CHECK-NEXT:    li a0, -3
 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 ; CHECK-NEXT:    ret
   ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 58af6ac246d16..aba69dc846201 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -611,7 +611,7 @@ define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
 define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: concat_4xi8_start_undef_at_start:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 224
+; CHECK-NEXT:    li a0, -32
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vslideup.vi v8, v9, 4, v0.t
@@ -682,7 +682,7 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    li a0, 234
+; CHECK-NEXT:    li a0, -22
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
 ; CHECK-NEXT:    ret
@@ -699,7 +699,7 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
 ; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    li a0, 234
+; CHECK-NEXT:    li a0, -22
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 5f456c7824316..03624113a8262 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -160,7 +160,7 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) {
 define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
 ; CHECK-LABEL: reduce_sum_16xi32_prefix5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 224
+; CHECK-NEXT:    li a1, -32
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    vmv.v.i v8, -1
@@ -532,7 +532,7 @@ define i32 @reduce_xor_16xi32_prefix2(ptr %p) {
 define i32 @reduce_xor_16xi32_prefix5(ptr %p) {
 ; CHECK-LABEL: reduce_xor_16xi32_prefix5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 224
+; CHECK-NEXT:    li a1, -32
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    vmv.v.i v8, -1
@@ -620,7 +620,7 @@ define i32 @reduce_or_16xi32_prefix2(ptr %p) {
 define i32 @reduce_or_16xi32_prefix5(ptr %p) {
 ; CHECK-LABEL: reduce_or_16xi32_prefix5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 224
+; CHECK-NEXT:    li a1, -32
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    vmv.v.i v8, -1
@@ -757,7 +757,7 @@ define i32 @reduce_umax_16xi32_prefix2(ptr %p) {
 define i32 @reduce_umax_16xi32_prefix5(ptr %p) {
 ; CHECK-LABEL: reduce_umax_16xi32_prefix5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 224
+; CHECK-NEXT:    li a1, -32
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    vmv.v.i v8, -1
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index ce0d8fedbfb88..10497db6edc49 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -419,7 +419,7 @@ define void @merge_stores_i32_i64(ptr %p) {
 define void @store_large_constant(ptr %x) {
 ; SLOW-LABEL: store_large_constant:
 ; SLOW:       # %bb.0:
-; SLOW-NEXT:    li a1, 254
+; SLOW-NEXT:    li a1, -2
 ; SLOW-NEXT:    sb a1, 7(a0)
 ; SLOW-NEXT:    li a1, 220
 ; SLOW-NEXT:    sb a1, 6(a0)
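
The hunk above also shows why only some byte constants are rewritten: 254
(0xfe) sign-extends from bit 7 to -2, which fits simm6, while 220 (0xdc)
sign-extends to -36, which is outside [-32, 31], so `li a1, 220` is left
alone. A quick check using the same stand-in helpers as the sketch above:

    #include <cstdint>
    #include <cstdio>

    static bool isInt6(int64_t X) { return X >= -32 && X <= 31; }
    static int64_t signExtend8(int64_t X) { return (int8_t)(X & 0xFF); }

    int main() {
      for (int64_t Imm : {254, 220}) {
        int64_t SExt = signExtend8(Imm);
        printf("%3lld -> %4lld: %s\n", (long long)Imm, (long long)SExt,
               isInt6(SExt) ? "simm6, c.li usable" : "not simm6, unchanged");
      }
      // 254 ->   -2: simm6, c.li usable
      // 220 ->  -36: not simm6, unchanged
    }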


