[llvm] [RISCV] Simplify one of the RV32 PACK isel patterns. (PR #152045)

Tue Aug 5 10:34:11 PDT 2025

https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/152045

>From 421b30f2d2aac401a69289fbcd8d16fd3f862696 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 5 Aug 2025 09:53:43 -0700
Subject: [PATCH 1/2] Pre-commit test

---
 llvm/test/CodeGen/RISCV/rv32zbkb.ll | 82 +++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
index 4aa6dd4dba6c2..5a73613fb046f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
@@ -319,3 +319,85 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
   %1 = zext i16 %a to i64
   ret i64 %1
 }
+
+define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3) nounwind {
+; RV32I-LABEL: pack_lo_packh_hi_packh:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a1, 8
+; RV32I-NEXT:    slli a2, a2, 16
+; RV32I-NEXT:    slli a3, a3, 24
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBKB-LABEL: pack_lo_packh_hi_packh:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    slli a3, a3, 24
+; RV32ZBKB-NEXT:    packh a0, a0, a1
+; RV32ZBKB-NEXT:    pack a0, a0, a2
+; RV32ZBKB-NEXT:    or a0, a0, a3
+; RV32ZBKB-NEXT:    ret
+  %a = zext i8 %0 to i32
+  %b = zext i8 %1 to i32
+  %c = zext i8 %2 to i32
+  %d = zext i8 %3 to i32
+  %e = shl i32 %b, 8
+  %f = shl i32 %c, 16
+  %g = shl i32 %d, 24
+  %h = or i32 %a, %e
+  %i = or i32 %h, %f
+  %j = or i32 %i, %g
+  ret i32 %j
+}
+
+define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
+; RV32I-LABEL: pack_lo_zext_hi_packh:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a2, 16
+; RV32I-NEXT:    slli a2, a2, 24
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    packh a1, a2, a2
+; RV32ZBKB-NEXT:    slli a1, a1, 16
+; RV32ZBKB-NEXT:    or a0, a1, a0
+; RV32ZBKB-NEXT:    ret
+  %a = zext i16 %0 to i32
+  %b = zext i8 %1 to i32
+  %c = zext i8 %2 to i32
+  %d = shl i32 %c, 8
+  %e = or i32 %c, %d
+  %f = shl i32 %e, 16
+  %g = or i32 %f, %a
+  ret i32 %g
+}
+
+; Negative test, %a isn't extended so we can't use pack for the outer or, but
+; we can use packh for the high half.
+define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
+; RV32I-LABEL: pack_lo_noext_hi_packh:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a2, 16
+; RV32I-NEXT:    slli a2, a2, 24
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    packh a1, a2, a2
+; RV32ZBKB-NEXT:    slli a1, a1, 16
+; RV32ZBKB-NEXT:    or a0, a1, a0
+; RV32ZBKB-NEXT:    ret
+  %b = zext i8 %1 to i32
+  %c = zext i8 %2 to i32
+  %d = shl i32 %c, 8
+  %e = or i32 %c, %d
+  %f = shl i32 %e, 16
+  %g = or i32 %f, %a
+  ret i32 %g
+}

>From 1ae00c1353e4203255743204b19108f9edb8108c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 4 Aug 2025 15:04:09 -0700
Subject: [PATCH 2/2] [RISCV] Simplify one of the RV32 PACK isel patterns.

This pattern previously checked a specific variant of 4 bytes
being packed that is generated by unaligned load expansion.

Our simplest PACK pattern misses this case because we don't have
a single shift left by 16. We have two left shifts hidden behind
another OR.

We only need the pattern to find the 2 shifts in the upper part;
for the lower part we only care that the upper 16 bits are zero.
If the lower bits can also be a PACKH, that can be selected separately
afterwards.

I believe this allows tablegen to create more patterns for permutations
of this pattern. The associative and commutative variant expansion
is limited to 3 children.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 14 ++++++++------
 llvm/test/CodeGen/RISCV/rv32zbkb.ll       |  8 +++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index d2a651444169c..04ffb05c513f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
 let Predicates = [HasStdExtZbkb, IsRV32] in {
 def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
           (PACK GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (or
-                  (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+
+// Match a pattern of 2 bytes being inserted into bits [31:16], with
+// bits [15:0] coming from a zero extended value. We can use pack with packh for
+// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
+// separately.
+def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
                   (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
-              (or
-                  (shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
-                  (zexti8 (XLenVT GPR:$op0rs1)))),
-          (PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)),
+              (zexti16 (XLenVT GPR:$rs1))),
+          (PACK (XLenVT GPR:$rs1),
                 (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
index 5a73613fb046f..7ebbd7802b70f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
@@ -333,10 +333,9 @@ define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2,
 ;
 ; RV32ZBKB-LABEL: pack_lo_packh_hi_packh:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    slli a3, a3, 24
 ; RV32ZBKB-NEXT:    packh a0, a0, a1
-; RV32ZBKB-NEXT:    pack a0, a0, a2
-; RV32ZBKB-NEXT:    or a0, a0, a3
+; RV32ZBKB-NEXT:    packh a1, a2, a3
+; RV32ZBKB-NEXT:    pack a0, a0, a1
 ; RV32ZBKB-NEXT:    ret
   %a = zext i8 %0 to i32
   %b = zext i8 %1 to i32
@@ -363,8 +362,7 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
 ; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
 ; RV32ZBKB:       # %bb.0:
 ; RV32ZBKB-NEXT:    packh a1, a2, a2
-; RV32ZBKB-NEXT:    slli a1, a1, 16
-; RV32ZBKB-NEXT:    or a0, a1, a0
+; RV32ZBKB-NEXT:    pack a0, a0, a1
 ; RV32ZBKB-NEXT:    ret
   %a = zext i16 %0 to i32
   %b = zext i8 %1 to i32