[llvm] [RISCV] Add packw+packh isel pattern for unaligned loads on RV64. (PR #152159)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 11:37:54 PDT 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/152159
>From c579d41876651d2e09ad0d5a94db7575b42e485c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 5 Aug 2025 10:45:48 -0700
Subject: [PATCH 1/2] Pre-commit tests.
---
llvm/test/CodeGen/RISCV/rv64zbkb.ll | 126 ++++++++++++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 818ea723ca2e1..6584588e68f13 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -392,3 +392,129 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
%1 = zext i16 %a to i64
ret i64 %1
}
+
+define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: slli a3, a3, 24
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packw a0, a0, a2
+; RV64ZBKB-NEXT: or a0, a0, a3
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %h, %f
+ %j = or i32 %i, %g
+ store i32 %j, ptr %p
+ ret void
+}
+
+define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: slli a2, a2, 16
+; RV64ZBKB-NEXT: slli a3, a3, 24
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: or a0, a3, a0
+; RV64ZBKB-NEXT: or a0, a2, a0
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %g, %h
+ %j = or i32 %f, %i
+ store i32 %j, ptr %p
+ ret void
+}
+
+define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_zext_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a2, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a2, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %a = zext i16 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %c, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
+
+; Negative test, %a isn't zero extended, so packw (which would discard
+; %a's bits [31:16]) can't be used for the outer or, but we can still use
+; packh for the high half.
+define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_noext_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a2, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a2, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %c, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
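
For a sense of what the second patch below does to these tests: with Zbkb,
packw(packh(%0, %1), packh(%2, %3)) computes

  %0 | (%1 << 8) | (%2 << 16) | (%3 << 24)

in bits [31:0], while bits [63:32] hold a sign copy of bit 31 that the sw
safely ignores, so the four-byte merge takes three pack-family instructions
and no shifts.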
>From cd8f2b4ab1e7deb3d1aeea9d101bd9674f3f5147 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 4 Aug 2025 23:40:11 -0700
Subject: [PATCH 2/2] [RISCV] Add packw+packh isel pattern for unaligned loads
on RV64.
This is similar to an existing RV32 pattern, with the simplification
proposed by #152045. Instead of pack we need to use packw, and we need to
know that the upper 32 bits are ignored, since packw sign extends from
bit 31.
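
For context, the motivating IR shape is an unaligned load such as the
following (a minimal sketch, not one of the tests in this patch):

define i64 @load_i64_unaligned(ptr %p) {
  ; Without fast unaligned scalar access, legalization expands this into
  ; eight byte loads combined with shifts and ors.
  %v = load i64, ptr %p, align 1
  ret i64 %v
}

With the new patterns this now selects to 8 lbu + 4 packh + 2 packw +
1 pack, as the unaligned-load-store.ll diff below shows.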
---
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 21 +++++++++++++++++++
llvm/test/CodeGen/RISCV/rv64zbkb.ll | 14 +++++--------
.../CodeGen/RISCV/unaligned-load-store.ll | 20 ++++++++----------
3 files changed, 35 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index d2a651444169c..a37dcde68aa38 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -661,6 +661,27 @@ def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
(zexti16 (i64 GPR:$rs1)))),
(PACKW GPR:$rs1, GPR:$rs2)>;
+
+// Match a pattern of 2 bytes being inserted into bits [31:16], with bits
+// [15:0] coming from a zero extended value, and bits [63:32] being
+// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
+// also be a packh, it can be matched separately.
+def : Pat<(binop_allwusers<or>
+ (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+ (zexti16 (XLenVT GPR:$rs1))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+// We need to manually reassociate the patterns because of the binop_allwusers.
+def : Pat<(binop_allwusers<or>
+ (or (zexti16 (XLenVT GPR:$rs1)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+ (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+def : Pat<(binop_allwusers<or>
+ (or (zexti16 (XLenVT GPR:$rs1)),
+ (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))),
+ (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
let Predicates = [HasStdExtZbb, IsRV32] in
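
As a reading aid for the patterns above, here is the packh/packw semantics
written as IR (my paraphrase of the Zbkb spec, not part of the patch). The
final sext in @packw_sem is why the patterns must prove that all users
ignore bits [63:32]:

define i64 @packh_sem(i64 %rs1, i64 %rs2) {
  %b0 = and i64 %rs1, 255      ; rs1[7:0]
  %b1 = and i64 %rs2, 255      ; rs2[7:0]
  %hi = shl i64 %b1, 8
  %rd = or i64 %b0, %hi        ; rest of rd is zero extended
  ret i64 %rd
}

define i64 @packw_sem(i64 %rs1, i64 %rs2) {
  %h0 = and i64 %rs1, 65535    ; rs1[15:0]
  %h1 = and i64 %rs2, 65535    ; rs2[15:0]
  %hi = shl i64 %h1, 16
  %w  = or i64 %h0, %hi
  %t  = trunc i64 %w to i32
  %rd = sext i32 %t to i64     ; sign extends from bit 31
  ret i64 %rd
}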
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 6584588e68f13..37c9eaea6f70b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -407,10 +407,9 @@ define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2,
;
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: slli a3, a3, 24
; RV64ZBKB-NEXT: packh a0, a0, a1
-; RV64ZBKB-NEXT: packw a0, a0, a2
-; RV64ZBKB-NEXT: or a0, a0, a3
+; RV64ZBKB-NEXT: packh a1, a2, a3
+; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a4)
; RV64ZBKB-NEXT: ret
%a = zext i8 %0 to i32
@@ -441,11 +440,9 @@ define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %
;
; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: slli a2, a2, 16
-; RV64ZBKB-NEXT: slli a3, a3, 24
; RV64ZBKB-NEXT: packh a0, a0, a1
-; RV64ZBKB-NEXT: or a0, a3, a0
-; RV64ZBKB-NEXT: or a0, a2, a0
+; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a4)
; RV64ZBKB-NEXT: ret
%a = zext i8 %0 to i32
@@ -475,8 +472,7 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a1, a2, a2
-; RV64ZBKB-NEXT: slli a1, a1, 16
-; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a3)
; RV64ZBKB-NEXT: ret
%a = zext i16 %0 to i32
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index c9c49e8f7f532..cb046cdaae75c 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -204,18 +204,16 @@ define i64 @load_i64(ptr %p) {
; RV64IZBKB-NEXT: lbu a2, 5(a0)
; RV64IZBKB-NEXT: lbu a3, 6(a0)
; RV64IZBKB-NEXT: lbu a4, 7(a0)
-; RV64IZBKB-NEXT: lbu a5, 0(a0)
-; RV64IZBKB-NEXT: lbu a6, 1(a0)
-; RV64IZBKB-NEXT: lbu a7, 2(a0)
-; RV64IZBKB-NEXT: lbu a0, 3(a0)
+; RV64IZBKB-NEXT: lbu a5, 1(a0)
+; RV64IZBKB-NEXT: lbu a6, 2(a0)
+; RV64IZBKB-NEXT: lbu a7, 3(a0)
+; RV64IZBKB-NEXT: lbu a0, 0(a0)
+; RV64IZBKB-NEXT: packh a3, a3, a4
; RV64IZBKB-NEXT: packh a1, a1, a2
-; RV64IZBKB-NEXT: packh a2, a3, a4
-; RV64IZBKB-NEXT: packh a3, a5, a6
-; RV64IZBKB-NEXT: packh a0, a7, a0
-; RV64IZBKB-NEXT: slli a2, a2, 16
-; RV64IZBKB-NEXT: slli a0, a0, 16
-; RV64IZBKB-NEXT: or a1, a2, a1
-; RV64IZBKB-NEXT: or a0, a0, a3
+; RV64IZBKB-NEXT: packh a2, a6, a7
+; RV64IZBKB-NEXT: packh a0, a0, a5
+; RV64IZBKB-NEXT: packw a1, a1, a3
+; RV64IZBKB-NEXT: packw a0, a0, a2
; RV64IZBKB-NEXT: pack a0, a0, a1
; RV64IZBKB-NEXT: ret
;
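
To reproduce the new output locally, something like the following should
work (a guess at the test's RUN line, which isn't shown in these hunks):

  llc -mtriple=riscv64 -mattr=+zbkb < llvm/test/CodeGen/RISCV/rv64zbkb.ll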