[llvm] [RISCV] Add a high half PACKW+PACK pattern for RV64. (PR #152760)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 09:47:35 PDT 2025
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/152760
Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.
>From 1bed49789b483435d78b970c61af258edad6ecd5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 8 Aug 2025 08:52:35 -0700
Subject: [PATCH 1/2] Pre-commit test
---
.../CodeGen/RISCV/unaligned-load-store.ll | 70 ++++++++++++++++++-
1 file changed, 68 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index cb046cdaae75c..9989f17af53ed 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -232,6 +232,72 @@ define i64 @load_i64(ptr %p) {
ret i64 %res
}
+define i64 @load_i64_align2(ptr %p) {
+; RV32I-LABEL: load_i64_align2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lhu a1, 2(a0)
+; RV32I-NEXT: lhu a2, 0(a0)
+; RV32I-NEXT: lhu a3, 6(a0)
+; RV32I-NEXT: lhu a4, 4(a0)
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: slli a1, a3, 16
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: load_i64_align2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lhu a1, 2(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
+; RV64I-NEXT: lhu a3, 4(a0)
+; RV64I-NEXT: lhu a0, 6(a0)
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZBKB-LABEL: load_i64_align2:
+; RV32IZBKB: # %bb.0:
+; RV32IZBKB-NEXT: lhu a1, 0(a0)
+; RV32IZBKB-NEXT: lhu a2, 2(a0)
+; RV32IZBKB-NEXT: lhu a3, 4(a0)
+; RV32IZBKB-NEXT: lhu a4, 6(a0)
+; RV32IZBKB-NEXT: pack a0, a1, a2
+; RV32IZBKB-NEXT: pack a1, a3, a4
+; RV32IZBKB-NEXT: ret
+;
+; RV64IZBKB-LABEL: load_i64_align2:
+; RV64IZBKB: # %bb.0:
+; RV64IZBKB-NEXT: lhu a1, 2(a0)
+; RV64IZBKB-NEXT: lhu a2, 0(a0)
+; RV64IZBKB-NEXT: lhu a3, 4(a0)
+; RV64IZBKB-NEXT: lhu a0, 6(a0)
+; RV64IZBKB-NEXT: slli a1, a1, 16
+; RV64IZBKB-NEXT: or a1, a1, a2
+; RV64IZBKB-NEXT: slli a3, a3, 32
+; RV64IZBKB-NEXT: slli a0, a0, 48
+; RV64IZBKB-NEXT: or a0, a0, a3
+; RV64IZBKB-NEXT: or a0, a0, a1
+; RV64IZBKB-NEXT: ret
+;
+; RV32I-FAST-LABEL: load_i64_align2:
+; RV32I-FAST: # %bb.0:
+; RV32I-FAST-NEXT: lw a2, 0(a0)
+; RV32I-FAST-NEXT: lw a1, 4(a0)
+; RV32I-FAST-NEXT: mv a0, a2
+; RV32I-FAST-NEXT: ret
+;
+; RV64I-FAST-LABEL: load_i64_align2:
+; RV64I-FAST: # %bb.0:
+; RV64I-FAST-NEXT: ld a0, 0(a0)
+; RV64I-FAST-NEXT: ret
+ %res = load i64, ptr %p, align 2
+ ret i64 %res
+}
+
define void @store_i8(ptr %p, i8 %v) {
; ALL-LABEL: store_i8:
; ALL: # %bb.0:
@@ -569,8 +635,8 @@ define void @store_large_constant(ptr %x) {
;
; RV64I-FAST-LABEL: store_large_constant:
; RV64I-FAST: # %bb.0:
-; RV64I-FAST-NEXT: lui a1, %hi(.LCPI16_0)
-; RV64I-FAST-NEXT: ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-FAST-NEXT: lui a1, %hi(.LCPI17_0)
+; RV64I-FAST-NEXT: ld a1, %lo(.LCPI17_0)(a1)
; RV64I-FAST-NEXT: sd a1, 0(a0)
; RV64I-FAST-NEXT: ret
store i64 18364758544493064720, ptr %x, align 1
>From b5ab728fe35562022988f19a73c9bd523222c87b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 8 Aug 2025 09:42:54 -0700
Subject: [PATCH 2/2] [RISCV] Add a high half PACKW+PACK pattern for RV64.
Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.
---
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 10 ++++++++++
llvm/test/CodeGen/RISCV/unaligned-load-store.ll | 15 ++++++---------
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 413ad8b2f9cc9..0b178ae365ac7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -692,6 +692,16 @@ def : Pat<(binop_allwusers<or>
(shl GPR:$op1rs1, (XLenVT 24))),
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+
+// Match a pattern of 2 halfwords being inserted into bits [63:32], with
+// bits [31:0] coming from a zero extended value. We can use pack with packw for
+// bits [63:32]. If bits [31:0] can also be a packw, it can be matched
+// separately.
+def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
+ (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
+ (zexti32 (i64 GPR:$rs1))),
+ (PACK (XLenVT GPR:$rs1),
+ (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
let Predicates = [HasStdExtZbb, IsRV32] in
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 9989f17af53ed..b396d16b0eb26 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -272,15 +272,12 @@ define i64 @load_i64_align2(ptr %p) {
; RV64IZBKB-LABEL: load_i64_align2:
; RV64IZBKB: # %bb.0:
; RV64IZBKB-NEXT: lhu a1, 2(a0)
-; RV64IZBKB-NEXT: lhu a2, 0(a0)
-; RV64IZBKB-NEXT: lhu a3, 4(a0)
-; RV64IZBKB-NEXT: lhu a0, 6(a0)
-; RV64IZBKB-NEXT: slli a1, a1, 16
-; RV64IZBKB-NEXT: or a1, a1, a2
-; RV64IZBKB-NEXT: slli a3, a3, 32
-; RV64IZBKB-NEXT: slli a0, a0, 48
-; RV64IZBKB-NEXT: or a0, a0, a3
-; RV64IZBKB-NEXT: or a0, a0, a1
+; RV64IZBKB-NEXT: lhu a2, 4(a0)
+; RV64IZBKB-NEXT: lhu a3, 6(a0)
+; RV64IZBKB-NEXT: lhu a0, 0(a0)
+; RV64IZBKB-NEXT: packw a2, a2, a3
+; RV64IZBKB-NEXT: packw a0, a0, a1
+; RV64IZBKB-NEXT: pack a0, a0, a2
; RV64IZBKB-NEXT: ret
;
; RV32I-FAST-LABEL: load_i64_align2:
More information about the llvm-commits
mailing list