[llvm] 886b213 - [RISCV] Relax one of the zexti8 in the PACKH+PACK(W)/SLLI patterns. (#152384)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 17:46:47 PDT 2025
Author: Craig Topper
Date: 2025-08-06T17:46:43-07:00
New Revision: 886b2133e372108da7b19bd2634c28bdbdf8d04a
URL: https://github.com/llvm/llvm-project/commit/886b2133e372108da7b19bd2634c28bdbdf8d04a
DIFF: https://github.com/llvm/llvm-project/commit/886b2133e372108da7b19bd2634c28bdbdf8d04a.diff
LOG: [RISCV] Relax one of the zexti8 in the PACKH+PACK(W)/SLLI patterns. (#152384)
For RV32, we don't need the byte shifted by 24 to be zero extended
since the extended bits are shifted out.
For RV64, we don't need the byte shifted by 24 to be zero extended
if the upper 32 bits of the result aren't demanded.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
llvm/test/CodeGen/RISCV/rv32zbkb.ll
llvm/test/CodeGen/RISCV/rv64zbkb.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 27ad10ad7f17e..413ad8b2f9cc9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -629,9 +629,6 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
(PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
- (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
- (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))),
@@ -642,11 +639,15 @@ let Predicates = [HasStdExtZbkb, IsRV32] in {
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl GPR:$rs2, (XLenVT 24)),
+ (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+ (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+
// Match a pattern of 2 bytes being inserted into bits [31:16], with bits
// bits [15:0] coming from a zero extended value. We can use pack with packh for
// bits [31:16]. If bits [15:0] can also be a packh, it can be matched
// separately.
-def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+def : Pat<(or (or (shl GPR:$op1rs2, (XLenVT 24)),
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
(zexti16 (XLenVT GPR:$rs1))),
(PACK (XLenVT GPR:$rs1),
@@ -657,6 +658,13 @@ let Predicates = [HasStdExtZbkb, IsRV64] in {
def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
+ (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+ (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (XLenVT 24)),
+ (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+ (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+
def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
(zexti16 (i64 GPR:$rs1))),
(PACKW GPR:$rs1, GPR:$rs2)>;
@@ -669,7 +677,7 @@ def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
// also be a packh, it can be matched separately.
def : Pat<(binop_allwusers<or>
- (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+ (or (shl GPR:$op1rs2, (XLenVT 24)),
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
(zexti16 (XLenVT GPR:$rs1))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
@@ -677,11 +685,11 @@ def : Pat<(binop_allwusers<or>
def : Pat<(binop_allwusers<or>
(or (zexti16 (XLenVT GPR:$rs1)),
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
- (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))),
+ (shl GPR:$op1rs2, (XLenVT 24))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
def : Pat<(binop_allwusers<or>
(or (zexti16 (XLenVT GPR:$rs1)),
- (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))),
+ (shl GPR:$op1rs1, (XLenVT 24))),
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
index 7ebbd7802b70f..42d326e359d9f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
@@ -350,10 +350,43 @@ define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2,
ret i32 %j
}
+define i32 @pack_lo_packh_hi_packh_2(i8 %0, i8 %1, i8 %2, i8 %3) nounwind {
+; RV32I-LABEL: pack_lo_packh_hi_packh_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: zext.b a1, a1
+; RV32I-NEXT: zext.b a2, a2
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: slli a2, a2, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32ZBKB-LABEL: pack_lo_packh_hi_packh_2:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: packh a0, a0, a1
+; RV32ZBKB-NEXT: packh a1, a2, a3
+; RV32ZBKB-NEXT: pack a0, a0, a1
+; RV32ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %h, %f
+ %j = or i32 %i, %g
+ ret i32 %j
+}
+
define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
; RV32I-LABEL: pack_lo_zext_hi_packh:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a2, 16
+; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a2, a2, 24
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: or a0, a1, a0
@@ -361,14 +394,14 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
;
; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: packh a1, a2, a2
+; RV32ZBKB-NEXT: packh a1, a1, a2
; RV32ZBKB-NEXT: pack a0, a0, a1
; RV32ZBKB-NEXT: ret
%a = zext i16 %0 to i32
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
ret i32 %g
@@ -379,7 +412,7 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
; RV32I-LABEL: pack_lo_noext_hi_packh:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a2, 16
+; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a2, a2, 24
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: or a0, a1, a0
@@ -387,14 +420,40 @@ define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwin
;
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: packh a1, a2, a2
+; RV32ZBKB-NEXT: packh a1, a1, a2
+; RV32ZBKB-NEXT: slli a1, a1, 16
+; RV32ZBKB-NEXT: or a0, a1, a0
+; RV32ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ ret i32 %g
+}
+
+; Make sure we can match packh+slli without having the input bytes zero extended.
+define i32 @pack_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2) nounwind {
+; RV32I-LABEL: pack_lo_noext_hi_packh_nozeroext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a1, a1
+; RV32I-NEXT: slli a2, a2, 24
+; RV32I-NEXT: slli a1, a1, 16
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBKB-LABEL: pack_lo_noext_hi_packh_nozeroext:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: packh a1, a1, a2
; RV32ZBKB-NEXT: slli a1, a1, 16
; RV32ZBKB-NEXT: or a0, a1, a0
; RV32ZBKB-NEXT: ret
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
ret i32 %g
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 37c9eaea6f70b..f2c41db781067 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -459,10 +459,46 @@ define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %
ret void
}
+define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: zext.b a2, a2
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %g, %h
+ %j = or i32 %f, %i
+ store i32 %j, ptr %p
+ ret void
+}
+
define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
; RV64I-LABEL: pack_lo_zext_hi_packh:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a2, 16
+; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: slli a2, a2, 24
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a0, a1, a0
@@ -471,7 +507,7 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
;
; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: packh a1, a2, a2
+; RV64ZBKB-NEXT: packh a1, a1, a2
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: sw a0, 0(a3)
; RV64ZBKB-NEXT: ret
@@ -479,7 +515,7 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
store i32 %g, ptr %p
@@ -491,7 +527,7 @@ define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2,
define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
; RV64I-LABEL: pack_lo_noext_hi_packh:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a2, 16
+; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: slli a2, a2, 24
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a0, a1, a0
@@ -500,7 +536,36 @@ define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p
;
; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: packh a1, a2, a2
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
+
+; Make sure we can match packh+slli without having the input bytes zero extended.
+define void @pack_i32_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
; RV64ZBKB-NEXT: slli a1, a1, 16
; RV64ZBKB-NEXT: or a0, a1, a0
; RV64ZBKB-NEXT: sw a0, 0(a3)
@@ -508,9 +573,36 @@ define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
store i32 %g, ptr %p
ret void
}
+
+; Make sure we can match packh+slli without having the input bytes zero extended.
+define i64 @pack_i64_lo_noext_hi_packh_nozeroext(i64 %a, i8 %1, i8 %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: zext.b a2, a2
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i64
+ %c = zext i8 %2 to i64
+ %d = shl i64 %c, 8
+ %e = or i64 %b, %d
+ %f = shl i64 %e, 16
+ %g = or i64 %f, %a
+ ret i64 %g
+}
More information about the llvm-commits
mailing list