[llvm] 19e2ebb - [LoongArch] Emit bytepick for picking from concatenation of two values
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 16 00:07:36 PDT 2023
Author: WANG Xuerui
Date: 2023-03-16T15:07:06+08:00
New Revision: 19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0
URL: https://github.com/llvm/llvm-project/commit/19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0
DIFF: https://github.com/llvm/llvm-project/commit/19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0.diff
LOG: [LoongArch] Emit bytepick for picking from concatenation of two values
The ISA manual's pseudo-code description of the `BYTEPICK.[WD]`
instructions appears to be inaccurate; the behavior encoded by the
patterns in this patch, namely `(rk << 8*sa) | (rj >> 8*(N - sa))` for an
N-byte operand width and sa = 1..N-1, should be correct though. The
instructions' names are misleading too (they pick full GRLen-wide words
instead of bytes; they just index by bytes), but let's stick to the
official names for now.
Reviewed By: SixWeining
Differential Revision: https://reviews.llvm.org/D143880
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/bitreverse.ll
llvm/test/CodeGen/LoongArch/bswap.ll
llvm/test/CodeGen/LoongArch/bytepick.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 0606c65f9d3b1..9ba648dfa0481 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -847,6 +847,13 @@ def : PatGprGpr<mulhs, MULH_W>;
def : PatGprGpr<mulhu, MULH_WU>;
def : PatGprGpr<rotr, ROTR_W>;
def : PatGprImm<rotr, ROTRI_W, uimm5>;
+
+foreach Idx = 1...3 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(4, Idx));
+ def : Pat<(or (shl GPR:$rk, (i32 ShamtA)), (srl GPR:$rj, (i32 ShamtB))),
+ (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
} // Predicates = [IsLA32]
let Predicates = [IsLA64] in {
@@ -891,6 +898,24 @@ def : Pat<(add GPR:$rj, simm32_hi16_lo12:$imm),
def : Pat<(sext_inreg (add GPR:$rj, simm32_hi16_lo12:$imm), i32),
(ADDI_W (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
(LO12 $imm))>;
+
+foreach Idx = 1...7 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(8, Idx));
+ def : Pat<(or (shl GPR:$rk, (i64 ShamtA)), (srl GPR:$rj, (i64 ShamtB))),
+ (BYTEPICK_D GPR:$rj, GPR:$rk, Idx)>;
+}
+
+foreach Idx = 1...3 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(4, Idx));
+ // NOTE: the srl node would already be transformed into a loongarch_bstrpick
+ // by the time this pattern gets to execute, hence the weird construction.
+ def : Pat<(sext_inreg (or (shl GPR:$rk, (i64 ShamtA)),
+ (loongarch_bstrpick GPR:$rj, (i64 31),
+ (i64 ShamtB))), i32),
+ (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
} // Predicates = [IsLA64]
def : PatGprGpr<and, AND>;
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 8cc731eaa7a54..259d8565c6842 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -111,11 +111,9 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
define i48 @test_bitreverse_i48(i48 %a) nounwind {
; LA32-LABEL: test_bitreverse_i48:
; LA32: # %bb.0:
-; LA32-NEXT: bitrev.w $a1, $a1
-; LA32-NEXT: srli.w $a1, $a1, 16
; LA32-NEXT: bitrev.w $a2, $a0
-; LA32-NEXT: slli.w $a0, $a2, 16
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bitrev.w $a0, $a1
+; LA32-NEXT: bytepick.w $a0, $a0, $a2, 2
; LA32-NEXT: srli.w $a1, $a2, 16
; LA32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
index 1ef73b4f1c0bf..eb9107302ef68 100644
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -63,13 +63,11 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
define i48 @test_bswap_i48(i48 %a) nounwind {
; LA32-LABEL: test_bswap_i48:
; LA32: # %bb.0:
-; LA32-NEXT: revb.2h $a1, $a1
-; LA32-NEXT: rotri.w $a1, $a1, 16
-; LA32-NEXT: srli.w $a1, $a1, 16
; LA32-NEXT: revb.2h $a0, $a0
; LA32-NEXT: rotri.w $a2, $a0, 16
-; LA32-NEXT: slli.w $a0, $a2, 16
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: revb.2h $a0, $a1
+; LA32-NEXT: rotri.w $a0, $a0, 16
+; LA32-NEXT: bytepick.w $a0, $a0, $a2, 2
; LA32-NEXT: srli.w $a1, $a2, 16
; LA32-NEXT: ret
;
@@ -91,28 +89,22 @@ define i80 @test_bswap_i80(i80 %a) nounwind {
; LA32-NEXT: ld.w $a3, $a1, 4
; LA32-NEXT: revb.2h $a3, $a3
; LA32-NEXT: rotri.w $a3, $a3, 16
-; LA32-NEXT: srli.w $a4, $a3, 16
-; LA32-NEXT: slli.w $a5, $a2, 16
-; LA32-NEXT: or $a4, $a5, $a4
-; LA32-NEXT: srli.w $a2, $a2, 16
-; LA32-NEXT: st.h $a2, $a0, 8
+; LA32-NEXT: bytepick.w $a4, $a3, $a2, 2
; LA32-NEXT: st.w $a4, $a0, 4
-; LA32-NEXT: slli.w $a2, $a3, 16
; LA32-NEXT: ld.w $a1, $a1, 8
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
-; LA32-NEXT: srli.w $a1, $a1, 16
-; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: bytepick.w $a1, $a1, $a3, 2
; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: srli.w $a1, $a2, 16
+; LA32-NEXT: st.h $a1, $a0, 8
; LA32-NEXT: ret
;
; LA64-LABEL: test_bswap_i80:
; LA64: # %bb.0:
-; LA64-NEXT: revb.d $a1, $a1
-; LA64-NEXT: srli.d $a1, $a1, 48
; LA64-NEXT: revb.d $a2, $a0
-; LA64-NEXT: slli.d $a0, $a2, 16
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: revb.d $a0, $a1
+; LA64-NEXT: bytepick.d $a0, $a0, $a2, 2
; LA64-NEXT: srli.d $a1, $a2, 48
; LA64-NEXT: ret
%tmp = call i80 @llvm.bswap.i80(i80 %a)
diff --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
index 3578f368b05b7..86148b374e70d 100644
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -9,9 +9,7 @@
define i32 @pick_i32_1(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_1:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 8
-; LA32-NEXT: srli.w $a1, $a1, 24
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_1:
@@ -31,17 +29,12 @@ define i32 @pick_i32_1(i32 %a, i32 %b) {
define signext i32 @pick_i32_1_sext(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_1_sext:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 8
-; LA32-NEXT: srli.w $a1, $a1, 24
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_1_sext:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 8
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 24
-; LA64-NEXT: or $a0, $a1, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: bytepick.w $a0, $a1, $a0, 1
; LA64-NEXT: ret
%1 = lshr i32 %b, 24
%2 = shl i32 %a, 8
@@ -54,9 +47,7 @@ define signext i32 @pick_i32_1_sext(i32 %a, i32 %b) {
define i32 @pick_i32_2(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_2:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 16
-; LA32-NEXT: srli.w $a1, $a1, 16
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_2:
@@ -76,17 +67,12 @@ define i32 @pick_i32_2(i32 %a, i32 %b) {
define signext i32 @pick_i32_2_sext(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_2_sext:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 16
-; LA32-NEXT: srli.w $a1, $a1, 16
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_2_sext:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 16
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 16
-; LA64-NEXT: or $a0, $a1, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: bytepick.w $a0, $a1, $a0, 2
; LA64-NEXT: ret
%1 = lshr i32 %b, 16
%2 = shl i32 %a, 16
@@ -99,9 +85,7 @@ define signext i32 @pick_i32_2_sext(i32 %a, i32 %b) {
define i32 @pick_i32_3(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_3:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 24
-; LA32-NEXT: srli.w $a1, $a1, 8
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 3
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_3:
@@ -121,17 +105,12 @@ define i32 @pick_i32_3(i32 %a, i32 %b) {
define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
; LA32-LABEL: pick_i32_3_sext:
; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a0, $a0, 24
-; LA32-NEXT: srli.w $a1, $a1, 8
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: bytepick.w $a0, $a1, $a0, 3
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i32_3_sext:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 24
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 8
-; LA64-NEXT: or $a0, $a1, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: bytepick.w $a0, $a1, $a0, 3
; LA64-NEXT: ret
%1 = lshr i32 %b, 8
%2 = shl i32 %a, 24
@@ -144,20 +123,14 @@ define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
define i64 @pick_i64_1(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_1:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a2, $a3, 24
-; LA32-NEXT: slli.w $a3, $a0, 8
-; LA32-NEXT: or $a2, $a2, $a3
-; LA32-NEXT: srli.w $a0, $a0, 24
-; LA32-NEXT: slli.w $a1, $a1, 8
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 1
+; LA32-NEXT: bytepick.w $a1, $a0, $a1, 1
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_1:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 8
-; LA64-NEXT: srli.d $a1, $a1, 56
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 1
; LA64-NEXT: ret
%1 = lshr i64 %b, 56
%2 = shl i64 %a, 8
@@ -170,20 +143,14 @@ define i64 @pick_i64_1(i64 %a, i64 %b) {
define i64 @pick_i64_2(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_2:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a2, $a3, 16
-; LA32-NEXT: slli.w $a3, $a0, 16
-; LA32-NEXT: or $a2, $a2, $a3
-; LA32-NEXT: srli.w $a0, $a0, 16
-; LA32-NEXT: slli.w $a1, $a1, 16
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 2
+; LA32-NEXT: bytepick.w $a1, $a0, $a1, 2
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_2:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 16
-; LA64-NEXT: srli.d $a1, $a1, 48
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 2
; LA64-NEXT: ret
%1 = lshr i64 %b, 48
%2 = shl i64 %a, 16
@@ -196,20 +163,14 @@ define i64 @pick_i64_2(i64 %a, i64 %b) {
define i64 @pick_i64_3(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_3:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a2, $a3, 8
-; LA32-NEXT: slli.w $a3, $a0, 24
-; LA32-NEXT: or $a2, $a2, $a3
-; LA32-NEXT: srli.w $a0, $a0, 8
-; LA32-NEXT: slli.w $a1, $a1, 24
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 3
+; LA32-NEXT: bytepick.w $a1, $a0, $a1, 3
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_3:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 24
-; LA64-NEXT: srli.d $a1, $a1, 40
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 3
; LA64-NEXT: ret
%1 = lshr i64 %b, 40
%2 = shl i64 %a, 24
@@ -228,9 +189,7 @@ define i64 @pick_i64_4(i64 %a, i64 %b) {
;
; LA64-LABEL: pick_i64_4:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 32
-; LA64-NEXT: srli.d $a1, $a1, 32
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 4
; LA64-NEXT: ret
%1 = lshr i64 %b, 32
%2 = shl i64 %a, 32
@@ -243,20 +202,14 @@ define i64 @pick_i64_4(i64 %a, i64 %b) {
define i64 @pick_i64_5(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_5:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a2, 24
-; LA32-NEXT: slli.w $a2, $a3, 8
-; LA32-NEXT: or $a2, $a1, $a2
-; LA32-NEXT: slli.w $a0, $a0, 8
-; LA32-NEXT: srli.w $a1, $a3, 24
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a2, $a3, 1
+; LA32-NEXT: bytepick.w $a1, $a3, $a0, 1
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_5:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 40
-; LA64-NEXT: srli.d $a1, $a1, 24
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 5
; LA64-NEXT: ret
%1 = lshr i64 %b, 24
%2 = shl i64 %a,40
@@ -269,20 +222,14 @@ define i64 @pick_i64_5(i64 %a, i64 %b) {
define i64 @pick_i64_6(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_6:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a2, 16
-; LA32-NEXT: slli.w $a2, $a3, 16
-; LA32-NEXT: or $a2, $a1, $a2
-; LA32-NEXT: slli.w $a0, $a0, 16
-; LA32-NEXT: srli.w $a1, $a3, 16
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a2, $a3, 2
+; LA32-NEXT: bytepick.w $a1, $a3, $a0, 2
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_6:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 48
-; LA64-NEXT: srli.d $a1, $a1, 16
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 6
; LA64-NEXT: ret
%1 = lshr i64 %b, 16
%2 = shl i64 %a, 48
@@ -295,20 +242,14 @@ define i64 @pick_i64_6(i64 %a, i64 %b) {
define i64 @pick_i64_7(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_7:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a2, 8
-; LA32-NEXT: slli.w $a2, $a3, 24
-; LA32-NEXT: or $a2, $a1, $a2
-; LA32-NEXT: slli.w $a0, $a0, 24
-; LA32-NEXT: srli.w $a1, $a3, 8
-; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: bytepick.w $a2, $a2, $a3, 3
+; LA32-NEXT: bytepick.w $a1, $a3, $a0, 3
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_7:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 56
-; LA64-NEXT: srli.d $a1, $a1, 8
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: bytepick.d $a0, $a1, $a0, 7
; LA64-NEXT: ret
%1 = lshr i64 %b, 8
%2 = shl i64 %a, 56
More information about the llvm-commits
mailing list