[llvm] 19e2ebb - [LoongArch] Emit bytepick for picking from concatenation of two values

Weining Lu via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 16 00:07:36 PDT 2023


Author: WANG Xuerui
Date: 2023-03-16T15:07:06+08:00
New Revision: 19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0

URL: https://github.com/llvm/llvm-project/commit/19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0
DIFF: https://github.com/llvm/llvm-project/commit/19e2ebbf45bdd9f2a9abc3f7760bdbea8c808dc0.diff

LOG: [LoongArch] Emit bytepick for picking from concatenation of two values

It seems the ISA manual's pseudo-code description for the
`BYTEPICK.[WD]` instructions is inaccurate; the behavior encoded by the
patterns in this patch should be correct though. The instructions' names
are misleading too (they pick full GRLen-wide words instead of bytes; they
just index by bytes) but let's stick to the official names for now.

Reviewed By: SixWeining

Differential Revision: https://reviews.llvm.org/D143880

Added: 
    

Modified: 
    llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
    llvm/test/CodeGen/LoongArch/bitreverse.ll
    llvm/test/CodeGen/LoongArch/bswap.ll
    llvm/test/CodeGen/LoongArch/bytepick.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 0606c65f9d3b1..9ba648dfa0481 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -847,6 +847,13 @@ def : PatGprGpr<mulhs, MULH_W>;
 def : PatGprGpr<mulhu, MULH_WU>;
 def : PatGprGpr<rotr, ROTR_W>;
 def : PatGprImm<rotr, ROTRI_W, uimm5>;
+
+foreach Idx = 1...3 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(4, Idx));
+  def : Pat<(or (shl GPR:$rk, (i32 ShamtA)), (srl GPR:$rj, (i32 ShamtB))),
+            (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
 } // Predicates = [IsLA32]
 
 let Predicates = [IsLA64] in {
@@ -891,6 +898,24 @@ def : Pat<(add GPR:$rj, simm32_hi16_lo12:$imm),
 def : Pat<(sext_inreg (add GPR:$rj, simm32_hi16_lo12:$imm), i32),
           (ADDI_W (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
                   (LO12 $imm))>;
+
+foreach Idx = 1...7 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(8, Idx));
+  def : Pat<(or (shl GPR:$rk, (i64 ShamtA)), (srl GPR:$rj, (i64 ShamtB))),
+            (BYTEPICK_D GPR:$rj, GPR:$rk, Idx)>;
+}
+
+foreach Idx = 1...3 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(4, Idx));
+  // NOTE: the srl node would already be transformed into a loongarch_bstrpick
+  // by the time this pattern gets to execute, hence the weird construction.
+  def : Pat<(sext_inreg (or (shl GPR:$rk, (i64 ShamtA)),
+                            (loongarch_bstrpick GPR:$rj, (i64 31),
+                                                         (i64 ShamtB))), i32),
+            (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
 } // Predicates = [IsLA64]
 
 def : PatGprGpr<and, AND>;

diff  --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 8cc731eaa7a54..259d8565c6842 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -111,11 +111,9 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
 define i48 @test_bitreverse_i48(i48 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i48:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bitrev.w $a1, $a1
-; LA32-NEXT:    srli.w $a1, $a1, 16
 ; LA32-NEXT:    bitrev.w $a2, $a0
-; LA32-NEXT:    slli.w $a0, $a2, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bitrev.w $a0, $a1
+; LA32-NEXT:    bytepick.w $a0, $a0, $a2, 2
 ; LA32-NEXT:    srli.w $a1, $a2, 16
 ; LA32-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
index 1ef73b4f1c0bf..eb9107302ef68 100644
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -63,13 +63,11 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 define i48 @test_bswap_i48(i48 %a) nounwind {
 ; LA32-LABEL: test_bswap_i48:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    revb.2h $a1, $a1
-; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
 ; LA32-NEXT:    revb.2h $a0, $a0
 ; LA32-NEXT:    rotri.w $a2, $a0, 16
-; LA32-NEXT:    slli.w $a0, $a2, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    revb.2h $a0, $a1
+; LA32-NEXT:    rotri.w $a0, $a0, 16
+; LA32-NEXT:    bytepick.w $a0, $a0, $a2, 2
 ; LA32-NEXT:    srli.w $a1, $a2, 16
 ; LA32-NEXT:    ret
 ;
@@ -91,28 +89,22 @@ define i80 @test_bswap_i80(i80 %a) nounwind {
 ; LA32-NEXT:    ld.w $a3, $a1, 4
 ; LA32-NEXT:    revb.2h $a3, $a3
 ; LA32-NEXT:    rotri.w $a3, $a3, 16
-; LA32-NEXT:    srli.w $a4, $a3, 16
-; LA32-NEXT:    slli.w $a5, $a2, 16
-; LA32-NEXT:    or $a4, $a5, $a4
-; LA32-NEXT:    srli.w $a2, $a2, 16
-; LA32-NEXT:    st.h $a2, $a0, 8
+; LA32-NEXT:    bytepick.w $a4, $a3, $a2, 2
 ; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    slli.w $a2, $a3, 16
 ; LA32-NEXT:    ld.w $a1, $a1, 8
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    bytepick.w $a1, $a1, $a3, 2
 ; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    srli.w $a1, $a2, 16
+; LA32-NEXT:    st.h $a1, $a0, 8
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bswap_i80:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    revb.d $a1, $a1
-; LA64-NEXT:    srli.d $a1, $a1, 48
 ; LA64-NEXT:    revb.d $a2, $a0
-; LA64-NEXT:    slli.d $a0, $a2, 16
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    revb.d $a0, $a1
+; LA64-NEXT:    bytepick.d $a0, $a0, $a2, 2
 ; LA64-NEXT:    srli.d $a1, $a2, 48
 ; LA64-NEXT:    ret
   %tmp = call i80 @llvm.bswap.i80(i80 %a)

diff  --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
index 3578f368b05b7..86148b374e70d 100644
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -9,9 +9,7 @@
 define i32 @pick_i32_1(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_1:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a1, 24
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_1:
@@ -31,17 +29,12 @@ define i32 @pick_i32_1(i32 %a, i32 %b) {
 define signext i32 @pick_i32_1_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_1_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a1, 24
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_1_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 8
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 24
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 24
   %2 = shl i32 %a, 8
@@ -54,9 +47,7 @@ define signext i32 @pick_i32_1_sext(i32 %a, i32 %b) {
 define i32 @pick_i32_2(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_2:
@@ -76,17 +67,12 @@ define i32 @pick_i32_2(i32 %a, i32 %b) {
 define signext i32 @pick_i32_2_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_2_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_2_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 16
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 16
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 16
   %2 = shl i32 %a, 16
@@ -99,9 +85,7 @@ define signext i32 @pick_i32_2_sext(i32 %a, i32 %b) {
 define i32 @pick_i32_3(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a1, 8
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_3:
@@ -121,17 +105,12 @@ define i32 @pick_i32_3(i32 %a, i32 %b) {
 define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_3_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a1, 8
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_3_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 24
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 8
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 8
   %2 = shl i32 %a, 24
@@ -144,20 +123,14 @@ define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
 define i64 @pick_i64_1(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_1:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 24
-; LA32-NEXT:    slli.w $a3, $a0, 8
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 24
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 1
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 1
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 8
-; LA64-NEXT:    srli.d $a1, $a1, 56
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 1
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 56
   %2 = shl i64 %a, 8
@@ -170,20 +143,14 @@ define i64 @pick_i64_1(i64 %a, i64 %b) {
 define i64 @pick_i64_2(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a0, 16
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 16
-; LA32-NEXT:    slli.w $a1, $a1, 16
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 2
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 2
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_2:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 16
-; LA64-NEXT:    srli.d $a1, $a1, 48
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 2
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 48
   %2 = shl i64 %a, 16
@@ -196,20 +163,14 @@ define i64 @pick_i64_2(i64 %a, i64 %b) {
 define i64 @pick_i64_3(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 8
-; LA32-NEXT:    slli.w $a3, $a0, 24
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 8
-; LA32-NEXT:    slli.w $a1, $a1, 24
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 3
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 3
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_3:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 24
-; LA64-NEXT:    srli.d $a1, $a1, 40
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 3
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 40
   %2 = shl i64 %a, 24
@@ -228,9 +189,7 @@ define i64 @pick_i64_4(i64 %a, i64 %b) {
 ;
 ; LA64-LABEL: pick_i64_4:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 32
-; LA64-NEXT:    srli.d $a1, $a1, 32
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 4
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 32
   %2 = shl i64 %a, 32
@@ -243,20 +202,14 @@ define i64 @pick_i64_4(i64 %a, i64 %b) {
 define i64 @pick_i64_5(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_5:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 24
-; LA32-NEXT:    slli.w $a2, $a3, 8
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a3, 24
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 1
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 1
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_5:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 40
-; LA64-NEXT:    srli.d $a1, $a1, 24
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 5
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 24
   %2 = shl i64 %a,40
@@ -269,20 +222,14 @@ define i64 @pick_i64_5(i64 %a, i64 %b) {
 define i64 @pick_i64_6(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_6:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 16
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a3, 16
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 2
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 2
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_6:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 48
-; LA64-NEXT:    srli.d $a1, $a1, 16
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 6
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 16
   %2 = shl i64 %a, 48
@@ -295,20 +242,14 @@ define i64 @pick_i64_6(i64 %a, i64 %b) {
 define i64 @pick_i64_7(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_7:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 8
-; LA32-NEXT:    slli.w $a2, $a3, 24
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a3, 8
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 3
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 3
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_7:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 56
-; LA64-NEXT:    srli.d $a1, $a1, 8
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 7
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 8
   %2 = shl i64 %a, 56


        


More information about the llvm-commits mailing list