[llvm] [RISCV][P-ext] Recognize vector shifts with splat build_vector shift amount. (PR #184909)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 22:03:15 PST 2026
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/184909
>From 429c383a78565ef737cbf96f70910944ff038476 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 5 Mar 2026 14:31:00 -0800
Subject: [PATCH 1/3] Pre-commit tests
---
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 372 ++++++++++++++++++++++++
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 343 ++++++++++++++++++++++
2 files changed, 715 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index b34130ac726fa..17431dd12fe37 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1763,3 +1763,375 @@ define <4 x i8> @test_vselect_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
%res = select <4 x i1> %mask, <4 x i8> %c, <4 x i8> %b
ret <4 x i8> %res
}
+
+define <2 x i16> @test_bswap_v2i16(<2 x i16> %a) {
+; CHECK-RV32-LABEL: test_bswap_v2i16:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: srli a1, a0, 24
+; CHECK-RV32-NEXT: slli a2, a0, 16
+; CHECK-RV32-NEXT: srli a2, a2, 24
+; CHECK-RV32-NEXT: pack a1, a2, a1
+; CHECK-RV32-NEXT: slli a2, a0, 8
+; CHECK-RV32-NEXT: srli a0, a0, 16
+; CHECK-RV32-NEXT: slli a0, a0, 8
+; CHECK-RV32-NEXT: pack a0, a2, a0
+; CHECK-RV32-NEXT: or a0, a0, a1
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_bswap_v2i16:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: srli a1, a0, 56
+; CHECK-RV64-NEXT: srli a2, a0, 40
+; CHECK-RV64-NEXT: srliw a3, a0, 24
+; CHECK-RV64-NEXT: zext.b a4, a2
+; CHECK-RV64-NEXT: ppaire.h a1, a4, a1
+; CHECK-RV64-NEXT: slli a4, a0, 48
+; CHECK-RV64-NEXT: srli a4, a4, 56
+; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
+; CHECK-RV64-NEXT: srli a4, a0, 32
+; CHECK-RV64-NEXT: andi a2, a2, -256
+; CHECK-RV64-NEXT: slli a4, a4, 8
+; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
+; CHECK-RV64-NEXT: slliw a4, a0, 8
+; CHECK-RV64-NEXT: srli a0, a0, 16
+; CHECK-RV64-NEXT: slli a0, a0, 8
+; CHECK-RV64-NEXT: ppaire.h a0, a4, a0
+; CHECK-RV64-NEXT: pack a1, a3, a1
+; CHECK-RV64-NEXT: pack a0, a0, a2
+; CHECK-RV64-NEXT: or a0, a0, a1
+; CHECK-RV64-NEXT: ret
+ %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
+ ret <2 x i16> %res
+}
+
+define <4 x i8> @test_bitreverse_v4i8(<4 x i8> %a) {
+; CHECK-RV32-LABEL: test_bitreverse_v4i8:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: pli.b a1, 15
+; CHECK-RV32-NEXT: srli a3, a0, 28
+; CHECK-RV32-NEXT: slli a2, a0, 16
+; CHECK-RV32-NEXT: slli a4, a0, 8
+; CHECK-RV32-NEXT: slli a5, a0, 24
+; CHECK-RV32-NEXT: pli.b a6, 51
+; CHECK-RV32-NEXT: and a0, a0, a1
+; CHECK-RV32-NEXT: srli a2, a2, 28
+; CHECK-RV32-NEXT: srli t2, a4, 28
+; CHECK-RV32-NEXT: srli t1, a5, 28
+; CHECK-RV32-NEXT: srli a5, a0, 20
+; CHECK-RV32-NEXT: ppaire.db a2, t1, a2
+; CHECK-RV32-NEXT: srli a1, a0, 12
+; CHECK-RV32-NEXT: srli a4, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: ppaire.db a0, a0, a4
+; CHECK-RV32-NEXT: pack a2, a2, a3
+; CHECK-RV32-NEXT: pack a0, a0, a1
+; CHECK-RV32-NEXT: or a0, a2, a0
+; CHECK-RV32-NEXT: and a1, a0, a6
+; CHECK-RV32-NEXT: srli a3, a1, 22
+; CHECK-RV32-NEXT: srli a5, a1, 14
+; CHECK-RV32-NEXT: srli a2, a1, 6
+; CHECK-RV32-NEXT: slli a4, a1, 2
+; CHECK-RV32-NEXT: srli a1, a0, 26
+; CHECK-RV32-NEXT: ppaire.db a2, a4, a2
+; CHECK-RV32-NEXT: slli a4, a0, 16
+; CHECK-RV32-NEXT: slli a5, a0, 8
+; CHECK-RV32-NEXT: slli a7, a0, 24
+; CHECK-RV32-NEXT: srli a0, a4, 26
+; CHECK-RV32-NEXT: srli a5, a5, 26
+; CHECK-RV32-NEXT: srli a4, a7, 26
+; CHECK-RV32-NEXT: pli.b a7, 85
+; CHECK-RV32-NEXT: ppaire.db a0, a4, a0
+; CHECK-RV32-NEXT: pack a2, a2, a3
+; CHECK-RV32-NEXT: pack a0, a0, a1
+; CHECK-RV32-NEXT: and a0, a0, a6
+; CHECK-RV32-NEXT: or a0, a0, a2
+; CHECK-RV32-NEXT: and a1, a0, a7
+; CHECK-RV32-NEXT: srli a3, a1, 23
+; CHECK-RV32-NEXT: srli a5, a1, 15
+; CHECK-RV32-NEXT: srli a2, a1, 7
+; CHECK-RV32-NEXT: slli a4, a1, 1
+; CHECK-RV32-NEXT: srli a1, a0, 25
+; CHECK-RV32-NEXT: ppaire.db a2, a4, a2
+; CHECK-RV32-NEXT: slli a4, a0, 16
+; CHECK-RV32-NEXT: slli a5, a0, 8
+; CHECK-RV32-NEXT: slli a6, a0, 24
+; CHECK-RV32-NEXT: srli a0, a4, 25
+; CHECK-RV32-NEXT: srli a5, a5, 25
+; CHECK-RV32-NEXT: srli a4, a6, 25
+; CHECK-RV32-NEXT: ppaire.db a0, a4, a0
+; CHECK-RV32-NEXT: pack a2, a2, a3
+; CHECK-RV32-NEXT: pack a0, a0, a1
+; CHECK-RV32-NEXT: and a0, a0, a7
+; CHECK-RV32-NEXT: or a0, a0, a2
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_bitreverse_v4i8:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: pli.b a1, 15
+; CHECK-RV64-NEXT: srli a2, a0, 60
+; CHECK-RV64-NEXT: slli a3, a0, 8
+; CHECK-RV64-NEXT: slli a4, a0, 16
+; CHECK-RV64-NEXT: slli a5, a0, 24
+; CHECK-RV64-NEXT: srliw a6, a0, 28
+; CHECK-RV64-NEXT: slli a7, a0, 40
+; CHECK-RV64-NEXT: slli t0, a0, 48
+; CHECK-RV64-NEXT: slli t1, a0, 56
+; CHECK-RV64-NEXT: and a1, a0, a1
+; CHECK-RV64-NEXT: pli.b a0, 51
+; CHECK-RV64-NEXT: srli a3, a3, 60
+; CHECK-RV64-NEXT: srli a4, a4, 60
+; CHECK-RV64-NEXT: srli a5, a5, 60
+; CHECK-RV64-NEXT: srli a7, a7, 60
+; CHECK-RV64-NEXT: srli t0, t0, 60
+; CHECK-RV64-NEXT: srli t1, t1, 60
+; CHECK-RV64-NEXT: ppaire.b a2, a3, a2
+; CHECK-RV64-NEXT: srli a3, a1, 52
+; CHECK-RV64-NEXT: ppaire.b a4, a5, a4
+; CHECK-RV64-NEXT: srli a5, a1, 44
+; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
+; CHECK-RV64-NEXT: srli a7, a1, 36
+; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
+; CHECK-RV64-NEXT: srli t1, a1, 28
+; CHECK-RV64-NEXT: ppaire.b a3, a5, a3
+; CHECK-RV64-NEXT: srli a5, a1, 20
+; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
+; CHECK-RV64-NEXT: srli t1, a1, 12
+; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
+; CHECK-RV64-NEXT: srli t1, a1, 4
+; CHECK-RV64-NEXT: slli a1, a1, 4
+; CHECK-RV64-NEXT: ppaire.b a1, a1, t1
+; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
+; CHECK-RV64-NEXT: ppaire.h a4, t0, a6
+; CHECK-RV64-NEXT: ppaire.h a3, a7, a3
+; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
+; CHECK-RV64-NEXT: pack a2, a4, a2
+; CHECK-RV64-NEXT: pack a1, a1, a3
+; CHECK-RV64-NEXT: or a1, a2, a1
+; CHECK-RV64-NEXT: and a2, a1, a0
+; CHECK-RV64-NEXT: srli a3, a1, 58
+; CHECK-RV64-NEXT: slli a4, a1, 8
+; CHECK-RV64-NEXT: slli a5, a1, 16
+; CHECK-RV64-NEXT: srli a6, a2, 54
+; CHECK-RV64-NEXT: srli a7, a2, 46
+; CHECK-RV64-NEXT: srli t0, a2, 38
+; CHECK-RV64-NEXT: srli t1, a2, 30
+; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
+; CHECK-RV64-NEXT: srli a7, a2, 22
+; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
+; CHECK-RV64-NEXT: srli t1, a2, 14
+; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
+; CHECK-RV64-NEXT: srli t1, a2, 6
+; CHECK-RV64-NEXT: slli a2, a2, 2
+; CHECK-RV64-NEXT: ppaire.b a2, a2, t1
+; CHECK-RV64-NEXT: slli t1, a1, 24
+; CHECK-RV64-NEXT: srli a4, a4, 58
+; CHECK-RV64-NEXT: ppaire.b a3, a4, a3
+; CHECK-RV64-NEXT: srliw a4, a1, 26
+; CHECK-RV64-NEXT: srli a5, a5, 58
+; CHECK-RV64-NEXT: srli t1, t1, 58
+; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
+; CHECK-RV64-NEXT: slli t1, a1, 40
+; CHECK-RV64-NEXT: srli t1, t1, 58
+; CHECK-RV64-NEXT: ppaire.b a4, t1, a4
+; CHECK-RV64-NEXT: slli t1, a1, 48
+; CHECK-RV64-NEXT: slli a1, a1, 56
+; CHECK-RV64-NEXT: srli t1, t1, 58
+; CHECK-RV64-NEXT: srli a1, a1, 58
+; CHECK-RV64-NEXT: ppaire.b t1, a1, t1
+; CHECK-RV64-NEXT: pli.b a1, 85
+; CHECK-RV64-NEXT: ppaire.h a6, t0, a6
+; CHECK-RV64-NEXT: ppaire.h a2, a2, a7
+; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
+; CHECK-RV64-NEXT: ppaire.h a4, t1, a4
+; CHECK-RV64-NEXT: pack a2, a2, a6
+; CHECK-RV64-NEXT: pack a3, a4, a3
+; CHECK-RV64-NEXT: and a0, a3, a0
+; CHECK-RV64-NEXT: or a0, a0, a2
+; CHECK-RV64-NEXT: and a2, a0, a1
+; CHECK-RV64-NEXT: srli a3, a0, 57
+; CHECK-RV64-NEXT: slli a4, a0, 8
+; CHECK-RV64-NEXT: slli a5, a0, 16
+; CHECK-RV64-NEXT: srli a6, a2, 55
+; CHECK-RV64-NEXT: srli a7, a2, 47
+; CHECK-RV64-NEXT: srli t0, a2, 39
+; CHECK-RV64-NEXT: srli t1, a2, 31
+; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
+; CHECK-RV64-NEXT: srli a7, a2, 23
+; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
+; CHECK-RV64-NEXT: srli t1, a2, 15
+; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
+; CHECK-RV64-NEXT: srli t1, a2, 7
+; CHECK-RV64-NEXT: slli a2, a2, 1
+; CHECK-RV64-NEXT: ppaire.b a2, a2, t1
+; CHECK-RV64-NEXT: slli t1, a0, 24
+; CHECK-RV64-NEXT: srli a4, a4, 57
+; CHECK-RV64-NEXT: ppaire.b a3, a4, a3
+; CHECK-RV64-NEXT: srliw a4, a0, 25
+; CHECK-RV64-NEXT: srli a5, a5, 57
+; CHECK-RV64-NEXT: srli t1, t1, 57
+; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
+; CHECK-RV64-NEXT: slli t1, a0, 40
+; CHECK-RV64-NEXT: srli t1, t1, 57
+; CHECK-RV64-NEXT: ppaire.b a4, t1, a4
+; CHECK-RV64-NEXT: slli t1, a0, 48
+; CHECK-RV64-NEXT: slli a0, a0, 56
+; CHECK-RV64-NEXT: srli t1, t1, 57
+; CHECK-RV64-NEXT: srli a0, a0, 57
+; CHECK-RV64-NEXT: ppaire.b a0, a0, t1
+; CHECK-RV64-NEXT: ppaire.h a6, t0, a6
+; CHECK-RV64-NEXT: ppaire.h a2, a2, a7
+; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
+; CHECK-RV64-NEXT: ppaire.h a0, a0, a4
+; CHECK-RV64-NEXT: pack a2, a2, a6
+; CHECK-RV64-NEXT: pack a0, a0, a3
+; CHECK-RV64-NEXT: and a0, a0, a1
+; CHECK-RV64-NEXT: or a0, a0, a2
+; CHECK-RV64-NEXT: ret
+ %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %a)
+ ret <4 x i8> %res
+}
+
+define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) {
+; CHECK-RV32-LABEL: test_bitreverse_v2i16:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: srli a1, a0, 24
+; CHECK-RV32-NEXT: slli a2, a0, 16
+; CHECK-RV32-NEXT: slli a3, a0, 8
+; CHECK-RV32-NEXT: srli a0, a0, 16
+; CHECK-RV32-NEXT: srli a2, a2, 24
+; CHECK-RV32-NEXT: pack a1, a2, a1
+; CHECK-RV32-NEXT: lui a2, 1
+; CHECK-RV32-NEXT: slli a0, a0, 8
+; CHECK-RV32-NEXT: addi a2, a2, -241
+; CHECK-RV32-NEXT: pack a0, a3, a0
+; CHECK-RV32-NEXT: padd.hs a2, zero, a2
+; CHECK-RV32-NEXT: or a0, a0, a1
+; CHECK-RV32-NEXT: and a1, a0, a2
+; CHECK-RV32-NEXT: srli a3, a1, 12
+; CHECK-RV32-NEXT: slli a1, a1, 4
+; CHECK-RV32-NEXT: pack a1, a1, a3
+; CHECK-RV32-NEXT: srli a3, a0, 20
+; CHECK-RV32-NEXT: slli a0, a0, 16
+; CHECK-RV32-NEXT: srli a0, a0, 20
+; CHECK-RV32-NEXT: pack a0, a0, a3
+; CHECK-RV32-NEXT: lui a3, 3
+; CHECK-RV32-NEXT: addi a3, a3, 819
+; CHECK-RV32-NEXT: padd.hs a3, zero, a3
+; CHECK-RV32-NEXT: and a0, a0, a2
+; CHECK-RV32-NEXT: or a0, a0, a1
+; CHECK-RV32-NEXT: and a1, a0, a3
+; CHECK-RV32-NEXT: srli a2, a1, 14
+; CHECK-RV32-NEXT: slli a1, a1, 2
+; CHECK-RV32-NEXT: pack a1, a1, a2
+; CHECK-RV32-NEXT: srli a2, a0, 18
+; CHECK-RV32-NEXT: slli a0, a0, 16
+; CHECK-RV32-NEXT: srli a0, a0, 18
+; CHECK-RV32-NEXT: pack a0, a0, a2
+; CHECK-RV32-NEXT: lui a2, 5
+; CHECK-RV32-NEXT: addi a2, a2, 1365
+; CHECK-RV32-NEXT: padd.hs a2, zero, a2
+; CHECK-RV32-NEXT: and a0, a0, a3
+; CHECK-RV32-NEXT: or a0, a0, a1
+; CHECK-RV32-NEXT: and a1, a0, a2
+; CHECK-RV32-NEXT: srli a3, a1, 15
+; CHECK-RV32-NEXT: slli a1, a1, 1
+; CHECK-RV32-NEXT: pack a1, a1, a3
+; CHECK-RV32-NEXT: srli a3, a0, 17
+; CHECK-RV32-NEXT: slli a0, a0, 16
+; CHECK-RV32-NEXT: srli a0, a0, 17
+; CHECK-RV32-NEXT: pack a0, a0, a3
+; CHECK-RV32-NEXT: and a0, a0, a2
+; CHECK-RV32-NEXT: or a0, a0, a1
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_bitreverse_v2i16:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: srli a1, a0, 56
+; CHECK-RV64-NEXT: srli a2, a0, 40
+; CHECK-RV64-NEXT: srliw a3, a0, 24
+; CHECK-RV64-NEXT: slli a4, a0, 48
+; CHECK-RV64-NEXT: zext.b a5, a2
+; CHECK-RV64-NEXT: ppaire.h a1, a5, a1
+; CHECK-RV64-NEXT: srli a5, a0, 32
+; CHECK-RV64-NEXT: srli a4, a4, 56
+; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
+; CHECK-RV64-NEXT: slliw a4, a0, 8
+; CHECK-RV64-NEXT: srli a0, a0, 16
+; CHECK-RV64-NEXT: andi a2, a2, -256
+; CHECK-RV64-NEXT: slli a5, a5, 8
+; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
+; CHECK-RV64-NEXT: lui a5, 1
+; CHECK-RV64-NEXT: slli a0, a0, 8
+; CHECK-RV64-NEXT: addi a5, a5, -241
+; CHECK-RV64-NEXT: ppaire.h a0, a4, a0
+; CHECK-RV64-NEXT: padd.hs a4, zero, a5
+; CHECK-RV64-NEXT: pack a1, a3, a1
+; CHECK-RV64-NEXT: pack a0, a0, a2
+; CHECK-RV64-NEXT: or a0, a0, a1
+; CHECK-RV64-NEXT: and a1, a0, a4
+; CHECK-RV64-NEXT: srli a2, a0, 52
+; CHECK-RV64-NEXT: srli a3, a1, 44
+; CHECK-RV64-NEXT: srli a5, a1, 28
+; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
+; CHECK-RV64-NEXT: srli a5, a1, 12
+; CHECK-RV64-NEXT: slliw a1, a1, 4
+; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
+; CHECK-RV64-NEXT: slli a5, a0, 16
+; CHECK-RV64-NEXT: srli a5, a5, 52
+; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
+; CHECK-RV64-NEXT: srliw a5, a0, 20
+; CHECK-RV64-NEXT: slli a0, a0, 48
+; CHECK-RV64-NEXT: srli a0, a0, 52
+; CHECK-RV64-NEXT: ppaire.h a0, a0, a5
+; CHECK-RV64-NEXT: lui a5, 3
+; CHECK-RV64-NEXT: addi a5, a5, 819
+; CHECK-RV64-NEXT: padd.hs a5, zero, a5
+; CHECK-RV64-NEXT: pack a1, a1, a3
+; CHECK-RV64-NEXT: pack a0, a0, a2
+; CHECK-RV64-NEXT: and a0, a0, a4
+; CHECK-RV64-NEXT: or a0, a0, a1
+; CHECK-RV64-NEXT: and a1, a0, a5
+; CHECK-RV64-NEXT: srli a2, a0, 50
+; CHECK-RV64-NEXT: srli a3, a1, 46
+; CHECK-RV64-NEXT: srli a4, a1, 30
+; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
+; CHECK-RV64-NEXT: srli a4, a1, 14
+; CHECK-RV64-NEXT: slliw a1, a1, 2
+; CHECK-RV64-NEXT: ppaire.h a1, a1, a4
+; CHECK-RV64-NEXT: slli a4, a0, 16
+; CHECK-RV64-NEXT: srli a4, a4, 50
+; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
+; CHECK-RV64-NEXT: srliw a4, a0, 18
+; CHECK-RV64-NEXT: slli a0, a0, 48
+; CHECK-RV64-NEXT: srli a0, a0, 50
+; CHECK-RV64-NEXT: ppaire.h a0, a0, a4
+; CHECK-RV64-NEXT: lui a4, 5
+; CHECK-RV64-NEXT: addi a4, a4, 1365
+; CHECK-RV64-NEXT: padd.hs a4, zero, a4
+; CHECK-RV64-NEXT: pack a1, a1, a3
+; CHECK-RV64-NEXT: pack a0, a0, a2
+; CHECK-RV64-NEXT: and a0, a0, a5
+; CHECK-RV64-NEXT: or a0, a0, a1
+; CHECK-RV64-NEXT: and a1, a0, a4
+; CHECK-RV64-NEXT: srli a2, a0, 49
+; CHECK-RV64-NEXT: srli a3, a1, 47
+; CHECK-RV64-NEXT: srli a5, a1, 31
+; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
+; CHECK-RV64-NEXT: srli a5, a1, 15
+; CHECK-RV64-NEXT: slliw a1, a1, 1
+; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
+; CHECK-RV64-NEXT: slli a5, a0, 16
+; CHECK-RV64-NEXT: srli a5, a5, 49
+; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
+; CHECK-RV64-NEXT: srliw a5, a0, 17
+; CHECK-RV64-NEXT: slli a0, a0, 48
+; CHECK-RV64-NEXT: srli a0, a0, 49
+; CHECK-RV64-NEXT: ppaire.h a0, a0, a5
+; CHECK-RV64-NEXT: pack a1, a1, a3
+; CHECK-RV64-NEXT: pack a0, a0, a2
+; CHECK-RV64-NEXT: and a0, a0, a4
+; CHECK-RV64-NEXT: or a0, a0, a1
+; CHECK-RV64-NEXT: ret
+ %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
+ ret <2 x i16> %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index e577700f1085b..a54360d5d81e5 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2133,3 +2133,346 @@ define <2 x i32> @test_vselect_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
%res = select <2 x i1> %mask, <2 x i32> %c, <2 x i32> %b
ret <2 x i32> %res
}
+
+define <4 x i16> @test_bswap_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_bswap_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 56
+; CHECK-NEXT: srli a2, a0, 40
+; CHECK-NEXT: srliw a3, a0, 24
+; CHECK-NEXT: zext.b a4, a2
+; CHECK-NEXT: ppaire.h a1, a4, a1
+; CHECK-NEXT: slli a4, a0, 48
+; CHECK-NEXT: srli a4, a4, 56
+; CHECK-NEXT: ppaire.h a3, a4, a3
+; CHECK-NEXT: srli a4, a0, 32
+; CHECK-NEXT: andi a2, a2, -256
+; CHECK-NEXT: slli a4, a4, 8
+; CHECK-NEXT: ppaire.h a2, a4, a2
+; CHECK-NEXT: slliw a4, a0, 8
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: slli a0, a0, 8
+; CHECK-NEXT: ppaire.h a0, a4, a0
+; CHECK-NEXT: pack a1, a3, a1
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
+ ret <4 x i16> %res
+}
+
+define <2 x i32> @test_bswap_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_bswap_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 56
+; CHECK-NEXT: srliw a2, a0, 24
+; CHECK-NEXT: srli a3, a0, 40
+; CHECK-NEXT: srliw a4, a0, 8
+; CHECK-NEXT: pack a1, a2, a1
+; CHECK-NEXT: slli a2, a0, 24
+; CHECK-NEXT: pack a3, a4, a3
+; CHECK-NEXT: srli a4, a0, 32
+; CHECK-NEXT: slli a4, a4, 24
+; CHECK-NEXT: pack a2, a2, a4
+; CHECK-NEXT: lui a4, 16
+; CHECK-NEXT: addi a4, a4, -256
+; CHECK-NEXT: padd.ws a4, zero, a4
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: or a1, a3, a1
+; CHECK-NEXT: srli a3, a0, 24
+; CHECK-NEXT: slli a0, a0, 40
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: pack a0, a0, a3
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: ret
+ %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ ret <2 x i32> %res
+}
+
+define <8 x i8> @test_bitreverse_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_bitreverse_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.b a1, 15
+; CHECK-NEXT: srli a2, a0, 60
+; CHECK-NEXT: slli a3, a0, 8
+; CHECK-NEXT: slli a4, a0, 16
+; CHECK-NEXT: slli a5, a0, 24
+; CHECK-NEXT: srliw a6, a0, 28
+; CHECK-NEXT: slli a7, a0, 40
+; CHECK-NEXT: slli t0, a0, 48
+; CHECK-NEXT: slli t1, a0, 56
+; CHECK-NEXT: and a1, a0, a1
+; CHECK-NEXT: pli.b a0, 51
+; CHECK-NEXT: srli a3, a3, 60
+; CHECK-NEXT: srli a4, a4, 60
+; CHECK-NEXT: srli a5, a5, 60
+; CHECK-NEXT: srli a7, a7, 60
+; CHECK-NEXT: srli t0, t0, 60
+; CHECK-NEXT: srli t1, t1, 60
+; CHECK-NEXT: ppaire.b a2, a3, a2
+; CHECK-NEXT: srli a3, a1, 52
+; CHECK-NEXT: ppaire.b a4, a5, a4
+; CHECK-NEXT: srli a5, a1, 44
+; CHECK-NEXT: ppaire.b a6, a7, a6
+; CHECK-NEXT: srli a7, a1, 36
+; CHECK-NEXT: ppaire.b t0, t1, t0
+; CHECK-NEXT: srli t1, a1, 28
+; CHECK-NEXT: ppaire.b a3, a5, a3
+; CHECK-NEXT: srli a5, a1, 20
+; CHECK-NEXT: ppaire.b a7, t1, a7
+; CHECK-NEXT: srli t1, a1, 12
+; CHECK-NEXT: ppaire.b a5, t1, a5
+; CHECK-NEXT: srli t1, a1, 4
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: ppaire.b a1, a1, t1
+; CHECK-NEXT: ppaire.h a2, a4, a2
+; CHECK-NEXT: ppaire.h a4, t0, a6
+; CHECK-NEXT: ppaire.h a3, a7, a3
+; CHECK-NEXT: ppaire.h a1, a1, a5
+; CHECK-NEXT: pack a2, a4, a2
+; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: and a2, a1, a0
+; CHECK-NEXT: srli a3, a1, 58
+; CHECK-NEXT: slli a4, a1, 8
+; CHECK-NEXT: slli a5, a1, 16
+; CHECK-NEXT: srli a6, a2, 54
+; CHECK-NEXT: srli a7, a2, 46
+; CHECK-NEXT: srli t0, a2, 38
+; CHECK-NEXT: srli t1, a2, 30
+; CHECK-NEXT: ppaire.b a6, a7, a6
+; CHECK-NEXT: srli a7, a2, 22
+; CHECK-NEXT: ppaire.b t0, t1, t0
+; CHECK-NEXT: srli t1, a2, 14
+; CHECK-NEXT: ppaire.b a7, t1, a7
+; CHECK-NEXT: srli t1, a2, 6
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: ppaire.b a2, a2, t1
+; CHECK-NEXT: slli t1, a1, 24
+; CHECK-NEXT: srli a4, a4, 58
+; CHECK-NEXT: ppaire.b a3, a4, a3
+; CHECK-NEXT: srliw a4, a1, 26
+; CHECK-NEXT: srli a5, a5, 58
+; CHECK-NEXT: srli t1, t1, 58
+; CHECK-NEXT: ppaire.b a5, t1, a5
+; CHECK-NEXT: slli t1, a1, 40
+; CHECK-NEXT: srli t1, t1, 58
+; CHECK-NEXT: ppaire.b a4, t1, a4
+; CHECK-NEXT: slli t1, a1, 48
+; CHECK-NEXT: slli a1, a1, 56
+; CHECK-NEXT: srli t1, t1, 58
+; CHECK-NEXT: srli a1, a1, 58
+; CHECK-NEXT: ppaire.b t1, a1, t1
+; CHECK-NEXT: pli.b a1, 85
+; CHECK-NEXT: ppaire.h a6, t0, a6
+; CHECK-NEXT: ppaire.h a2, a2, a7
+; CHECK-NEXT: ppaire.h a3, a5, a3
+; CHECK-NEXT: ppaire.h a4, t1, a4
+; CHECK-NEXT: pack a2, a2, a6
+; CHECK-NEXT: pack a3, a4, a3
+; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: and a2, a0, a1
+; CHECK-NEXT: srli a3, a0, 57
+; CHECK-NEXT: slli a4, a0, 8
+; CHECK-NEXT: slli a5, a0, 16
+; CHECK-NEXT: srli a6, a2, 55
+; CHECK-NEXT: srli a7, a2, 47
+; CHECK-NEXT: srli t0, a2, 39
+; CHECK-NEXT: srli t1, a2, 31
+; CHECK-NEXT: ppaire.b a6, a7, a6
+; CHECK-NEXT: srli a7, a2, 23
+; CHECK-NEXT: ppaire.b t0, t1, t0
+; CHECK-NEXT: srli t1, a2, 15
+; CHECK-NEXT: ppaire.b a7, t1, a7
+; CHECK-NEXT: srli t1, a2, 7
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: ppaire.b a2, a2, t1
+; CHECK-NEXT: slli t1, a0, 24
+; CHECK-NEXT: srli a4, a4, 57
+; CHECK-NEXT: ppaire.b a3, a4, a3
+; CHECK-NEXT: srliw a4, a0, 25
+; CHECK-NEXT: srli a5, a5, 57
+; CHECK-NEXT: srli t1, t1, 57
+; CHECK-NEXT: ppaire.b a5, t1, a5
+; CHECK-NEXT: slli t1, a0, 40
+; CHECK-NEXT: srli t1, t1, 57
+; CHECK-NEXT: ppaire.b a4, t1, a4
+; CHECK-NEXT: slli t1, a0, 48
+; CHECK-NEXT: slli a0, a0, 56
+; CHECK-NEXT: srli t1, t1, 57
+; CHECK-NEXT: srli a0, a0, 57
+; CHECK-NEXT: ppaire.b a0, a0, t1
+; CHECK-NEXT: ppaire.h a6, t0, a6
+; CHECK-NEXT: ppaire.h a2, a2, a7
+; CHECK-NEXT: ppaire.h a3, a5, a3
+; CHECK-NEXT: ppaire.h a0, a0, a4
+; CHECK-NEXT: pack a2, a2, a6
+; CHECK-NEXT: pack a0, a0, a3
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: ret
+ %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_bitreverse_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_bitreverse_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 56
+; CHECK-NEXT: srli a2, a0, 40
+; CHECK-NEXT: srliw a3, a0, 24
+; CHECK-NEXT: slli a4, a0, 48
+; CHECK-NEXT: zext.b a5, a2
+; CHECK-NEXT: ppaire.h a1, a5, a1
+; CHECK-NEXT: srli a5, a0, 32
+; CHECK-NEXT: srli a4, a4, 56
+; CHECK-NEXT: ppaire.h a3, a4, a3
+; CHECK-NEXT: slliw a4, a0, 8
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: andi a2, a2, -256
+; CHECK-NEXT: slli a5, a5, 8
+; CHECK-NEXT: ppaire.h a2, a5, a2
+; CHECK-NEXT: lui a5, 1
+; CHECK-NEXT: slli a0, a0, 8
+; CHECK-NEXT: addi a5, a5, -241
+; CHECK-NEXT: ppaire.h a0, a4, a0
+; CHECK-NEXT: padd.hs a4, zero, a5
+; CHECK-NEXT: pack a1, a3, a1
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a0, a4
+; CHECK-NEXT: srli a2, a0, 52
+; CHECK-NEXT: srli a3, a1, 44
+; CHECK-NEXT: srli a5, a1, 28
+; CHECK-NEXT: ppaire.h a3, a5, a3
+; CHECK-NEXT: srli a5, a1, 12
+; CHECK-NEXT: slliw a1, a1, 4
+; CHECK-NEXT: ppaire.h a1, a1, a5
+; CHECK-NEXT: slli a5, a0, 16
+; CHECK-NEXT: srli a5, a5, 52
+; CHECK-NEXT: ppaire.h a2, a5, a2
+; CHECK-NEXT: srliw a5, a0, 20
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: srli a0, a0, 52
+; CHECK-NEXT: ppaire.h a0, a0, a5
+; CHECK-NEXT: lui a5, 3
+; CHECK-NEXT: addi a5, a5, 819
+; CHECK-NEXT: padd.hs a5, zero, a5
+; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a0, a5
+; CHECK-NEXT: srli a2, a0, 50
+; CHECK-NEXT: srli a3, a1, 46
+; CHECK-NEXT: srli a4, a1, 30
+; CHECK-NEXT: ppaire.h a3, a4, a3
+; CHECK-NEXT: srli a4, a1, 14
+; CHECK-NEXT: slliw a1, a1, 2
+; CHECK-NEXT: ppaire.h a1, a1, a4
+; CHECK-NEXT: slli a4, a0, 16
+; CHECK-NEXT: srli a4, a4, 50
+; CHECK-NEXT: ppaire.h a2, a4, a2
+; CHECK-NEXT: srliw a4, a0, 18
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: srli a0, a0, 50
+; CHECK-NEXT: ppaire.h a0, a0, a4
+; CHECK-NEXT: lui a4, 5
+; CHECK-NEXT: addi a4, a4, 1365
+; CHECK-NEXT: padd.hs a4, zero, a4
+; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: and a0, a0, a5
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a0, a4
+; CHECK-NEXT: srli a2, a0, 49
+; CHECK-NEXT: srli a3, a1, 47
+; CHECK-NEXT: srli a5, a1, 31
+; CHECK-NEXT: ppaire.h a3, a5, a3
+; CHECK-NEXT: srli a5, a1, 15
+; CHECK-NEXT: slliw a1, a1, 1
+; CHECK-NEXT: ppaire.h a1, a1, a5
+; CHECK-NEXT: slli a5, a0, 16
+; CHECK-NEXT: srli a5, a5, 49
+; CHECK-NEXT: ppaire.h a2, a5, a2
+; CHECK-NEXT: srliw a5, a0, 17
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: srli a0, a0, 49
+; CHECK-NEXT: ppaire.h a0, a0, a5
+; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
+ ret <4 x i16> %res
+}
+
+define <2 x i32> @test_bitreverse_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_bitreverse_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 56
+; CHECK-NEXT: srliw a2, a0, 24
+; CHECK-NEXT: srli a3, a0, 40
+; CHECK-NEXT: srliw a4, a0, 8
+; CHECK-NEXT: pack a1, a2, a1
+; CHECK-NEXT: slli a2, a0, 24
+; CHECK-NEXT: pack a3, a4, a3
+; CHECK-NEXT: srli a4, a0, 32
+; CHECK-NEXT: slli a4, a4, 24
+; CHECK-NEXT: pack a2, a2, a4
+; CHECK-NEXT: lui a4, 16
+; CHECK-NEXT: addi a4, a4, -256
+; CHECK-NEXT: padd.ws a4, zero, a4
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: or a1, a3, a1
+; CHECK-NEXT: lui a3, 61681
+; CHECK-NEXT: addi a3, a3, -241
+; CHECK-NEXT: padd.ws a3, zero, a3
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: srli a2, a0, 24
+; CHECK-NEXT: slli a0, a0, 40
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: and a1, a0, a3
+; CHECK-NEXT: srli a2, a0, 36
+; CHECK-NEXT: srliw a0, a0, 4
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: srli a2, a1, 28
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: pack a1, a1, a2
+; CHECK-NEXT: lui a2, 209715
+; CHECK-NEXT: addi a2, a2, 819
+; CHECK-NEXT: padd.ws a2, zero, a2
+; CHECK-NEXT: and a0, a0, a3
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a0, a2
+; CHECK-NEXT: srli a3, a0, 34
+; CHECK-NEXT: srliw a0, a0, 2
+; CHECK-NEXT: pack a0, a0, a3
+; CHECK-NEXT: srli a3, a1, 30
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: lui a3, 349525
+; CHECK-NEXT: addi a3, a3, 1365
+; CHECK-NEXT: padd.ws a3, zero, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a0, a3
+; CHECK-NEXT: srli a2, a0, 33
+; CHECK-NEXT: srliw a0, a0, 1
+; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: srli a2, a1, 31
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: pack a1, a1, a2
+; CHECK-NEXT: and a0, a0, a3
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
+ ret <2 x i32> %res
+}
>From ddd40e864f6c0911a8ee4baabd3dd4cdd9ac2512 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 5 Mar 2026 14:40:07 -0800
Subject: [PATCH 2/3] [RISCV][P-ext] Recognize vector shifts with splat
build_vector shift amount.
If the shift is created during LegalizeVectorOps, the shift amount
will be created as a build_vector. The splat_vector is only formed by a
later DAGCombine, and LegalizeVectorOps will visit the new shift before
the splat_vector can be created. Handle this case too.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +-
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 414 +++-----------------
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 377 ++++--------------
3 files changed, 151 insertions(+), 653 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5c4b1f3a4bdc8..71decc9e150f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8859,9 +8859,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SRA:
if (Op.getSimpleValueType().isFixedLengthVector()) {
if (Subtarget.hasStdExtP()) {
- // There's no vector-vector version of shift instruction in P extension
- // so we need to unroll to scalar computation and pack them back.
- if (Op.getOperand(1)->getOpcode() != ISD::SPLAT_VECTOR)
+ SDValue ShAmtVec = Op.getOperand(1);
+ SDValue SplatVal;
+ if (ShAmtVec.getOpcode() == ISD::SPLAT_VECTOR)
+ SplatVal = ShAmtVec.getOperand(0);
+ else if (ShAmtVec.getOpcode() == ISD::BUILD_VECTOR)
+ SplatVal = cast<BuildVectorSDNode>(ShAmtVec)->getSplatValue();
+
+ if (!SplatVal)
return DAG.UnrollVectorOp(Op.getNode());
unsigned Opc;
@@ -8879,7 +8884,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
break;
}
return DAG.getNode(Opc, SDLoc(Op), Op.getValueType(), Op.getOperand(0),
- Op.getOperand(1).getOperand(0));
+ SplatVal);
}
return lowerToScalableOp(Op, DAG);
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 17431dd12fe37..f315d8fc9a7a5 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1765,373 +1765,73 @@ define <4 x i8> @test_vselect_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
}
define <2 x i16> @test_bswap_v2i16(<2 x i16> %a) {
-; CHECK-RV32-LABEL: test_bswap_v2i16:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: srli a1, a0, 24
-; CHECK-RV32-NEXT: slli a2, a0, 16
-; CHECK-RV32-NEXT: srli a2, a2, 24
-; CHECK-RV32-NEXT: pack a1, a2, a1
-; CHECK-RV32-NEXT: slli a2, a0, 8
-; CHECK-RV32-NEXT: srli a0, a0, 16
-; CHECK-RV32-NEXT: slli a0, a0, 8
-; CHECK-RV32-NEXT: pack a0, a2, a0
-; CHECK-RV32-NEXT: or a0, a0, a1
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_bswap_v2i16:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: srli a1, a0, 56
-; CHECK-RV64-NEXT: srli a2, a0, 40
-; CHECK-RV64-NEXT: srliw a3, a0, 24
-; CHECK-RV64-NEXT: zext.b a4, a2
-; CHECK-RV64-NEXT: ppaire.h a1, a4, a1
-; CHECK-RV64-NEXT: slli a4, a0, 48
-; CHECK-RV64-NEXT: srli a4, a4, 56
-; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
-; CHECK-RV64-NEXT: srli a4, a0, 32
-; CHECK-RV64-NEXT: andi a2, a2, -256
-; CHECK-RV64-NEXT: slli a4, a4, 8
-; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
-; CHECK-RV64-NEXT: slliw a4, a0, 8
-; CHECK-RV64-NEXT: srli a0, a0, 16
-; CHECK-RV64-NEXT: slli a0, a0, 8
-; CHECK-RV64-NEXT: ppaire.h a0, a4, a0
-; CHECK-RV64-NEXT: pack a1, a3, a1
-; CHECK-RV64-NEXT: pack a0, a0, a2
-; CHECK-RV64-NEXT: or a0, a0, a1
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_bswap_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: psrli.h a1, a0, 8
+; CHECK-NEXT: pslli.h a0, a0, 8
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
%res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
ret <2 x i16> %res
}
define <4 x i8> @test_bitreverse_v4i8(<4 x i8> %a) {
-; CHECK-RV32-LABEL: test_bitreverse_v4i8:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: pli.b a1, 15
-; CHECK-RV32-NEXT: srli a3, a0, 28
-; CHECK-RV32-NEXT: slli a2, a0, 16
-; CHECK-RV32-NEXT: slli a4, a0, 8
-; CHECK-RV32-NEXT: slli a5, a0, 24
-; CHECK-RV32-NEXT: pli.b a6, 51
-; CHECK-RV32-NEXT: and a0, a0, a1
-; CHECK-RV32-NEXT: srli a2, a2, 28
-; CHECK-RV32-NEXT: srli t2, a4, 28
-; CHECK-RV32-NEXT: srli t1, a5, 28
-; CHECK-RV32-NEXT: srli a5, a0, 20
-; CHECK-RV32-NEXT: ppaire.db a2, t1, a2
-; CHECK-RV32-NEXT: srli a1, a0, 12
-; CHECK-RV32-NEXT: srli a4, a0, 4
-; CHECK-RV32-NEXT: slli a0, a0, 4
-; CHECK-RV32-NEXT: ppaire.db a0, a0, a4
-; CHECK-RV32-NEXT: pack a2, a2, a3
-; CHECK-RV32-NEXT: pack a0, a0, a1
-; CHECK-RV32-NEXT: or a0, a2, a0
-; CHECK-RV32-NEXT: and a1, a0, a6
-; CHECK-RV32-NEXT: srli a3, a1, 22
-; CHECK-RV32-NEXT: srli a5, a1, 14
-; CHECK-RV32-NEXT: srli a2, a1, 6
-; CHECK-RV32-NEXT: slli a4, a1, 2
-; CHECK-RV32-NEXT: srli a1, a0, 26
-; CHECK-RV32-NEXT: ppaire.db a2, a4, a2
-; CHECK-RV32-NEXT: slli a4, a0, 16
-; CHECK-RV32-NEXT: slli a5, a0, 8
-; CHECK-RV32-NEXT: slli a7, a0, 24
-; CHECK-RV32-NEXT: srli a0, a4, 26
-; CHECK-RV32-NEXT: srli a5, a5, 26
-; CHECK-RV32-NEXT: srli a4, a7, 26
-; CHECK-RV32-NEXT: pli.b a7, 85
-; CHECK-RV32-NEXT: ppaire.db a0, a4, a0
-; CHECK-RV32-NEXT: pack a2, a2, a3
-; CHECK-RV32-NEXT: pack a0, a0, a1
-; CHECK-RV32-NEXT: and a0, a0, a6
-; CHECK-RV32-NEXT: or a0, a0, a2
-; CHECK-RV32-NEXT: and a1, a0, a7
-; CHECK-RV32-NEXT: srli a3, a1, 23
-; CHECK-RV32-NEXT: srli a5, a1, 15
-; CHECK-RV32-NEXT: srli a2, a1, 7
-; CHECK-RV32-NEXT: slli a4, a1, 1
-; CHECK-RV32-NEXT: srli a1, a0, 25
-; CHECK-RV32-NEXT: ppaire.db a2, a4, a2
-; CHECK-RV32-NEXT: slli a4, a0, 16
-; CHECK-RV32-NEXT: slli a5, a0, 8
-; CHECK-RV32-NEXT: slli a6, a0, 24
-; CHECK-RV32-NEXT: srli a0, a4, 25
-; CHECK-RV32-NEXT: srli a5, a5, 25
-; CHECK-RV32-NEXT: srli a4, a6, 25
-; CHECK-RV32-NEXT: ppaire.db a0, a4, a0
-; CHECK-RV32-NEXT: pack a2, a2, a3
-; CHECK-RV32-NEXT: pack a0, a0, a1
-; CHECK-RV32-NEXT: and a0, a0, a7
-; CHECK-RV32-NEXT: or a0, a0, a2
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_bitreverse_v4i8:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: pli.b a1, 15
-; CHECK-RV64-NEXT: srli a2, a0, 60
-; CHECK-RV64-NEXT: slli a3, a0, 8
-; CHECK-RV64-NEXT: slli a4, a0, 16
-; CHECK-RV64-NEXT: slli a5, a0, 24
-; CHECK-RV64-NEXT: srliw a6, a0, 28
-; CHECK-RV64-NEXT: slli a7, a0, 40
-; CHECK-RV64-NEXT: slli t0, a0, 48
-; CHECK-RV64-NEXT: slli t1, a0, 56
-; CHECK-RV64-NEXT: and a1, a0, a1
-; CHECK-RV64-NEXT: pli.b a0, 51
-; CHECK-RV64-NEXT: srli a3, a3, 60
-; CHECK-RV64-NEXT: srli a4, a4, 60
-; CHECK-RV64-NEXT: srli a5, a5, 60
-; CHECK-RV64-NEXT: srli a7, a7, 60
-; CHECK-RV64-NEXT: srli t0, t0, 60
-; CHECK-RV64-NEXT: srli t1, t1, 60
-; CHECK-RV64-NEXT: ppaire.b a2, a3, a2
-; CHECK-RV64-NEXT: srli a3, a1, 52
-; CHECK-RV64-NEXT: ppaire.b a4, a5, a4
-; CHECK-RV64-NEXT: srli a5, a1, 44
-; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
-; CHECK-RV64-NEXT: srli a7, a1, 36
-; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
-; CHECK-RV64-NEXT: srli t1, a1, 28
-; CHECK-RV64-NEXT: ppaire.b a3, a5, a3
-; CHECK-RV64-NEXT: srli a5, a1, 20
-; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
-; CHECK-RV64-NEXT: srli t1, a1, 12
-; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
-; CHECK-RV64-NEXT: srli t1, a1, 4
-; CHECK-RV64-NEXT: slli a1, a1, 4
-; CHECK-RV64-NEXT: ppaire.b a1, a1, t1
-; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
-; CHECK-RV64-NEXT: ppaire.h a4, t0, a6
-; CHECK-RV64-NEXT: ppaire.h a3, a7, a3
-; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
-; CHECK-RV64-NEXT: pack a2, a4, a2
-; CHECK-RV64-NEXT: pack a1, a1, a3
-; CHECK-RV64-NEXT: or a1, a2, a1
-; CHECK-RV64-NEXT: and a2, a1, a0
-; CHECK-RV64-NEXT: srli a3, a1, 58
-; CHECK-RV64-NEXT: slli a4, a1, 8
-; CHECK-RV64-NEXT: slli a5, a1, 16
-; CHECK-RV64-NEXT: srli a6, a2, 54
-; CHECK-RV64-NEXT: srli a7, a2, 46
-; CHECK-RV64-NEXT: srli t0, a2, 38
-; CHECK-RV64-NEXT: srli t1, a2, 30
-; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
-; CHECK-RV64-NEXT: srli a7, a2, 22
-; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
-; CHECK-RV64-NEXT: srli t1, a2, 14
-; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
-; CHECK-RV64-NEXT: srli t1, a2, 6
-; CHECK-RV64-NEXT: slli a2, a2, 2
-; CHECK-RV64-NEXT: ppaire.b a2, a2, t1
-; CHECK-RV64-NEXT: slli t1, a1, 24
-; CHECK-RV64-NEXT: srli a4, a4, 58
-; CHECK-RV64-NEXT: ppaire.b a3, a4, a3
-; CHECK-RV64-NEXT: srliw a4, a1, 26
-; CHECK-RV64-NEXT: srli a5, a5, 58
-; CHECK-RV64-NEXT: srli t1, t1, 58
-; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
-; CHECK-RV64-NEXT: slli t1, a1, 40
-; CHECK-RV64-NEXT: srli t1, t1, 58
-; CHECK-RV64-NEXT: ppaire.b a4, t1, a4
-; CHECK-RV64-NEXT: slli t1, a1, 48
-; CHECK-RV64-NEXT: slli a1, a1, 56
-; CHECK-RV64-NEXT: srli t1, t1, 58
-; CHECK-RV64-NEXT: srli a1, a1, 58
-; CHECK-RV64-NEXT: ppaire.b t1, a1, t1
-; CHECK-RV64-NEXT: pli.b a1, 85
-; CHECK-RV64-NEXT: ppaire.h a6, t0, a6
-; CHECK-RV64-NEXT: ppaire.h a2, a2, a7
-; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
-; CHECK-RV64-NEXT: ppaire.h a4, t1, a4
-; CHECK-RV64-NEXT: pack a2, a2, a6
-; CHECK-RV64-NEXT: pack a3, a4, a3
-; CHECK-RV64-NEXT: and a0, a3, a0
-; CHECK-RV64-NEXT: or a0, a0, a2
-; CHECK-RV64-NEXT: and a2, a0, a1
-; CHECK-RV64-NEXT: srli a3, a0, 57
-; CHECK-RV64-NEXT: slli a4, a0, 8
-; CHECK-RV64-NEXT: slli a5, a0, 16
-; CHECK-RV64-NEXT: srli a6, a2, 55
-; CHECK-RV64-NEXT: srli a7, a2, 47
-; CHECK-RV64-NEXT: srli t0, a2, 39
-; CHECK-RV64-NEXT: srli t1, a2, 31
-; CHECK-RV64-NEXT: ppaire.b a6, a7, a6
-; CHECK-RV64-NEXT: srli a7, a2, 23
-; CHECK-RV64-NEXT: ppaire.b t0, t1, t0
-; CHECK-RV64-NEXT: srli t1, a2, 15
-; CHECK-RV64-NEXT: ppaire.b a7, t1, a7
-; CHECK-RV64-NEXT: srli t1, a2, 7
-; CHECK-RV64-NEXT: slli a2, a2, 1
-; CHECK-RV64-NEXT: ppaire.b a2, a2, t1
-; CHECK-RV64-NEXT: slli t1, a0, 24
-; CHECK-RV64-NEXT: srli a4, a4, 57
-; CHECK-RV64-NEXT: ppaire.b a3, a4, a3
-; CHECK-RV64-NEXT: srliw a4, a0, 25
-; CHECK-RV64-NEXT: srli a5, a5, 57
-; CHECK-RV64-NEXT: srli t1, t1, 57
-; CHECK-RV64-NEXT: ppaire.b a5, t1, a5
-; CHECK-RV64-NEXT: slli t1, a0, 40
-; CHECK-RV64-NEXT: srli t1, t1, 57
-; CHECK-RV64-NEXT: ppaire.b a4, t1, a4
-; CHECK-RV64-NEXT: slli t1, a0, 48
-; CHECK-RV64-NEXT: slli a0, a0, 56
-; CHECK-RV64-NEXT: srli t1, t1, 57
-; CHECK-RV64-NEXT: srli a0, a0, 57
-; CHECK-RV64-NEXT: ppaire.b a0, a0, t1
-; CHECK-RV64-NEXT: ppaire.h a6, t0, a6
-; CHECK-RV64-NEXT: ppaire.h a2, a2, a7
-; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
-; CHECK-RV64-NEXT: ppaire.h a0, a0, a4
-; CHECK-RV64-NEXT: pack a2, a2, a6
-; CHECK-RV64-NEXT: pack a0, a0, a3
-; CHECK-RV64-NEXT: and a0, a0, a1
-; CHECK-RV64-NEXT: or a0, a0, a2
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_bitreverse_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: psrli.b a1, a0, 4
+; CHECK-NEXT: pli.b a2, 15
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: pli.b a2, 51
+; CHECK-NEXT: pslli.b a0, a0, 4
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.b a1, a0, 2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 85
+; CHECK-NEXT: pslli.b a0, a0, 2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.b a1, a0, 1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pslli.b a0, a0, 1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: ret
%res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %a)
ret <4 x i8> %res
}
define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) {
-; CHECK-RV32-LABEL: test_bitreverse_v2i16:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: srli a1, a0, 24
-; CHECK-RV32-NEXT: slli a2, a0, 16
-; CHECK-RV32-NEXT: slli a3, a0, 8
-; CHECK-RV32-NEXT: srli a0, a0, 16
-; CHECK-RV32-NEXT: srli a2, a2, 24
-; CHECK-RV32-NEXT: pack a1, a2, a1
-; CHECK-RV32-NEXT: lui a2, 1
-; CHECK-RV32-NEXT: slli a0, a0, 8
-; CHECK-RV32-NEXT: addi a2, a2, -241
-; CHECK-RV32-NEXT: pack a0, a3, a0
-; CHECK-RV32-NEXT: padd.hs a2, zero, a2
-; CHECK-RV32-NEXT: or a0, a0, a1
-; CHECK-RV32-NEXT: and a1, a0, a2
-; CHECK-RV32-NEXT: srli a3, a1, 12
-; CHECK-RV32-NEXT: slli a1, a1, 4
-; CHECK-RV32-NEXT: pack a1, a1, a3
-; CHECK-RV32-NEXT: srli a3, a0, 20
-; CHECK-RV32-NEXT: slli a0, a0, 16
-; CHECK-RV32-NEXT: srli a0, a0, 20
-; CHECK-RV32-NEXT: pack a0, a0, a3
-; CHECK-RV32-NEXT: lui a3, 3
-; CHECK-RV32-NEXT: addi a3, a3, 819
-; CHECK-RV32-NEXT: padd.hs a3, zero, a3
-; CHECK-RV32-NEXT: and a0, a0, a2
-; CHECK-RV32-NEXT: or a0, a0, a1
-; CHECK-RV32-NEXT: and a1, a0, a3
-; CHECK-RV32-NEXT: srli a2, a1, 14
-; CHECK-RV32-NEXT: slli a1, a1, 2
-; CHECK-RV32-NEXT: pack a1, a1, a2
-; CHECK-RV32-NEXT: srli a2, a0, 18
-; CHECK-RV32-NEXT: slli a0, a0, 16
-; CHECK-RV32-NEXT: srli a0, a0, 18
-; CHECK-RV32-NEXT: pack a0, a0, a2
-; CHECK-RV32-NEXT: lui a2, 5
-; CHECK-RV32-NEXT: addi a2, a2, 1365
-; CHECK-RV32-NEXT: padd.hs a2, zero, a2
-; CHECK-RV32-NEXT: and a0, a0, a3
-; CHECK-RV32-NEXT: or a0, a0, a1
-; CHECK-RV32-NEXT: and a1, a0, a2
-; CHECK-RV32-NEXT: srli a3, a1, 15
-; CHECK-RV32-NEXT: slli a1, a1, 1
-; CHECK-RV32-NEXT: pack a1, a1, a3
-; CHECK-RV32-NEXT: srli a3, a0, 17
-; CHECK-RV32-NEXT: slli a0, a0, 16
-; CHECK-RV32-NEXT: srli a0, a0, 17
-; CHECK-RV32-NEXT: pack a0, a0, a3
-; CHECK-RV32-NEXT: and a0, a0, a2
-; CHECK-RV32-NEXT: or a0, a0, a1
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_bitreverse_v2i16:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: srli a1, a0, 56
-; CHECK-RV64-NEXT: srli a2, a0, 40
-; CHECK-RV64-NEXT: srliw a3, a0, 24
-; CHECK-RV64-NEXT: slli a4, a0, 48
-; CHECK-RV64-NEXT: zext.b a5, a2
-; CHECK-RV64-NEXT: ppaire.h a1, a5, a1
-; CHECK-RV64-NEXT: srli a5, a0, 32
-; CHECK-RV64-NEXT: srli a4, a4, 56
-; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
-; CHECK-RV64-NEXT: slliw a4, a0, 8
-; CHECK-RV64-NEXT: srli a0, a0, 16
-; CHECK-RV64-NEXT: andi a2, a2, -256
-; CHECK-RV64-NEXT: slli a5, a5, 8
-; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
-; CHECK-RV64-NEXT: lui a5, 1
-; CHECK-RV64-NEXT: slli a0, a0, 8
-; CHECK-RV64-NEXT: addi a5, a5, -241
-; CHECK-RV64-NEXT: ppaire.h a0, a4, a0
-; CHECK-RV64-NEXT: padd.hs a4, zero, a5
-; CHECK-RV64-NEXT: pack a1, a3, a1
-; CHECK-RV64-NEXT: pack a0, a0, a2
-; CHECK-RV64-NEXT: or a0, a0, a1
-; CHECK-RV64-NEXT: and a1, a0, a4
-; CHECK-RV64-NEXT: srli a2, a0, 52
-; CHECK-RV64-NEXT: srli a3, a1, 44
-; CHECK-RV64-NEXT: srli a5, a1, 28
-; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
-; CHECK-RV64-NEXT: srli a5, a1, 12
-; CHECK-RV64-NEXT: slliw a1, a1, 4
-; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
-; CHECK-RV64-NEXT: slli a5, a0, 16
-; CHECK-RV64-NEXT: srli a5, a5, 52
-; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
-; CHECK-RV64-NEXT: srliw a5, a0, 20
-; CHECK-RV64-NEXT: slli a0, a0, 48
-; CHECK-RV64-NEXT: srli a0, a0, 52
-; CHECK-RV64-NEXT: ppaire.h a0, a0, a5
-; CHECK-RV64-NEXT: lui a5, 3
-; CHECK-RV64-NEXT: addi a5, a5, 819
-; CHECK-RV64-NEXT: padd.hs a5, zero, a5
-; CHECK-RV64-NEXT: pack a1, a1, a3
-; CHECK-RV64-NEXT: pack a0, a0, a2
-; CHECK-RV64-NEXT: and a0, a0, a4
-; CHECK-RV64-NEXT: or a0, a0, a1
-; CHECK-RV64-NEXT: and a1, a0, a5
-; CHECK-RV64-NEXT: srli a2, a0, 50
-; CHECK-RV64-NEXT: srli a3, a1, 46
-; CHECK-RV64-NEXT: srli a4, a1, 30
-; CHECK-RV64-NEXT: ppaire.h a3, a4, a3
-; CHECK-RV64-NEXT: srli a4, a1, 14
-; CHECK-RV64-NEXT: slliw a1, a1, 2
-; CHECK-RV64-NEXT: ppaire.h a1, a1, a4
-; CHECK-RV64-NEXT: slli a4, a0, 16
-; CHECK-RV64-NEXT: srli a4, a4, 50
-; CHECK-RV64-NEXT: ppaire.h a2, a4, a2
-; CHECK-RV64-NEXT: srliw a4, a0, 18
-; CHECK-RV64-NEXT: slli a0, a0, 48
-; CHECK-RV64-NEXT: srli a0, a0, 50
-; CHECK-RV64-NEXT: ppaire.h a0, a0, a4
-; CHECK-RV64-NEXT: lui a4, 5
-; CHECK-RV64-NEXT: addi a4, a4, 1365
-; CHECK-RV64-NEXT: padd.hs a4, zero, a4
-; CHECK-RV64-NEXT: pack a1, a1, a3
-; CHECK-RV64-NEXT: pack a0, a0, a2
-; CHECK-RV64-NEXT: and a0, a0, a5
-; CHECK-RV64-NEXT: or a0, a0, a1
-; CHECK-RV64-NEXT: and a1, a0, a4
-; CHECK-RV64-NEXT: srli a2, a0, 49
-; CHECK-RV64-NEXT: srli a3, a1, 47
-; CHECK-RV64-NEXT: srli a5, a1, 31
-; CHECK-RV64-NEXT: ppaire.h a3, a5, a3
-; CHECK-RV64-NEXT: srli a5, a1, 15
-; CHECK-RV64-NEXT: slliw a1, a1, 1
-; CHECK-RV64-NEXT: ppaire.h a1, a1, a5
-; CHECK-RV64-NEXT: slli a5, a0, 16
-; CHECK-RV64-NEXT: srli a5, a5, 49
-; CHECK-RV64-NEXT: ppaire.h a2, a5, a2
-; CHECK-RV64-NEXT: srliw a5, a0, 17
-; CHECK-RV64-NEXT: slli a0, a0, 48
-; CHECK-RV64-NEXT: srli a0, a0, 49
-; CHECK-RV64-NEXT: ppaire.h a0, a0, a5
-; CHECK-RV64-NEXT: pack a1, a1, a3
-; CHECK-RV64-NEXT: pack a0, a0, a2
-; CHECK-RV64-NEXT: and a0, a0, a4
-; CHECK-RV64-NEXT: or a0, a0, a1
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_bitreverse_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: psrli.h a1, a0, 8
+; CHECK-NEXT: pslli.h a0, a0, 8
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: addi a1, a2, -241
+; CHECK-NEXT: psrli.h a2, a0, 4
+; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: and a2, a2, a1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: lui a1, 3
+; CHECK-NEXT: addi a1, a1, 819
+; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: pslli.h a0, a0, 4
+; CHECK-NEXT: or a0, a2, a0
+; CHECK-NEXT: psrli.h a2, a0, 2
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: lui a2, 5
+; CHECK-NEXT: addi a2, a2, 1365
+; CHECK-NEXT: padd.hs a2, zero, a2
+; CHECK-NEXT: pslli.h a0, a0, 2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.h a1, a0, 1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pslli.h a0, a0, 1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: ret
%res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %res
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index a54360d5d81e5..e4f35c8255c4e 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2137,24 +2137,8 @@ define <2 x i32> @test_vselect_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
define <4 x i16> @test_bswap_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_bswap_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 56
-; CHECK-NEXT: srli a2, a0, 40
-; CHECK-NEXT: srliw a3, a0, 24
-; CHECK-NEXT: zext.b a4, a2
-; CHECK-NEXT: ppaire.h a1, a4, a1
-; CHECK-NEXT: slli a4, a0, 48
-; CHECK-NEXT: srli a4, a4, 56
-; CHECK-NEXT: ppaire.h a3, a4, a3
-; CHECK-NEXT: srli a4, a0, 32
-; CHECK-NEXT: andi a2, a2, -256
-; CHECK-NEXT: slli a4, a4, 8
-; CHECK-NEXT: ppaire.h a2, a4, a2
-; CHECK-NEXT: slliw a4, a0, 8
-; CHECK-NEXT: srli a0, a0, 16
-; CHECK-NEXT: slli a0, a0, 8
-; CHECK-NEXT: ppaire.h a0, a4, a0
-; CHECK-NEXT: pack a1, a3, a1
-; CHECK-NEXT: pack a0, a0, a2
+; CHECK-NEXT: psrli.h a1, a0, 8
+; CHECK-NEXT: pslli.h a0, a0, 8
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: ret
%res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
@@ -2164,28 +2148,18 @@ define <4 x i16> @test_bswap_v4i16(<4 x i16> %a) {
define <2 x i32> @test_bswap_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_bswap_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 56
-; CHECK-NEXT: srliw a2, a0, 24
-; CHECK-NEXT: srli a3, a0, 40
-; CHECK-NEXT: srliw a4, a0, 8
-; CHECK-NEXT: pack a1, a2, a1
-; CHECK-NEXT: slli a2, a0, 24
-; CHECK-NEXT: pack a3, a4, a3
-; CHECK-NEXT: srli a4, a0, 32
-; CHECK-NEXT: slli a4, a4, 24
-; CHECK-NEXT: pack a2, a2, a4
-; CHECK-NEXT: lui a4, 16
-; CHECK-NEXT: addi a4, a4, -256
-; CHECK-NEXT: padd.ws a4, zero, a4
-; CHECK-NEXT: and a3, a3, a4
-; CHECK-NEXT: and a0, a0, a4
-; CHECK-NEXT: or a1, a3, a1
-; CHECK-NEXT: srli a3, a0, 24
-; CHECK-NEXT: slli a0, a0, 40
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: pack a0, a0, a3
-; CHECK-NEXT: or a1, a2, a1
-; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.w a1, a0, 8
+; CHECK-NEXT: lui a2, 16
+; CHECK-NEXT: psrli.w a3, a0, 24
+; CHECK-NEXT: addi a2, a2, -256
+; CHECK-NEXT: padd.ws a2, zero, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a2, a0, a2
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: pslli.w a2, a2, 8
+; CHECK-NEXT: pslli.w a0, a0, 24
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: ret
%res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
ret <2 x i32> %res
@@ -2194,125 +2168,24 @@ define <2 x i32> @test_bswap_v2i32(<2 x i32> %a) {
define <8 x i8> @test_bitreverse_v8i8(<8 x i8> %a) {
; CHECK-LABEL: test_bitreverse_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pli.b a1, 15
-; CHECK-NEXT: srli a2, a0, 60
-; CHECK-NEXT: slli a3, a0, 8
-; CHECK-NEXT: slli a4, a0, 16
-; CHECK-NEXT: slli a5, a0, 24
-; CHECK-NEXT: srliw a6, a0, 28
-; CHECK-NEXT: slli a7, a0, 40
-; CHECK-NEXT: slli t0, a0, 48
-; CHECK-NEXT: slli t1, a0, 56
-; CHECK-NEXT: and a1, a0, a1
-; CHECK-NEXT: pli.b a0, 51
-; CHECK-NEXT: srli a3, a3, 60
-; CHECK-NEXT: srli a4, a4, 60
-; CHECK-NEXT: srli a5, a5, 60
-; CHECK-NEXT: srli a7, a7, 60
-; CHECK-NEXT: srli t0, t0, 60
-; CHECK-NEXT: srli t1, t1, 60
-; CHECK-NEXT: ppaire.b a2, a3, a2
-; CHECK-NEXT: srli a3, a1, 52
-; CHECK-NEXT: ppaire.b a4, a5, a4
-; CHECK-NEXT: srli a5, a1, 44
-; CHECK-NEXT: ppaire.b a6, a7, a6
-; CHECK-NEXT: srli a7, a1, 36
-; CHECK-NEXT: ppaire.b t0, t1, t0
-; CHECK-NEXT: srli t1, a1, 28
-; CHECK-NEXT: ppaire.b a3, a5, a3
-; CHECK-NEXT: srli a5, a1, 20
-; CHECK-NEXT: ppaire.b a7, t1, a7
-; CHECK-NEXT: srli t1, a1, 12
-; CHECK-NEXT: ppaire.b a5, t1, a5
-; CHECK-NEXT: srli t1, a1, 4
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: ppaire.b a1, a1, t1
-; CHECK-NEXT: ppaire.h a2, a4, a2
-; CHECK-NEXT: ppaire.h a4, t0, a6
-; CHECK-NEXT: ppaire.h a3, a7, a3
-; CHECK-NEXT: ppaire.h a1, a1, a5
-; CHECK-NEXT: pack a2, a4, a2
-; CHECK-NEXT: pack a1, a1, a3
-; CHECK-NEXT: or a1, a2, a1
-; CHECK-NEXT: and a2, a1, a0
-; CHECK-NEXT: srli a3, a1, 58
-; CHECK-NEXT: slli a4, a1, 8
-; CHECK-NEXT: slli a5, a1, 16
-; CHECK-NEXT: srli a6, a2, 54
-; CHECK-NEXT: srli a7, a2, 46
-; CHECK-NEXT: srli t0, a2, 38
-; CHECK-NEXT: srli t1, a2, 30
-; CHECK-NEXT: ppaire.b a6, a7, a6
-; CHECK-NEXT: srli a7, a2, 22
-; CHECK-NEXT: ppaire.b t0, t1, t0
-; CHECK-NEXT: srli t1, a2, 14
-; CHECK-NEXT: ppaire.b a7, t1, a7
-; CHECK-NEXT: srli t1, a2, 6
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: ppaire.b a2, a2, t1
-; CHECK-NEXT: slli t1, a1, 24
-; CHECK-NEXT: srli a4, a4, 58
-; CHECK-NEXT: ppaire.b a3, a4, a3
-; CHECK-NEXT: srliw a4, a1, 26
-; CHECK-NEXT: srli a5, a5, 58
-; CHECK-NEXT: srli t1, t1, 58
-; CHECK-NEXT: ppaire.b a5, t1, a5
-; CHECK-NEXT: slli t1, a1, 40
-; CHECK-NEXT: srli t1, t1, 58
-; CHECK-NEXT: ppaire.b a4, t1, a4
-; CHECK-NEXT: slli t1, a1, 48
-; CHECK-NEXT: slli a1, a1, 56
-; CHECK-NEXT: srli t1, t1, 58
-; CHECK-NEXT: srli a1, a1, 58
-; CHECK-NEXT: ppaire.b t1, a1, t1
-; CHECK-NEXT: pli.b a1, 85
-; CHECK-NEXT: ppaire.h a6, t0, a6
-; CHECK-NEXT: ppaire.h a2, a2, a7
-; CHECK-NEXT: ppaire.h a3, a5, a3
-; CHECK-NEXT: ppaire.h a4, t1, a4
-; CHECK-NEXT: pack a2, a2, a6
-; CHECK-NEXT: pack a3, a4, a3
-; CHECK-NEXT: and a0, a3, a0
-; CHECK-NEXT: or a0, a0, a2
-; CHECK-NEXT: and a2, a0, a1
-; CHECK-NEXT: srli a3, a0, 57
-; CHECK-NEXT: slli a4, a0, 8
-; CHECK-NEXT: slli a5, a0, 16
-; CHECK-NEXT: srli a6, a2, 55
-; CHECK-NEXT: srli a7, a2, 47
-; CHECK-NEXT: srli t0, a2, 39
-; CHECK-NEXT: srli t1, a2, 31
-; CHECK-NEXT: ppaire.b a6, a7, a6
-; CHECK-NEXT: srli a7, a2, 23
-; CHECK-NEXT: ppaire.b t0, t1, t0
-; CHECK-NEXT: srli t1, a2, 15
-; CHECK-NEXT: ppaire.b a7, t1, a7
-; CHECK-NEXT: srli t1, a2, 7
-; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: ppaire.b a2, a2, t1
-; CHECK-NEXT: slli t1, a0, 24
-; CHECK-NEXT: srli a4, a4, 57
-; CHECK-NEXT: ppaire.b a3, a4, a3
-; CHECK-NEXT: srliw a4, a0, 25
-; CHECK-NEXT: srli a5, a5, 57
-; CHECK-NEXT: srli t1, t1, 57
-; CHECK-NEXT: ppaire.b a5, t1, a5
-; CHECK-NEXT: slli t1, a0, 40
-; CHECK-NEXT: srli t1, t1, 57
-; CHECK-NEXT: ppaire.b a4, t1, a4
-; CHECK-NEXT: slli t1, a0, 48
-; CHECK-NEXT: slli a0, a0, 56
-; CHECK-NEXT: srli t1, t1, 57
-; CHECK-NEXT: srli a0, a0, 57
-; CHECK-NEXT: ppaire.b a0, a0, t1
-; CHECK-NEXT: ppaire.h a6, t0, a6
-; CHECK-NEXT: ppaire.h a2, a2, a7
-; CHECK-NEXT: ppaire.h a3, a5, a3
-; CHECK-NEXT: ppaire.h a0, a0, a4
-; CHECK-NEXT: pack a2, a2, a6
-; CHECK-NEXT: pack a0, a0, a3
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: psrli.b a1, a0, 4
+; CHECK-NEXT: pli.b a2, 15
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: pli.b a2, 51
+; CHECK-NEXT: pslli.b a0, a0, 4
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.b a1, a0, 2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 85
+; CHECK-NEXT: pslli.b a0, a0, 2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.b a1, a0, 1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pslli.b a0, a0, 1
+; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: ret
%res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
ret <8 x i8> %res
@@ -2321,91 +2194,33 @@ define <8 x i8> @test_bitreverse_v8i8(<8 x i8> %a) {
define <4 x i16> @test_bitreverse_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_bitreverse_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 56
-; CHECK-NEXT: srli a2, a0, 40
-; CHECK-NEXT: srliw a3, a0, 24
-; CHECK-NEXT: slli a4, a0, 48
-; CHECK-NEXT: zext.b a5, a2
-; CHECK-NEXT: ppaire.h a1, a5, a1
-; CHECK-NEXT: srli a5, a0, 32
-; CHECK-NEXT: srli a4, a4, 56
-; CHECK-NEXT: ppaire.h a3, a4, a3
-; CHECK-NEXT: slliw a4, a0, 8
-; CHECK-NEXT: srli a0, a0, 16
-; CHECK-NEXT: andi a2, a2, -256
-; CHECK-NEXT: slli a5, a5, 8
-; CHECK-NEXT: ppaire.h a2, a5, a2
-; CHECK-NEXT: lui a5, 1
-; CHECK-NEXT: slli a0, a0, 8
-; CHECK-NEXT: addi a5, a5, -241
-; CHECK-NEXT: ppaire.h a0, a4, a0
-; CHECK-NEXT: padd.hs a4, zero, a5
-; CHECK-NEXT: pack a1, a3, a1
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: and a1, a0, a4
-; CHECK-NEXT: srli a2, a0, 52
-; CHECK-NEXT: srli a3, a1, 44
-; CHECK-NEXT: srli a5, a1, 28
-; CHECK-NEXT: ppaire.h a3, a5, a3
-; CHECK-NEXT: srli a5, a1, 12
-; CHECK-NEXT: slliw a1, a1, 4
-; CHECK-NEXT: ppaire.h a1, a1, a5
-; CHECK-NEXT: slli a5, a0, 16
-; CHECK-NEXT: srli a5, a5, 52
-; CHECK-NEXT: ppaire.h a2, a5, a2
-; CHECK-NEXT: srliw a5, a0, 20
-; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: srli a0, a0, 52
-; CHECK-NEXT: ppaire.h a0, a0, a5
-; CHECK-NEXT: lui a5, 3
-; CHECK-NEXT: addi a5, a5, 819
-; CHECK-NEXT: padd.hs a5, zero, a5
-; CHECK-NEXT: pack a1, a1, a3
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: and a0, a0, a4
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: and a1, a0, a5
-; CHECK-NEXT: srli a2, a0, 50
-; CHECK-NEXT: srli a3, a1, 46
-; CHECK-NEXT: srli a4, a1, 30
-; CHECK-NEXT: ppaire.h a3, a4, a3
-; CHECK-NEXT: srli a4, a1, 14
-; CHECK-NEXT: slliw a1, a1, 2
-; CHECK-NEXT: ppaire.h a1, a1, a4
-; CHECK-NEXT: slli a4, a0, 16
-; CHECK-NEXT: srli a4, a4, 50
-; CHECK-NEXT: ppaire.h a2, a4, a2
-; CHECK-NEXT: srliw a4, a0, 18
-; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: srli a0, a0, 50
-; CHECK-NEXT: ppaire.h a0, a0, a4
-; CHECK-NEXT: lui a4, 5
-; CHECK-NEXT: addi a4, a4, 1365
-; CHECK-NEXT: padd.hs a4, zero, a4
-; CHECK-NEXT: pack a1, a1, a3
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: and a0, a0, a5
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: and a1, a0, a4
-; CHECK-NEXT: srli a2, a0, 49
-; CHECK-NEXT: srli a3, a1, 47
-; CHECK-NEXT: srli a5, a1, 31
-; CHECK-NEXT: ppaire.h a3, a5, a3
-; CHECK-NEXT: srli a5, a1, 15
-; CHECK-NEXT: slliw a1, a1, 1
-; CHECK-NEXT: ppaire.h a1, a1, a5
-; CHECK-NEXT: slli a5, a0, 16
-; CHECK-NEXT: srli a5, a5, 49
-; CHECK-NEXT: ppaire.h a2, a5, a2
-; CHECK-NEXT: srliw a5, a0, 17
-; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: srli a0, a0, 49
-; CHECK-NEXT: ppaire.h a0, a0, a5
-; CHECK-NEXT: pack a1, a1, a3
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: psrli.h a1, a0, 8
+; CHECK-NEXT: pslli.h a0, a0, 8
+; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: addi a1, a2, -241
+; CHECK-NEXT: psrli.h a2, a0, 4
+; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: and a2, a2, a1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: lui a1, 3
+; CHECK-NEXT: addi a1, a1, 819
+; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: pslli.h a0, a0, 4
+; CHECK-NEXT: or a0, a2, a0
+; CHECK-NEXT: psrli.h a2, a0, 2
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: lui a2, 5
+; CHECK-NEXT: addi a2, a2, 1365
+; CHECK-NEXT: padd.hs a2, zero, a2
+; CHECK-NEXT: pslli.h a0, a0, 2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.h a1, a0, 1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pslli.h a0, a0, 1
+; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: ret
%res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
ret <4 x i16> %res
@@ -2414,64 +2229,42 @@ define <4 x i16> @test_bitreverse_v4i16(<4 x i16> %a) {
define <2 x i32> @test_bitreverse_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_bitreverse_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 56
-; CHECK-NEXT: srliw a2, a0, 24
-; CHECK-NEXT: srli a3, a0, 40
-; CHECK-NEXT: srliw a4, a0, 8
-; CHECK-NEXT: pack a1, a2, a1
-; CHECK-NEXT: slli a2, a0, 24
-; CHECK-NEXT: pack a3, a4, a3
-; CHECK-NEXT: srli a4, a0, 32
-; CHECK-NEXT: slli a4, a4, 24
-; CHECK-NEXT: pack a2, a2, a4
-; CHECK-NEXT: lui a4, 16
-; CHECK-NEXT: addi a4, a4, -256
-; CHECK-NEXT: padd.ws a4, zero, a4
-; CHECK-NEXT: and a3, a3, a4
-; CHECK-NEXT: and a0, a0, a4
-; CHECK-NEXT: or a1, a3, a1
+; CHECK-NEXT: psrli.w a1, a0, 8
+; CHECK-NEXT: lui a2, 16
+; CHECK-NEXT: psrli.w a3, a0, 24
+; CHECK-NEXT: addi a2, a2, -256
+; CHECK-NEXT: padd.ws a2, zero, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a2, a0, a2
+; CHECK-NEXT: pslli.w a0, a0, 24
+; CHECK-NEXT: or a1, a1, a3
; CHECK-NEXT: lui a3, 61681
+; CHECK-NEXT: pslli.w a2, a2, 8
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: lui a2, 209715
; CHECK-NEXT: addi a3, a3, -241
; CHECK-NEXT: padd.ws a3, zero, a3
-; CHECK-NEXT: or a1, a2, a1
-; CHECK-NEXT: srli a2, a0, 24
-; CHECK-NEXT: slli a0, a0, 40
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: and a1, a0, a3
-; CHECK-NEXT: srli a2, a0, 36
-; CHECK-NEXT: srliw a0, a0, 4
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: srli a2, a1, 28
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: pack a1, a1, a2
-; CHECK-NEXT: lui a2, 209715
-; CHECK-NEXT: addi a2, a2, 819
-; CHECK-NEXT: padd.ws a2, zero, a2
-; CHECK-NEXT: and a0, a0, a3
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: and a1, a0, a2
-; CHECK-NEXT: srli a3, a0, 34
-; CHECK-NEXT: srliw a0, a0, 2
-; CHECK-NEXT: pack a0, a0, a3
-; CHECK-NEXT: srli a3, a1, 30
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: pack a1, a1, a3
+; CHECK-NEXT: psrli.w a1, a0, 4
+; CHECK-NEXT: and a0, a0, a3
+; CHECK-NEXT: and a1, a1, a3
; CHECK-NEXT: lui a3, 349525
+; CHECK-NEXT: addi a2, a2, 819
; CHECK-NEXT: addi a3, a3, 1365
+; CHECK-NEXT: padd.ws a2, zero, a2
; CHECK-NEXT: padd.ws a3, zero, a3
+; CHECK-NEXT: pslli.w a0, a0, 4
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.w a1, a0, 2
; CHECK-NEXT: and a0, a0, a2
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: and a1, a0, a3
-; CHECK-NEXT: srli a2, a0, 33
-; CHECK-NEXT: srliw a0, a0, 1
-; CHECK-NEXT: pack a0, a0, a2
-; CHECK-NEXT: srli a2, a1, 31
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: pack a1, a1, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pslli.w a0, a0, 2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.w a1, a0, 1
; CHECK-NEXT: and a0, a0, a3
-; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: pslli.w a0, a0, 1
+; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: ret
%res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
ret <2 x i32> %res
>From a5a80c5adbc9a92b4442efd3a67d5362bd04615a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 5 Mar 2026 22:03:00 -0800
Subject: [PATCH 3/3] fixup! update tests
---
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 26 +++++++----------
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 38 +++++++++----------------
2 files changed, 23 insertions(+), 41 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index daef11e0f0273..581f962538797 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1832,24 +1832,18 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: psrli.h a1, a0, 8
; CHECK-NEXT: pslli.h a0, a0, 8
-; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: pli.b a2, 15
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: addi a1, a2, -241
-; CHECK-NEXT: psrli.h a2, a0, 4
-; CHECK-NEXT: padd.hs a1, zero, a1
-; CHECK-NEXT: and a2, a2, a1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: lui a1, 3
-; CHECK-NEXT: addi a1, a1, 819
-; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: psrli.h a1, a0, 4
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 51
; CHECK-NEXT: pslli.h a0, a0, 4
-; CHECK-NEXT: or a0, a2, a0
-; CHECK-NEXT: psrli.h a2, a0, 2
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: lui a2, 5
-; CHECK-NEXT: addi a2, a2, 1365
-; CHECK-NEXT: padd.hs a2, zero, a2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.h a1, a0, 2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 85
; CHECK-NEXT: pslli.h a0, a0, 2
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: psrli.h a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 99d218738d8ad..36bb90c8f099c 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2263,24 +2263,18 @@ define <4 x i16> @test_bitreverse_v4i16(<4 x i16> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: psrli.h a1, a0, 8
; CHECK-NEXT: pslli.h a0, a0, 8
-; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: pli.b a2, 15
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: addi a1, a2, -241
-; CHECK-NEXT: psrli.h a2, a0, 4
-; CHECK-NEXT: padd.hs a1, zero, a1
-; CHECK-NEXT: and a2, a2, a1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: lui a1, 3
-; CHECK-NEXT: addi a1, a1, 819
-; CHECK-NEXT: padd.hs a1, zero, a1
+; CHECK-NEXT: psrli.h a1, a0, 4
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 51
; CHECK-NEXT: pslli.h a0, a0, 4
-; CHECK-NEXT: or a0, a2, a0
-; CHECK-NEXT: psrli.h a2, a0, 2
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: lui a2, 5
-; CHECK-NEXT: addi a2, a2, 1365
-; CHECK-NEXT: padd.hs a2, zero, a2
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: psrli.h a1, a0, 2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: pli.b a2, 85
; CHECK-NEXT: pslli.h a0, a0, 2
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: psrli.h a1, a0, 1
@@ -2305,21 +2299,15 @@ define <2 x i32> @test_bitreverse_v2i32(<2 x i32> %a) {
; CHECK-NEXT: and a2, a0, a2
; CHECK-NEXT: pslli.w a0, a0, 24
; CHECK-NEXT: or a1, a1, a3
-; CHECK-NEXT: lui a3, 61681
+; CHECK-NEXT: pli.b a3, 15
; CHECK-NEXT: pslli.w a2, a2, 8
; CHECK-NEXT: or a0, a0, a2
-; CHECK-NEXT: lui a2, 209715
-; CHECK-NEXT: addi a3, a3, -241
-; CHECK-NEXT: padd.ws a3, zero, a3
+; CHECK-NEXT: pli.b a2, 51
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: psrli.w a1, a0, 4
; CHECK-NEXT: and a0, a0, a3
; CHECK-NEXT: and a1, a1, a3
-; CHECK-NEXT: lui a3, 349525
-; CHECK-NEXT: addi a2, a2, 819
-; CHECK-NEXT: addi a3, a3, 1365
-; CHECK-NEXT: padd.ws a2, zero, a2
-; CHECK-NEXT: padd.ws a3, zero, a3
+; CHECK-NEXT: pli.b a3, 85
; CHECK-NEXT: pslli.w a0, a0, 4
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: psrli.w a1, a0, 2
More information about the llvm-commits
mailing list