[llvm] 715cf6f - [RISCV] Add another isel optimization for (and (shl X, c2), c1).
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 24 15:11:23 PDT 2021
Author: Craig Topper
Date: 2021-09-24T15:10:25-07:00
New Revision: 715cf6ffb9a0491aa8749bf024d741de520fa1f2
URL: https://github.com/llvm/llvm-project/commit/715cf6ffb9a0491aa8749bf024d741de520fa1f2
DIFF: https://github.com/llvm/llvm-project/commit/715cf6ffb9a0491aa8749bf024d741de520fa1f2.diff
LOG: [RISCV] Add another isel optimization for (and (shl X, c2), c1).
Where c1 is a shifted mask with 32-c2 leading zeros and c3 trailing
zeros, and c3 > c2, we can select it as (slli (srliw X, c3-c2), c3).
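
A quick illustrative check (not part of the commit): the identity behind
this selection can be modeled in plain C++. srliw32 below is a
hypothetical helper approximating RV64's SRLIW for shift amounts 1-31
(logical right shift of the low 32 bits, then sign extension, which is
effectively a zero extension whenever the shift amount is nonzero), and
the constants are taken from the bswap test updates in this patch.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hypothetical model of RV64 SRLIW for shift amounts 1..31: logically
// shift the low 32 bits right, then sign-extend. For a nonzero shift the
// 32-bit result has a clear sign bit, so this amounts to zero extension.
static uint64_t srliw32(uint64_t x, unsigned sh) {
  return (uint64_t)(uint32_t)((uint32_t)x >> sh);
}

int main() {
  // Instance from the bswap tests below: c2 = 8, c1 = 0xFF00000000, a
  // shifted mask with 32-c2 = 24 leading zeros and c3 = 32 trailing
  // zeros, so c3 > c2 holds.
  const unsigned c2 = 8, c3 = 32;
  const uint64_t c1 = 0xFF00000000;
  for (uint64_t x : {0ULL, ~0ULL, 0x0123456789ABCDEFULL, 0x80000000ULL}) {
    uint64_t before = (x << c2) & c1;            // (and (shl X, c2), c1)
    uint64_t after = srliw32(x, c3 - c2) << c3;  // (slli (srliw X, c3-c2), c3)
    assert(before == after);
  }
  return 0;
}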
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
llvm/test/CodeGen/RISCV/rv64zbp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ac9bc5b05fbe..f942821e6bd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -700,6 +700,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, SLLI);
return;
}
+ // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
}
break;
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index d3a332c8b708..8113b8d604d7 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -115,13 +115,12 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: slli a2, a0, 24
+; RV64I-NEXT: slli a4, a3, 40
; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 24
-; RV64I-NEXT: slli a5, a3, 40
-; RV64I-NEXT: and a4, a4, a5
-; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: slli a4, a0, 40
; RV64I-NEXT: slli a3, a3, 48
; RV64I-NEXT: and a3, a4, a3
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index c5ff85f8d6d1..8334ab0206c3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1585,13 +1585,12 @@ define i64 @bswap_i64(i64 %a) {
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: slli a2, a0, 24
+; RV64I-NEXT: slli a4, a3, 40
; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 24
-; RV64I-NEXT: slli a5, a3, 40
-; RV64I-NEXT: and a4, a4, a5
-; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: slli a4, a0, 40
; RV64I-NEXT: slli a3, a3, 48
; RV64I-NEXT: and a3, a4, a3
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 96121858ff53..0045248bf35d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2752,13 +2752,12 @@ define i64 @bswap_i64(i64 %a) {
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: slli a2, a0, 24
+; RV64I-NEXT: slli a4, a3, 40
; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 24
-; RV64I-NEXT: slli a5, a3, 40
-; RV64I-NEXT: and a4, a4, a5
-; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: slli a4, a0, 40
; RV64I-NEXT: slli a3, a3, 48
; RV64I-NEXT: and a3, a4, a3
@@ -2988,13 +2987,12 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
; RV64I-NEXT: srli a4, a0, 56
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: slli a2, a0, 24
+; RV64I-NEXT: slli a4, a3, 40
; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: slli a4, a0, 24
-; RV64I-NEXT: slli a5, a3, 40
-; RV64I-NEXT: and a4, a4, a5
-; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: slli a4, a0, 40
; RV64I-NEXT: slli a3, a3, 48
; RV64I-NEXT: and a3, a4, a3
@@ -3182,31 +3180,30 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
define i64 @bitreverse_bswap_i64(i64 %a) {
; RV64I-LABEL: bitreverse_bswap_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a2, a0, 24
; RV64I-NEXT: lui a6, 4080
-; RV64I-NEXT: and a1, a1, a6
-; RV64I-NEXT: srli a3, a0, 8
-; RV64I-NEXT: addi a5, zero, 255
-; RV64I-NEXT: slli a7, a5, 24
-; RV64I-NEXT: and a3, a3, a7
-; RV64I-NEXT: or a3, a3, a1
+; RV64I-NEXT: and a3, a2, a6
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: addi a1, zero, 255
+; RV64I-NEXT: slli a7, a1, 24
+; RV64I-NEXT: and a4, a4, a7
+; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: srli a4, a0, 40
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: addiw a1, a1, -256
-; RV64I-NEXT: and a4, a4, a1
+; RV64I-NEXT: lui a5, 16
+; RV64I-NEXT: addiw a5, a5, -256
+; RV64I-NEXT: and a4, a4, a5
; RV64I-NEXT: srli a2, a0, 56
; RV64I-NEXT: or a2, a4, a2
; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: slli a4, a0, 8
-; RV64I-NEXT: slli t0, a5, 32
-; RV64I-NEXT: and a3, a4, t0
; RV64I-NEXT: slli a4, a0, 24
-; RV64I-NEXT: slli t1, a5, 40
-; RV64I-NEXT: and a4, a4, t1
+; RV64I-NEXT: slli t0, a1, 40
+; RV64I-NEXT: and a4, a4, t0
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: slli a4, a0, 40
-; RV64I-NEXT: slli a5, a5, 48
-; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: and a4, a4, a1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: or a0, a0, a3
@@ -3251,26 +3248,26 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: and a1, a2, a1
-; RV64I-NEXT: srli a2, a0, 56
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: srli a2, a0, 24
-; RV64I-NEXT: and a2, a2, a6
-; RV64I-NEXT: srli a3, a0, 8
-; RV64I-NEXT: and a3, a3, a7
+; RV64I-NEXT: and a2, a2, a5
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: and a3, a3, a6
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: and a4, a4, a7
+; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: and a2, a2, t0
; RV64I-NEXT: slli a3, a0, 24
-; RV64I-NEXT: and a3, a3, t1
-; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: slli a3, a0, 40
-; RV64I-NEXT: and a3, a3, a5
+; RV64I-NEXT: and a3, a3, t0
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 40
+; RV64I-NEXT: and a1, a4, a1
; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64B-LABEL: bitreverse_bswap_i64:
@@ -3637,13 +3634,12 @@ define i64 @shfl16(i64 %a, i64 %b) nounwind {
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a1, a0, a1
-; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: lui a3, 65535
-; RV64I-NEXT: slli a4, a3, 20
-; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: srliw a2, a0, 16
+; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a0, a0, 16
-; RV64I-NEXT: slli a2, a3, 4
+; RV64I-NEXT: lui a2, 65535
+; RV64I-NEXT: slli a2, a2, 4
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 262a02e9735a..16086b327232 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -631,53 +631,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV64-NEXT: addiw a7, a3, -256
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a4, a1, 56
-; LMULMAX2-RV64-NEXT: or t0, a2, a4
+; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: lui a6, 4080
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
-; LMULMAX2-RV64-NEXT: addi a3, zero, 255
-; LMULMAX2-RV64-NEXT: slli a2, a3, 24
-; LMULMAX2-RV64-NEXT: and a5, a5, a2
+; LMULMAX2-RV64-NEXT: addi t0, zero, 255
+; LMULMAX2-RV64-NEXT: slli a3, t0, 24
+; LMULMAX2-RV64-NEXT: and a5, a5, a3
; LMULMAX2-RV64-NEXT: or a4, a5, a4
-; LMULMAX2-RV64-NEXT: or t0, a4, t0
-; LMULMAX2-RV64-NEXT: slli a5, a1, 8
-; LMULMAX2-RV64-NEXT: slli t1, a3, 32
-; LMULMAX2-RV64-NEXT: and a5, a5, t1
+; LMULMAX2-RV64-NEXT: or t1, a4, a2
; LMULMAX2-RV64-NEXT: slli a4, a1, 24
-; LMULMAX2-RV64-NEXT: slli t2, a3, 40
+; LMULMAX2-RV64-NEXT: slli t2, t0, 40
; LMULMAX2-RV64-NEXT: and a4, a4, t2
-; LMULMAX2-RV64-NEXT: or a4, a4, a5
-; LMULMAX2-RV64-NEXT: slli a5, a1, 40
-; LMULMAX2-RV64-NEXT: slli a3, a3, 48
-; LMULMAX2-RV64-NEXT: and a5, a5, a3
+; LMULMAX2-RV64-NEXT: srliw a2, a1, 24
+; LMULMAX2-RV64-NEXT: slli a2, a2, 32
+; LMULMAX2-RV64-NEXT: or a2, a4, a2
+; LMULMAX2-RV64-NEXT: slli a4, a1, 40
+; LMULMAX2-RV64-NEXT: slli a5, t0, 48
+; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
-; LMULMAX2-RV64-NEXT: or a1, a1, a5
; LMULMAX2-RV64-NEXT: or a1, a1, a4
-; LMULMAX2-RV64-NEXT: or a1, a1, t0
+; LMULMAX2-RV64-NEXT: or a1, a1, a2
+; LMULMAX2-RV64-NEXT: or a1, a1, t1
; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX2-RV64-NEXT: vmv.v.x v26, a1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25
-; LMULMAX2-RV64-NEXT: srli a4, a1, 24
-; LMULMAX2-RV64-NEXT: and a4, a4, a6
-; LMULMAX2-RV64-NEXT: srli a5, a1, 8
-; LMULMAX2-RV64-NEXT: and a2, a5, a2
-; LMULMAX2-RV64-NEXT: or a2, a2, a4
-; LMULMAX2-RV64-NEXT: srli a4, a1, 40
-; LMULMAX2-RV64-NEXT: and a4, a4, a7
-; LMULMAX2-RV64-NEXT: srli a5, a1, 56
-; LMULMAX2-RV64-NEXT: or a4, a4, a5
-; LMULMAX2-RV64-NEXT: or a2, a2, a4
-; LMULMAX2-RV64-NEXT: slli a4, a1, 8
-; LMULMAX2-RV64-NEXT: and a4, a4, t1
-; LMULMAX2-RV64-NEXT: slli a5, a1, 24
-; LMULMAX2-RV64-NEXT: and a5, a5, t2
-; LMULMAX2-RV64-NEXT: or a4, a5, a4
-; LMULMAX2-RV64-NEXT: slli a5, a1, 40
-; LMULMAX2-RV64-NEXT: and a3, a5, a3
+; LMULMAX2-RV64-NEXT: srli a2, a1, 24
+; LMULMAX2-RV64-NEXT: and a2, a2, a6
+; LMULMAX2-RV64-NEXT: srli a4, a1, 8
+; LMULMAX2-RV64-NEXT: and a3, a4, a3
+; LMULMAX2-RV64-NEXT: or a2, a3, a2
+; LMULMAX2-RV64-NEXT: srli a3, a1, 40
+; LMULMAX2-RV64-NEXT: and a3, a3, a7
+; LMULMAX2-RV64-NEXT: srli a4, a1, 56
+; LMULMAX2-RV64-NEXT: or a3, a3, a4
+; LMULMAX2-RV64-NEXT: or a2, a2, a3
+; LMULMAX2-RV64-NEXT: slli a3, a1, 24
+; LMULMAX2-RV64-NEXT: and a3, a3, t2
+; LMULMAX2-RV64-NEXT: srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT: slli a4, a4, 32
+; LMULMAX2-RV64-NEXT: or a3, a3, a4
+; LMULMAX2-RV64-NEXT: slli a4, a1, 40
+; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
-; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a4
+; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
; LMULMAX2-RV64-NEXT: vmv.s.x v26, a1
@@ -762,53 +761,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV64-NEXT: addiw a7, a3, -256
; LMULMAX1-RV64-NEXT: and a2, a2, a7
; LMULMAX1-RV64-NEXT: srli a4, a1, 56
-; LMULMAX1-RV64-NEXT: or t0, a2, a4
+; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: srli a4, a1, 24
; LMULMAX1-RV64-NEXT: lui a6, 4080
; LMULMAX1-RV64-NEXT: and a4, a4, a6
; LMULMAX1-RV64-NEXT: srli a5, a1, 8
-; LMULMAX1-RV64-NEXT: addi a3, zero, 255
-; LMULMAX1-RV64-NEXT: slli a2, a3, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, a2
+; LMULMAX1-RV64-NEXT: addi t0, zero, 255
+; LMULMAX1-RV64-NEXT: slli a3, t0, 24
+; LMULMAX1-RV64-NEXT: and a5, a5, a3
; LMULMAX1-RV64-NEXT: or a4, a5, a4
-; LMULMAX1-RV64-NEXT: or t0, a4, t0
-; LMULMAX1-RV64-NEXT: slli a5, a1, 8
-; LMULMAX1-RV64-NEXT: slli t1, a3, 32
-; LMULMAX1-RV64-NEXT: and a5, a5, t1
+; LMULMAX1-RV64-NEXT: or t1, a4, a2
; LMULMAX1-RV64-NEXT: slli a4, a1, 24
-; LMULMAX1-RV64-NEXT: slli t2, a3, 40
+; LMULMAX1-RV64-NEXT: slli t2, t0, 40
; LMULMAX1-RV64-NEXT: and a4, a4, t2
-; LMULMAX1-RV64-NEXT: or a4, a4, a5
-; LMULMAX1-RV64-NEXT: slli a5, a1, 40
-; LMULMAX1-RV64-NEXT: slli a3, a3, 48
-; LMULMAX1-RV64-NEXT: and a5, a5, a3
+; LMULMAX1-RV64-NEXT: srliw a2, a1, 24
+; LMULMAX1-RV64-NEXT: slli a2, a2, 32
+; LMULMAX1-RV64-NEXT: or a2, a4, a2
+; LMULMAX1-RV64-NEXT: slli a4, a1, 40
+; LMULMAX1-RV64-NEXT: slli a5, t0, 48
+; LMULMAX1-RV64-NEXT: and a4, a4, a5
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
-; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a4
-; LMULMAX1-RV64-NEXT: or a1, a1, t0
+; LMULMAX1-RV64-NEXT: or a1, a1, a2
+; LMULMAX1-RV64-NEXT: or a1, a1, t1
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT: srli a4, a1, 24
-; LMULMAX1-RV64-NEXT: and a4, a4, a6
-; LMULMAX1-RV64-NEXT: srli a5, a1, 8
-; LMULMAX1-RV64-NEXT: and a2, a5, a2
-; LMULMAX1-RV64-NEXT: or a2, a2, a4
-; LMULMAX1-RV64-NEXT: srli a4, a1, 40
-; LMULMAX1-RV64-NEXT: and a4, a4, a7
-; LMULMAX1-RV64-NEXT: srli a5, a1, 56
-; LMULMAX1-RV64-NEXT: or a4, a4, a5
-; LMULMAX1-RV64-NEXT: or a2, a2, a4
-; LMULMAX1-RV64-NEXT: slli a4, a1, 8
-; LMULMAX1-RV64-NEXT: and a4, a4, t1
-; LMULMAX1-RV64-NEXT: slli a5, a1, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, t2
-; LMULMAX1-RV64-NEXT: or a4, a5, a4
-; LMULMAX1-RV64-NEXT: slli a5, a1, 40
-; LMULMAX1-RV64-NEXT: and a3, a5, a3
+; LMULMAX1-RV64-NEXT: srli a2, a1, 24
+; LMULMAX1-RV64-NEXT: and a2, a2, a6
+; LMULMAX1-RV64-NEXT: srli a4, a1, 8
+; LMULMAX1-RV64-NEXT: and a3, a4, a3
+; LMULMAX1-RV64-NEXT: or a2, a3, a2
+; LMULMAX1-RV64-NEXT: srli a3, a1, 40
+; LMULMAX1-RV64-NEXT: and a3, a3, a7
+; LMULMAX1-RV64-NEXT: srli a4, a1, 56
+; LMULMAX1-RV64-NEXT: or a3, a3, a4
+; LMULMAX1-RV64-NEXT: or a2, a2, a3
+; LMULMAX1-RV64-NEXT: slli a3, a1, 24
+; LMULMAX1-RV64-NEXT: and a3, a3, t2
+; LMULMAX1-RV64-NEXT: srliw a4, a1, 24
+; LMULMAX1-RV64-NEXT: slli a4, a4, 32
+; LMULMAX1-RV64-NEXT: or a3, a3, a4
+; LMULMAX1-RV64-NEXT: slli a4, a1, 40
+; LMULMAX1-RV64-NEXT: and a4, a4, a5
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
-; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or a1, a1, a4
+; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
@@ -1980,57 +1978,56 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV64-NEXT: andi sp, sp, -32
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
-; LMULMAX2-RV64-NEXT: vmv.x.s a2, v26
-; LMULMAX2-RV64-NEXT: srli a1, a2, 24
+; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
+; LMULMAX2-RV64-NEXT: srli a2, a1, 24
; LMULMAX2-RV64-NEXT: lui a6, 4080
-; LMULMAX2-RV64-NEXT: and a1, a1, a6
-; LMULMAX2-RV64-NEXT: srli a3, a2, 8
-; LMULMAX2-RV64-NEXT: addi a5, zero, 255
-; LMULMAX2-RV64-NEXT: slli a7, a5, 24
-; LMULMAX2-RV64-NEXT: and a3, a3, a7
-; LMULMAX2-RV64-NEXT: or a3, a3, a1
-; LMULMAX2-RV64-NEXT: srli a4, a2, 40
-; LMULMAX2-RV64-NEXT: lui a1, 16
-; LMULMAX2-RV64-NEXT: addiw t0, a1, -256
+; LMULMAX2-RV64-NEXT: and a3, a2, a6
+; LMULMAX2-RV64-NEXT: srli a4, a1, 8
+; LMULMAX2-RV64-NEXT: addi a7, zero, 255
+; LMULMAX2-RV64-NEXT: slli t0, a7, 24
; LMULMAX2-RV64-NEXT: and a4, a4, t0
-; LMULMAX2-RV64-NEXT: srli a1, a2, 56
-; LMULMAX2-RV64-NEXT: or a1, a4, a1
-; LMULMAX2-RV64-NEXT: or a1, a3, a1
-; LMULMAX2-RV64-NEXT: slli a4, a2, 8
-; LMULMAX2-RV64-NEXT: slli t1, a5, 32
-; LMULMAX2-RV64-NEXT: and a3, a4, t1
-; LMULMAX2-RV64-NEXT: slli a4, a2, 24
-; LMULMAX2-RV64-NEXT: slli t2, a5, 40
-; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: or a3, a4, a3
-; LMULMAX2-RV64-NEXT: slli a4, a2, 40
-; LMULMAX2-RV64-NEXT: slli a5, a5, 48
+; LMULMAX2-RV64-NEXT: srli a4, a1, 40
+; LMULMAX2-RV64-NEXT: lui a5, 16
+; LMULMAX2-RV64-NEXT: addiw a5, a5, -256
; LMULMAX2-RV64-NEXT: and a4, a4, a5
-; LMULMAX2-RV64-NEXT: slli a2, a2, 56
-; LMULMAX2-RV64-NEXT: or a2, a2, a4
-; LMULMAX2-RV64-NEXT: or a2, a2, a3
-; LMULMAX2-RV64-NEXT: or a1, a2, a1
+; LMULMAX2-RV64-NEXT: srli a2, a1, 56
+; LMULMAX2-RV64-NEXT: or a2, a4, a2
+; LMULMAX2-RV64-NEXT: or a2, a3, a2
+; LMULMAX2-RV64-NEXT: slli a4, a1, 24
+; LMULMAX2-RV64-NEXT: slli t1, a7, 40
+; LMULMAX2-RV64-NEXT: and a4, a4, t1
+; LMULMAX2-RV64-NEXT: srliw a3, a1, 24
+; LMULMAX2-RV64-NEXT: slli a3, a3, 32
+; LMULMAX2-RV64-NEXT: or a3, a4, a3
+; LMULMAX2-RV64-NEXT: slli a4, a1, 40
+; LMULMAX2-RV64-NEXT: slli a7, a7, 48
+; LMULMAX2-RV64-NEXT: and a4, a4, a7
+; LMULMAX2-RV64-NEXT: slli a1, a1, 56
+; LMULMAX2-RV64-NEXT: or a1, a1, a4
+; LMULMAX2-RV64-NEXT: or a1, a1, a3
+; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: sd a1, 32(sp)
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
-; LMULMAX2-RV64-NEXT: and a2, a2, t0
+; LMULMAX2-RV64-NEXT: and a2, a2, a5
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
-; LMULMAX2-RV64-NEXT: and a4, a4, a7
+; LMULMAX2-RV64-NEXT: and a4, a4, t0
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
-; LMULMAX2-RV64-NEXT: slli a3, a1, 8
+; LMULMAX2-RV64-NEXT: slli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, t1
-; LMULMAX2-RV64-NEXT: slli a4, a1, 24
-; LMULMAX2-RV64-NEXT: and a4, a4, t2
-; LMULMAX2-RV64-NEXT: or a3, a4, a3
+; LMULMAX2-RV64-NEXT: srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT: slli a4, a4, 32
+; LMULMAX2-RV64-NEXT: or a3, a3, a4
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
-; LMULMAX2-RV64-NEXT: and a4, a4, a5
+; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
@@ -2039,22 +2036,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
-; LMULMAX2-RV64-NEXT: and a2, a2, t0
+; LMULMAX2-RV64-NEXT: and a2, a2, a5
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
-; LMULMAX2-RV64-NEXT: and a4, a4, a7
+; LMULMAX2-RV64-NEXT: and a4, a4, t0
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
-; LMULMAX2-RV64-NEXT: slli a3, a1, 8
+; LMULMAX2-RV64-NEXT: slli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, t1
-; LMULMAX2-RV64-NEXT: slli a4, a1, 24
-; LMULMAX2-RV64-NEXT: and a4, a4, t2
-; LMULMAX2-RV64-NEXT: or a3, a4, a3
+; LMULMAX2-RV64-NEXT: srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT: slli a4, a4, 32
+; LMULMAX2-RV64-NEXT: or a3, a3, a4
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
-; LMULMAX2-RV64-NEXT: and a4, a4, a5
+; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
@@ -2063,22 +2060,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
-; LMULMAX2-RV64-NEXT: and a2, a2, t0
+; LMULMAX2-RV64-NEXT: and a2, a2, a5
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
-; LMULMAX2-RV64-NEXT: and a4, a4, a7
+; LMULMAX2-RV64-NEXT: and a4, a4, t0
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
-; LMULMAX2-RV64-NEXT: slli a3, a1, 8
+; LMULMAX2-RV64-NEXT: slli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, t1
-; LMULMAX2-RV64-NEXT: slli a4, a1, 24
-; LMULMAX2-RV64-NEXT: and a4, a4, t2
-; LMULMAX2-RV64-NEXT: or a3, a4, a3
+; LMULMAX2-RV64-NEXT: srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT: slli a4, a4, 32
+; LMULMAX2-RV64-NEXT: or a3, a3, a4
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
-; LMULMAX2-RV64-NEXT: and a4, a4, a5
+; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
@@ -2220,110 +2217,109 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v27, 1
-; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
-; LMULMAX1-RV64-NEXT: srli a1, a2, 40
-; LMULMAX1-RV64-NEXT: lui a3, 16
-; LMULMAX1-RV64-NEXT: addiw t0, a3, -256
+; LMULMAX1-RV64-NEXT: vmv.x.s a4, v26
+; LMULMAX1-RV64-NEXT: srli a1, a4, 40
+; LMULMAX1-RV64-NEXT: lui a2, 16
+; LMULMAX1-RV64-NEXT: addiw t0, a2, -256
; LMULMAX1-RV64-NEXT: and a1, a1, t0
-; LMULMAX1-RV64-NEXT: srli a3, a2, 56
-; LMULMAX1-RV64-NEXT: or a1, a1, a3
-; LMULMAX1-RV64-NEXT: srli a3, a2, 24
+; LMULMAX1-RV64-NEXT: srli a3, a4, 56
+; LMULMAX1-RV64-NEXT: or a3, a1, a3
+; LMULMAX1-RV64-NEXT: srli a1, a4, 24
; LMULMAX1-RV64-NEXT: lui a7, 4080
-; LMULMAX1-RV64-NEXT: and a3, a3, a7
-; LMULMAX1-RV64-NEXT: srli a5, a2, 8
-; LMULMAX1-RV64-NEXT: addi a4, zero, 255
-; LMULMAX1-RV64-NEXT: slli t1, a4, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, t1
+; LMULMAX1-RV64-NEXT: and a5, a1, a7
+; LMULMAX1-RV64-NEXT: srli a2, a4, 8
+; LMULMAX1-RV64-NEXT: addi a1, zero, 255
+; LMULMAX1-RV64-NEXT: slli t1, a1, 24
+; LMULMAX1-RV64-NEXT: and a2, a2, t1
+; LMULMAX1-RV64-NEXT: or a2, a2, a5
+; LMULMAX1-RV64-NEXT: or a2, a2, a3
+; LMULMAX1-RV64-NEXT: slli a5, a4, 24
+; LMULMAX1-RV64-NEXT: slli t2, a1, 40
+; LMULMAX1-RV64-NEXT: and a5, a5, t2
+; LMULMAX1-RV64-NEXT: srliw a3, a4, 24
+; LMULMAX1-RV64-NEXT: slli a3, a3, 32
; LMULMAX1-RV64-NEXT: or a3, a5, a3
-; LMULMAX1-RV64-NEXT: or a3, a3, a1
-; LMULMAX1-RV64-NEXT: slli a1, a2, 8
-; LMULMAX1-RV64-NEXT: slli t2, a4, 32
-; LMULMAX1-RV64-NEXT: and a1, a1, t2
-; LMULMAX1-RV64-NEXT: slli a5, a2, 24
-; LMULMAX1-RV64-NEXT: slli t3, a4, 40
-; LMULMAX1-RV64-NEXT: and a5, a5, t3
-; LMULMAX1-RV64-NEXT: or a5, a5, a1
-; LMULMAX1-RV64-NEXT: slli a1, a2, 40
-; LMULMAX1-RV64-NEXT: slli a4, a4, 48
-; LMULMAX1-RV64-NEXT: and a1, a1, a4
-; LMULMAX1-RV64-NEXT: slli a2, a2, 56
-; LMULMAX1-RV64-NEXT: or a1, a2, a1
-; LMULMAX1-RV64-NEXT: or a1, a1, a5
-; LMULMAX1-RV64-NEXT: or a1, a1, a3
-; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT: srli a2, a1, 24
-; LMULMAX1-RV64-NEXT: and a2, a2, a7
-; LMULMAX1-RV64-NEXT: srli a3, a1, 8
-; LMULMAX1-RV64-NEXT: and a3, a3, t1
+; LMULMAX1-RV64-NEXT: slli a5, a4, 40
+; LMULMAX1-RV64-NEXT: slli a1, a1, 48
+; LMULMAX1-RV64-NEXT: and a5, a5, a1
+; LMULMAX1-RV64-NEXT: slli a4, a4, 56
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: or a3, a4, a3
; LMULMAX1-RV64-NEXT: or a2, a3, a2
-; LMULMAX1-RV64-NEXT: srli a3, a1, 40
-; LMULMAX1-RV64-NEXT: and a3, a3, t0
-; LMULMAX1-RV64-NEXT: srli a5, a1, 56
-; LMULMAX1-RV64-NEXT: or a3, a3, a5
+; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT: vmv.v.x v26, a2
+; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT: srli a3, a2, 24
+; LMULMAX1-RV64-NEXT: and a3, a3, a7
+; LMULMAX1-RV64-NEXT: srli a4, a2, 8
+; LMULMAX1-RV64-NEXT: and a4, a4, t1
+; LMULMAX1-RV64-NEXT: or a3, a4, a3
+; LMULMAX1-RV64-NEXT: srli a4, a2, 40
+; LMULMAX1-RV64-NEXT: and a4, a4, t0
+; LMULMAX1-RV64-NEXT: srli a5, a2, 56
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: or a3, a3, a4
+; LMULMAX1-RV64-NEXT: slli a4, a2, 24
+; LMULMAX1-RV64-NEXT: and a4, a4, t2
+; LMULMAX1-RV64-NEXT: srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT: slli a5, a5, 32
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: slli a5, a2, 40
+; LMULMAX1-RV64-NEXT: and a5, a5, a1
+; LMULMAX1-RV64-NEXT: slli a2, a2, 56
+; LMULMAX1-RV64-NEXT: or a2, a2, a5
+; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: or a2, a2, a3
-; LMULMAX1-RV64-NEXT: slli a3, a1, 8
-; LMULMAX1-RV64-NEXT: and a3, a3, t2
-; LMULMAX1-RV64-NEXT: slli a5, a1, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, t3
-; LMULMAX1-RV64-NEXT: or a3, a5, a3
-; LMULMAX1-RV64-NEXT: slli a5, a1, 40
-; LMULMAX1-RV64-NEXT: and a5, a5, a4
-; LMULMAX1-RV64-NEXT: slli a1, a1, 56
-; LMULMAX1-RV64-NEXT: or a1, a1, a5
-; LMULMAX1-RV64-NEXT: or a1, a1, a3
-; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
+; LMULMAX1-RV64-NEXT: vmv.s.x v26, a2
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v25, 1
-; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT: srli a2, a1, 40
-; LMULMAX1-RV64-NEXT: and a2, a2, t0
-; LMULMAX1-RV64-NEXT: srli a3, a1, 56
-; LMULMAX1-RV64-NEXT: or a2, a2, a3
-; LMULMAX1-RV64-NEXT: srli a3, a1, 24
-; LMULMAX1-RV64-NEXT: and a3, a3, a7
-; LMULMAX1-RV64-NEXT: srli a5, a1, 8
-; LMULMAX1-RV64-NEXT: and a5, a5, t1
-; LMULMAX1-RV64-NEXT: or a3, a5, a3
-; LMULMAX1-RV64-NEXT: or a2, a3, a2
-; LMULMAX1-RV64-NEXT: slli a3, a1, 8
-; LMULMAX1-RV64-NEXT: and a3, a3, t2
-; LMULMAX1-RV64-NEXT: slli a5, a1, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, t3
-; LMULMAX1-RV64-NEXT: or a3, a5, a3
-; LMULMAX1-RV64-NEXT: slli a5, a1, 40
-; LMULMAX1-RV64-NEXT: and a5, a5, a4
-; LMULMAX1-RV64-NEXT: slli a1, a1, 56
-; LMULMAX1-RV64-NEXT: or a1, a1, a5
-; LMULMAX1-RV64-NEXT: or a1, a1, a3
-; LMULMAX1-RV64-NEXT: or a1, a1, a2
-; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vmv.v.x v27, a1
-; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT: srli a2, a1, 24
-; LMULMAX1-RV64-NEXT: and a2, a2, a7
-; LMULMAX1-RV64-NEXT: srli a3, a1, 8
-; LMULMAX1-RV64-NEXT: and a3, a3, t1
-; LMULMAX1-RV64-NEXT: or a2, a3, a2
-; LMULMAX1-RV64-NEXT: srli a3, a1, 40
+; LMULMAX1-RV64-NEXT: vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT: srli a3, a2, 40
; LMULMAX1-RV64-NEXT: and a3, a3, t0
-; LMULMAX1-RV64-NEXT: srli a5, a1, 56
-; LMULMAX1-RV64-NEXT: or a3, a3, a5
+; LMULMAX1-RV64-NEXT: srli a4, a2, 56
+; LMULMAX1-RV64-NEXT: or a3, a3, a4
+; LMULMAX1-RV64-NEXT: srli a4, a2, 24
+; LMULMAX1-RV64-NEXT: and a4, a4, a7
+; LMULMAX1-RV64-NEXT: srli a5, a2, 8
+; LMULMAX1-RV64-NEXT: and a5, a5, t1
+; LMULMAX1-RV64-NEXT: or a4, a5, a4
+; LMULMAX1-RV64-NEXT: or a3, a4, a3
+; LMULMAX1-RV64-NEXT: slli a4, a2, 24
+; LMULMAX1-RV64-NEXT: and a4, a4, t2
+; LMULMAX1-RV64-NEXT: srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT: slli a5, a5, 32
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: slli a5, a2, 40
+; LMULMAX1-RV64-NEXT: and a5, a5, a1
+; LMULMAX1-RV64-NEXT: slli a2, a2, 56
+; LMULMAX1-RV64-NEXT: or a2, a2, a5
+; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: or a2, a2, a3
-; LMULMAX1-RV64-NEXT: slli a3, a1, 8
-; LMULMAX1-RV64-NEXT: and a3, a3, t2
-; LMULMAX1-RV64-NEXT: slli a5, a1, 24
-; LMULMAX1-RV64-NEXT: and a5, a5, t3
-; LMULMAX1-RV64-NEXT: or a3, a5, a3
-; LMULMAX1-RV64-NEXT: slli a5, a1, 40
-; LMULMAX1-RV64-NEXT: and a4, a5, a4
-; LMULMAX1-RV64-NEXT: slli a1, a1, 56
+; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT: vmv.v.x v27, a2
+; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25
+; LMULMAX1-RV64-NEXT: srli a3, a2, 24
+; LMULMAX1-RV64-NEXT: and a3, a3, a7
+; LMULMAX1-RV64-NEXT: srli a4, a2, 8
+; LMULMAX1-RV64-NEXT: and a4, a4, t1
+; LMULMAX1-RV64-NEXT: or a3, a4, a3
+; LMULMAX1-RV64-NEXT: srli a4, a2, 40
+; LMULMAX1-RV64-NEXT: and a4, a4, t0
+; LMULMAX1-RV64-NEXT: srli a5, a2, 56
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: or a3, a3, a4
+; LMULMAX1-RV64-NEXT: slli a4, a2, 24
+; LMULMAX1-RV64-NEXT: and a4, a4, t2
+; LMULMAX1-RV64-NEXT: srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT: slli a5, a5, 32
+; LMULMAX1-RV64-NEXT: or a4, a4, a5
+; LMULMAX1-RV64-NEXT: slli a5, a2, 40
+; LMULMAX1-RV64-NEXT: and a1, a5, a1
+; LMULMAX1-RV64-NEXT: slli a2, a2, 56
+; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: or a1, a1, a4
; LMULMAX1-RV64-NEXT: or a1, a1, a3
-; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
; LMULMAX1-RV64-NEXT: vse64.v v27, (a0)