[llvm] 715cf6f - [RISCV] Add another isel optimization for (and (shl X, c2), c1).

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 24 15:11:23 PDT 2021


Author: Craig Topper
Date: 2021-09-24T15:10:25-07:00
New Revision: 715cf6ffb9a0491aa8749bf024d741de520fa1f2

URL: https://github.com/llvm/llvm-project/commit/715cf6ffb9a0491aa8749bf024d741de520fa1f2
DIFF: https://github.com/llvm/llvm-project/commit/715cf6ffb9a0491aa8749bf024d741de520fa1f2.diff

LOG: [RISCV] Add another isel optimization for (and (shl X, c2), c1).

Here c1 is a shifted mask with (32-c2) leading zeros and c3 trailing
zeros, where c3 > c2. We can select it as (slli (srliw X, c3-c2), c3).
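
For example, in the case exercised by the bswap tests below, c2=8 and
c1=0xFF00000000, which has 24 = 32-c2 leading zeros and c3=32 trailing
zeros, so

  (and (shl X, 8), 0xFF00000000) -> (slli (srliw X, 24), 32)

which avoids materializing the mask constant in a register.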

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll
    llvm/test/CodeGen/RISCV/rv64zbp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ac9bc5b05fbe..f942821e6bd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -700,6 +700,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         ReplaceNode(Node, SLLI);
         return;
       }
+      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
+      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, XLenVT, X,
+            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+        SDNode *SLLI =
+            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
+        ReplaceNode(Node, SLLI);
+        return;
+      }
     }
 
     break;
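
To make the bit identity behind the new pattern concrete, here is a
standalone sanity check; it is not part of the patch, and slli/srliw below
are simplified models of the RV64 instructions (SRLIW shifts the low 32
bits right and zero-extends the 32-bit result):

#include <cassert>
#include <cstdint>

// Simplified instruction models, for illustration only.
static uint64_t slli(uint64_t X, unsigned Sh) { return X << Sh; }
static uint64_t srliw(uint64_t X, unsigned Sh) {
  // SRLIW: logical right shift of the low 32 bits, zero-extended to 64.
  return static_cast<uint64_t>(static_cast<uint32_t>(X) >> Sh);
}

int main() {
  const unsigned C2 = 8, C3 = 32;       // C3 > C2
  const uint64_t C1 = 0xFF00000000ULL;  // 32-C2 leading zeros, C3 trailing zeros
  for (uint64_t X : {0ULL, ~0ULL, 0x0123456789ABCDEFULL}) {
    // (and (shl X, C2), C1) == (slli (srliw X, C3-C2), C3)
    assert(((X << C2) & C1) == slli(srliw(X, C3 - C2), C3));
  }
  return 0;
}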

diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index d3a332c8b708..8113b8d604d7 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -115,13 +115,12 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index c5ff85f8d6d1..8334ab0206c3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1585,13 +1585,12 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3

diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 96121858ff53..0045248bf35d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2752,13 +2752,12 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
@@ -2988,13 +2987,12 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
@@ -3182,31 +3180,30 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    srli a2, a0, 24
 ; RV64I-NEXT:    lui a6, 4080
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    addi a5, zero, 255
-; RV64I-NEXT:    slli a7, a5, 24
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    or a3, a3, a1
+; RV64I-NEXT:    and a3, a2, a6
+; RV64I-NEXT:    srli a4, a0, 8
+; RV64I-NEXT:    addi a1, zero, 255
+; RV64I-NEXT:    slli a7, a1, 24
+; RV64I-NEXT:    and a4, a4, a7
+; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    srli a4, a0, 40
-; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw a1, a1, -256
-; RV64I-NEXT:    and a4, a4, a1
+; RV64I-NEXT:    lui a5, 16
+; RV64I-NEXT:    addiw a5, a5, -256
+; RV64I-NEXT:    and a4, a4, a5
 ; RV64I-NEXT:    srli a2, a0, 56
 ; RV64I-NEXT:    or a2, a4, a2
 ; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    slli a4, a0, 8
-; RV64I-NEXT:    slli t0, a5, 32
-; RV64I-NEXT:    and a3, a4, t0
 ; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli t1, a5, 40
-; RV64I-NEXT:    and a4, a4, t1
+; RV64I-NEXT:    slli t0, a1, 40
+; RV64I-NEXT:    and a4, a4, t0
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a5, a5, 48
-; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    slli a1, a1, 48
+; RV64I-NEXT:    and a4, a4, a1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    or a0, a0, a4
 ; RV64I-NEXT:    or a0, a0, a3
@@ -3251,26 +3248,26 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    srli a2, a0, 40
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    srli a2, a0, 56
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a6
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    and a2, a2, a5
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    and a3, a3, a6
+; RV64I-NEXT:    srli a4, a0, 8
+; RV64I-NEXT:    and a4, a4, a7
+; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, t0
 ; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    and a3, a3, t1
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    and a3, a3, t0
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a3, a3, a4
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    and a1, a4, a1
 ; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64B-LABEL: bitreverse_bswap_i64:
@@ -3637,13 +3634,12 @@ define i64 @shfl16(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    slli a1, a1, 16
 ; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a1, a0, a1
-; RV64I-NEXT:    slli a2, a0, 16
-; RV64I-NEXT:    lui a3, 65535
-; RV64I-NEXT:    slli a4, a3, 20
-; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srliw a2, a0, 16
+; RV64I-NEXT:    slli a2, a2, 32
 ; RV64I-NEXT:    or a1, a2, a1
 ; RV64I-NEXT:    srli a0, a0, 16
-; RV64I-NEXT:    slli a2, a3, 4
+; RV64I-NEXT:    lui a2, 65535
+; RV64I-NEXT:    slli a2, a2, 4
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 262a02e9735a..16086b327232 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -631,53 +631,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    addiw a7, a3, -256
 ; LMULMAX2-RV64-NEXT:    and a2, a2, a7
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 56
-; LMULMAX2-RV64-NEXT:    or t0, a2, a4
+; LMULMAX2-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 24
 ; LMULMAX2-RV64-NEXT:    lui a6, 4080
 ; LMULMAX2-RV64-NEXT:    and a4, a4, a6
 ; LMULMAX2-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    addi a3, zero, 255
-; LMULMAX2-RV64-NEXT:    slli a2, a3, 24
-; LMULMAX2-RV64-NEXT:    and a5, a5, a2
+; LMULMAX2-RV64-NEXT:    addi t0, zero, 255
+; LMULMAX2-RV64-NEXT:    slli a3, t0, 24
+; LMULMAX2-RV64-NEXT:    and a5, a5, a3
 ; LMULMAX2-RV64-NEXT:    or a4, a5, a4
-; LMULMAX2-RV64-NEXT:    or t0, a4, t0
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    slli t1, a3, 32
-; LMULMAX2-RV64-NEXT:    and a5, a5, t1
+; LMULMAX2-RV64-NEXT:    or t1, a4, a2
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    slli t2, a3, 40
+; LMULMAX2-RV64-NEXT:    slli t2, t0, 40
 ; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a4, a4, a5
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX2-RV64-NEXT:    slli a3, a3, 48
-; LMULMAX2-RV64-NEXT:    and a5, a5, a3
+; LMULMAX2-RV64-NEXT:    srliw a2, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a2, a2, 32
+; LMULMAX2-RV64-NEXT:    or a2, a4, a2
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    slli a5, t0, 48
+; LMULMAX2-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX2-RV64-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
-; LMULMAX2-RV64-NEXT:    or a1, a1, t0
+; LMULMAX2-RV64-NEXT:    or a1, a1, a2
+; LMULMAX2-RV64-NEXT:    or a1, a1, t1
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX2-RV64-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX2-RV64-NEXT:    srli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, a6
-; LMULMAX2-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    and a2, a5, a2
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    srli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
-; LMULMAX2-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX2-RV64-NEXT:    or a4, a4, a5
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, t1
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX2-RV64-NEXT:    and a5, a5, t2
-; LMULMAX2-RV64-NEXT:    or a4, a5, a4
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX2-RV64-NEXT:    and a3, a5, a3
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 24
+; LMULMAX2-RV64-NEXT:    and a2, a2, a6
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX2-RV64-NEXT:    and a3, a4, a3
+; LMULMAX2-RV64-NEXT:    or a2, a3, a2
+; LMULMAX2-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX2-RV64-NEXT:    and a3, a3, a7
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 56
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
+; LMULMAX2-RV64-NEXT:    or a2, a2, a3
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
+; LMULMAX2-RV64-NEXT:    and a3, a3, t2
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX2-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
+; LMULMAX2-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v26, a1
@@ -762,53 +761,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT:    addiw a7, a3, -256
 ; LMULMAX1-RV64-NEXT:    and a2, a2, a7
 ; LMULMAX1-RV64-NEXT:    srli a4, a1, 56
-; LMULMAX1-RV64-NEXT:    or t0, a2, a4
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    srli a4, a1, 24
 ; LMULMAX1-RV64-NEXT:    lui a6, 4080
 ; LMULMAX1-RV64-NEXT:    and a4, a4, a6
 ; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    addi a3, zero, 255
-; LMULMAX1-RV64-NEXT:    slli a2, a3, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, a2
+; LMULMAX1-RV64-NEXT:    addi t0, zero, 255
+; LMULMAX1-RV64-NEXT:    slli a3, t0, 24
+; LMULMAX1-RV64-NEXT:    and a5, a5, a3
 ; LMULMAX1-RV64-NEXT:    or a4, a5, a4
-; LMULMAX1-RV64-NEXT:    or t0, a4, t0
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    slli t1, a3, 32
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    or t1, a4, a2
 ; LMULMAX1-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX1-RV64-NEXT:    slli t2, a3, 40
+; LMULMAX1-RV64-NEXT:    slli t2, t0, 40
 ; LMULMAX1-RV64-NEXT:    and a4, a4, t2
-; LMULMAX1-RV64-NEXT:    or a4, a4, a5
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    slli a3, a3, 48
-; LMULMAX1-RV64-NEXT:    and a5, a5, a3
+; LMULMAX1-RV64-NEXT:    srliw a2, a1, 24
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 32
+; LMULMAX1-RV64-NEXT:    or a2, a4, a2
+; LMULMAX1-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX1-RV64-NEXT:    slli a5, t0, 48
+; LMULMAX1-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
-; LMULMAX1-RV64-NEXT:    or a1, a1, t0
+; LMULMAX1-RV64-NEXT:    or a1, a1, a2
+; LMULMAX1-RV64-NEXT:    or a1, a1, t1
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT:    srli a4, a1, 24
-; LMULMAX1-RV64-NEXT:    and a4, a4, a6
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    and a2, a5, a2
-; LMULMAX1-RV64-NEXT:    or a2, a2, a4
-; LMULMAX1-RV64-NEXT:    srli a4, a1, 40
-; LMULMAX1-RV64-NEXT:    and a4, a4, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a4, a4, a5
-; LMULMAX1-RV64-NEXT:    or a2, a2, a4
-; LMULMAX1-RV64-NEXT:    slli a4, a1, 8
-; LMULMAX1-RV64-NEXT:    and a4, a4, t1
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t2
-; LMULMAX1-RV64-NEXT:    or a4, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a3, a5, a3
+; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
+; LMULMAX1-RV64-NEXT:    and a2, a2, a6
+; LMULMAX1-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX1-RV64-NEXT:    and a3, a4, a3
+; LMULMAX1-RV64-NEXT:    or a2, a3, a2
+; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a1, 56
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    or a2, a2, a3
+; LMULMAX1-RV64-NEXT:    slli a3, a1, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, t2
+; LMULMAX1-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX1-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
+; LMULMAX1-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a1
@@ -1980,57 +1978,56 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    andi sp, sp, -32
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vle64.v v26, (a0)
-; LMULMAX2-RV64-NEXT:    vmv.x.s a2, v26
-; LMULMAX2-RV64-NEXT:    srli a1, a2, 24
+; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 24
 ; LMULMAX2-RV64-NEXT:    lui a6, 4080
-; LMULMAX2-RV64-NEXT:    and a1, a1, a6
-; LMULMAX2-RV64-NEXT:    srli a3, a2, 8
-; LMULMAX2-RV64-NEXT:    addi a5, zero, 255
-; LMULMAX2-RV64-NEXT:    slli a7, a5, 24
-; LMULMAX2-RV64-NEXT:    and a3, a3, a7
-; LMULMAX2-RV64-NEXT:    or a3, a3, a1
-; LMULMAX2-RV64-NEXT:    srli a4, a2, 40
-; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addiw t0, a1, -256
+; LMULMAX2-RV64-NEXT:    and a3, a2, a6
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX2-RV64-NEXT:    addi a7, zero, 255
+; LMULMAX2-RV64-NEXT:    slli t0, a7, 24
 ; LMULMAX2-RV64-NEXT:    and a4, a4, t0
-; LMULMAX2-RV64-NEXT:    srli a1, a2, 56
-; LMULMAX2-RV64-NEXT:    or a1, a4, a1
-; LMULMAX2-RV64-NEXT:    or a1, a3, a1
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 8
-; LMULMAX2-RV64-NEXT:    slli t1, a5, 32
-; LMULMAX2-RV64-NEXT:    and a3, a4, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 24
-; LMULMAX2-RV64-NEXT:    slli t2, a5, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 40
-; LMULMAX2-RV64-NEXT:    slli a5, a5, 48
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    lui a5, 16
+; LMULMAX2-RV64-NEXT:    addiw a5, a5, -256
 ; LMULMAX2-RV64-NEXT:    and a4, a4, a5
-; LMULMAX2-RV64-NEXT:    slli a2, a2, 56
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    or a2, a2, a3
-; LMULMAX2-RV64-NEXT:    or a1, a2, a1
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 56
+; LMULMAX2-RV64-NEXT:    or a2, a4, a2
+; LMULMAX2-RV64-NEXT:    or a2, a3, a2
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli t1, a7, 40
+; LMULMAX2-RV64-NEXT:    and a4, a4, t1
+; LMULMAX2-RV64-NEXT:    srliw a3, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a3, a3, 32
+; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    slli a7, a7, 48
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
+; LMULMAX2-RV64-NEXT:    or a1, a1, a4
+; LMULMAX2-RV64-NEXT:    or a1, a1, a3
+; LMULMAX2-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX2-RV64-NEXT:    sd a1, 32(sp)
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v28, v26, 3
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2039,22 +2036,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v28, v26, 2
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2063,22 +2060,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v26
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2220,110 +2217,109 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT:    vle64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vslidedown.vi v26, v27, 1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v26
-; LMULMAX1-RV64-NEXT:    srli a1, a2, 40
-; LMULMAX1-RV64-NEXT:    lui a3, 16
-; LMULMAX1-RV64-NEXT:    addiw t0, a3, -256
+; LMULMAX1-RV64-NEXT:    vmv.x.s a4, v26
+; LMULMAX1-RV64-NEXT:    srli a1, a4, 40
+; LMULMAX1-RV64-NEXT:    lui a2, 16
+; LMULMAX1-RV64-NEXT:    addiw t0, a2, -256
 ; LMULMAX1-RV64-NEXT:    and a1, a1, t0
-; LMULMAX1-RV64-NEXT:    srli a3, a2, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    srli a3, a4, 56
+; LMULMAX1-RV64-NEXT:    or a3, a1, a3
+; LMULMAX1-RV64-NEXT:    srli a1, a4, 24
 ; LMULMAX1-RV64-NEXT:    lui a7, 4080
-; LMULMAX1-RV64-NEXT:    and a3, a3, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a2, 8
-; LMULMAX1-RV64-NEXT:    addi a4, zero, 255
-; LMULMAX1-RV64-NEXT:    slli t1, a4, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    and a5, a1, a7
+; LMULMAX1-RV64-NEXT:    srli a2, a4, 8
+; LMULMAX1-RV64-NEXT:    addi a1, zero, 255
+; LMULMAX1-RV64-NEXT:    slli t1, a1, 24
+; LMULMAX1-RV64-NEXT:    and a2, a2, t1
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a3
+; LMULMAX1-RV64-NEXT:    slli a5, a4, 24
+; LMULMAX1-RV64-NEXT:    slli t2, a1, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, t2
+; LMULMAX1-RV64-NEXT:    srliw a3, a4, 24
+; LMULMAX1-RV64-NEXT:    slli a3, a3, 32
 ; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    or a3, a3, a1
-; LMULMAX1-RV64-NEXT:    slli a1, a2, 8
-; LMULMAX1-RV64-NEXT:    slli t2, a4, 32
-; LMULMAX1-RV64-NEXT:    and a1, a1, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a2, 24
-; LMULMAX1-RV64-NEXT:    slli t3, a4, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a5, a5, a1
-; LMULMAX1-RV64-NEXT:    slli a1, a2, 40
-; LMULMAX1-RV64-NEXT:    slli a4, a4, 48
-; LMULMAX1-RV64-NEXT:    and a1, a1, a4
-; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
-; LMULMAX1-RV64-NEXT:    or a1, a2, a1
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
-; LMULMAX1-RV64-NEXT:    and a2, a2, a7
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t1
+; LMULMAX1-RV64-NEXT:    slli a5, a4, 40
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 48
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a4, a4, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
-; LMULMAX1-RV64-NEXT:    and a3, a3, t0
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a3, a3, a5
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a2
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 8
+; LMULMAX1-RV64-NEXT:    and a4, a4, t1
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, t0
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a1
+; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a2
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vslidedown.vi v27, v25, 1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX1-RV64-NEXT:    and a2, a2, t0
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 56
-; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 24
-; LMULMAX1-RV64-NEXT:    and a3, a3, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vmv.v.x v27, a1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
-; LMULMAX1-RV64-NEXT:    and a2, a2, a7
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t1
-; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 40
 ; LMULMAX1-RV64-NEXT:    and a3, a3, t0
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a3, a3, a5
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 56
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, a7
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 8
+; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    or a4, a5, a4
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a4, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v27, a2
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v25
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 8
+; LMULMAX1-RV64-NEXT:    and a4, a4, t1
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, t0
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a1, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a1, a2, a1
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v27, a1
 ; LMULMAX1-RV64-NEXT:    vse64.v v27, (a0)
