[llvm] 3bffa2c - [RISCV] Add missing CHECKs to vector test
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 19 09:58:54 PDT 2021
Author: Fraser Cormack
Date: 2021-03-19T16:52:16Z
New Revision: 3bffa2c2aad810637601f3276aa329a77c4dd241
URL: https://github.com/llvm/llvm-project/commit/3bffa2c2aad810637601f3276aa329a77c4dd241
DIFF: https://github.com/llvm/llvm-project/commit/3bffa2c2aad810637601f3276aa329a77c4dd241.diff
LOG: [RISCV] Add missing CHECKs to vector test
Since the "LMUL-MAX=2" output for some test functions differed between
RV32 and RV64, the update_llc_test_checks script failed to emit a
unified LMULMAX2 check for them. I'm not sure why it didn't warn about
this.
This patch also takes the opportunity to add unified RV32/RV64 checks to
help shorten the test file when the output for LMULMAX1 and LMULMAX2 is
identical but differs between the two ISAs.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D98944
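For readers less familiar with the prefix machinery: update_llc_test_checks.py
runs every RUN line, captures the llc output for each function, and (roughly
speaking) emits each distinct output under the most widely shared check prefix
of the RUN lines that produced it. The sketch below is illustrative only -- the
function @example is invented and the assembly bodies are elided -- but the
RUN-line flags and prefix sets mirror the ones this patch adds.
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
define void @example(<2 x i64>* %x) {
; All four runs identical               -> one CHECK block.
; Identical per ISA across LMUL configs -> one RV32 and one RV64 block (new).
; Identical per LMUL config across ISAs -> one LMULMAX2 and one LMULMAX1 block.
; All four outputs differ               -> four LMULMAX*-RV* blocks.
; For instance, in the per-ISA case the script would emit:
; RV32-LABEL: example:
;   ...RV32 body elided...
;
; RV64-LABEL: example:
;   ...RV64 body elided...
  ret void
}
With the old RUN lines the LMULMAX2 runs had no per-ISA fallback prefix, so a
function whose LMULMAX2 output differed between RV32 and RV64 (e.g.
mulhu_v4i64 below) got no LMULMAX2 checks at all; the added
LMULMAX2-RV32/LMULMAX2-RV64 prefixes close that gap, and the shared RV32/RV64
prefixes let identical LMULMAX1/LMULMAX2 output collapse into a single block
per ISA.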
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 7eb49f1b8fe5..33f2e0d3998e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
define void @add_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
; CHECK-LABEL: add_v16i8:
@@ -943,58 +943,58 @@ define void @mulhu_v4i32(<4 x i32>* %x) {
}
define void @mulhu_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI55_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI55_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI55_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI55_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 1035469
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT: lui a1, 1026731
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vmv.v.i v26, 2
-; LMULMAX1-RV64-NEXT: addi a1, zero, 1
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI55_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI55_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulhu.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI55_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI55_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 1035469
+; RV64-NEXT: addiw a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: vmv.v.x v26, a1
+; RV64-NEXT: lui a1, 1026731
+; RV64-NEXT: addiw a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmulhu.vv v25, v25, v26
+; RV64-NEXT: vmv.v.i v26, 2
+; RV64-NEXT: addi a1, zero, 1
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vsrl.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = udiv <2 x i64> %a, <i64 3, i64 5>
store <2 x i64> %b, <2 x i64>* %x
@@ -1043,33 +1043,33 @@ define void @mulhs_v8i16(<8 x i16>* %x) {
}
define void @mulhs_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI58_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI58_0)
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI58_0)
-; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI58_0)
-; LMULMAX1-RV64-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI58_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vmulh.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v26, v25, 31
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, %hi(.LCPI58_0)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; RV64-NEXT: vle32.v v26, (a1)
+; RV64-NEXT: vmulh.vv v25, v25, v26
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 31
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
%a = load <4 x i32>, <4 x i32>* %x
%b = sdiv <4 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5>
store <4 x i32> %b, <4 x i32>* %x
@@ -1077,76 +1077,76 @@ define void @mulhs_v4i32(<4 x i32>* %x) {
}
define void @mulhs_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI59_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmul.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a2, a1, 1365
-; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI59_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI59_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: addi a1, zero, 1
-; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0
-; LMULMAX1-RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu
-; LMULMAX1-RV32-NEXT: vslideup.vi v28, v27, 2
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsra.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, zero
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmul.vv v26, v25, v26
-; LMULMAX1-RV64-NEXT: lui a1, 21845
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a2, a1, 1365
-; LMULMAX1-RV64-NEXT: vmv.v.x v27, a2
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: addi a1, zero, 63
-; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vid.v v27
-; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI59_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmul.vv v26, v25, v26
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a2, a1, 1365
+; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v27, a2
+; RV32-NEXT: addi a1, a1, 1366
+; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
+; RV32-NEXT: vmv.s.x v27, a1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulh.vv v25, v25, v27
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI59_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI59_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v26, v25, v26
+; RV32-NEXT: addi a1, zero, 1
+; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
+; RV32-NEXT: vmv.s.x v27, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu
+; RV32-NEXT: vslideup.vi v28, v27, 2
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v25, v25, v28
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vmv.v.i v26, -1
+; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, zero
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmul.vv v26, v25, v26
+; RV64-NEXT: lui a1, 21845
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a2, a1, 1365
+; RV64-NEXT: vmv.v.x v27, a2
+; RV64-NEXT: addi a1, a1, 1366
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v27, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmulh.vv v25, v25, v27
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: addi a1, zero, 63
+; RV64-NEXT: vsrl.vx v26, v25, a1
+; RV64-NEXT: vid.v v27
+; RV64-NEXT: vsra.vv v25, v25, v27
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = sdiv <2 x i64> %a, <i64 3, i64 -3>
store <2 x i64> %b, <2 x i64>* %x
@@ -3841,37 +3841,21 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: extract_v4i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: addi a2, a0, 16
-; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle64.v v27, (a1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, 16
-; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v28
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: extract_v4i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: addi a2, a0, 16
-; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle64.v v27, (a1)
-; LMULMAX1-RV64-NEXT: addi a1, a1, 16
-; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: extract_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vle64.v v25, (a0)
+; LMULMAX1-NEXT: addi a2, a0, 16
+; LMULMAX1-NEXT: vle64.v v26, (a2)
+; LMULMAX1-NEXT: vle64.v v27, (a1)
+; LMULMAX1-NEXT: addi a1, a1, 16
+; LMULMAX1-NEXT: vle64.v v28, (a1)
+; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vadd.vv v26, v26, v28
+; LMULMAX1-NEXT: vadd.vv v25, v25, v27
+; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: vse64.v v26, (a2)
+; LMULMAX1-NEXT: ret
%a = load <4 x i64>, <4 x i64>* %x
%b = load <4 x i64>, <4 x i64>* %y
br label %"compute"
@@ -3908,35 +3892,20 @@ define void @mulhu_v32i8(<32 x i8>* %x) {
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: mulhu_v32i8:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI129_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI129_0)
-; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v32i8:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI129_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI129_0)
-; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhu_v32i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle8.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI129_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI129_0)
+; LMULMAX1-NEXT: vle8.v v26, (a2)
+; LMULMAX1-NEXT: vle8.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: ret
%a = load <32 x i8>, <32 x i8>* %x
%b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
store <32 x i8> %b, <32 x i8>* %x
@@ -3969,35 +3938,20 @@ define void @mulhu_v16i16(<16 x i16>* %x) {
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: mulhu_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI130_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI130_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhu_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI130_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI130_0)
+; LMULMAX1-NEXT: vle16.v v26, (a2)
+; LMULMAX1-NEXT: vle16.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: ret
%a = load <16 x i16>, <16 x i16>* %x
%b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
store <16 x i16> %b, <16 x i16>* %x
@@ -4086,6 +4040,63 @@ define void @mulhu_v8i32(<8 x i32>* %x) {
}
define void @mulhu_v4i64(<4 x i64>* %x) {
+; LMULMAX2-RV32-LABEL: mulhu_v4i64:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI132_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI132_0)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulhu.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, 524288
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v30, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
+; LMULMAX2-RV32-NEXT: vsetivli a1, 6, e32,m2,tu,mu
+; LMULMAX2-RV32-NEXT: vslideup.vi v8, v30, 5
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v8
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI132_1)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI132_1)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhu_v4i64:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_0)
+; LMULMAX2-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v26, v28
+; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: addi a1, zero, -1
+; LMULMAX2-RV64-NEXT: slli a1, a1, 63
+; LMULMAX2-RV64-NEXT: vsetvli a2, zero, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.s.x v30, a1
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.v.i v8, 0
+; LMULMAX2-RV64-NEXT: vsetivli a1, 3, e64,m2,tu,mu
+; LMULMAX2-RV64-NEXT: vslideup.vi v8, v30, 2
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_1)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_1)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
; LMULMAX1-RV32-LABEL: mulhu_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
@@ -4203,35 +4214,20 @@ define void @mulhs_v32i8(<32 x i8>* %x) {
; LMULMAX2-NEXT: vse8.v v26, (a0)
; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: mulhs_v32i8:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI133_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI133_0)
-; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v32i8:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI133_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI133_0)
-; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhs_v32i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle8.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI133_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI133_0)
+; LMULMAX1-NEXT: vle8.v v26, (a2)
+; LMULMAX1-NEXT: vle8.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: ret
%a = load <32 x i8>, <32 x i8>* %x
%b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
store <32 x i8> %b, <32 x i8>* %x
@@ -4253,35 +4249,20 @@ define void @mulhs_v16i16(<16 x i16>* %x) {
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: mulhs_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI134_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI134_0)
-; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdiv.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdiv.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI134_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI134_0)
-; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdiv.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhs_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI134_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI134_0)
+; LMULMAX1-NEXT: vle16.v v26, (a2)
+; LMULMAX1-NEXT: vle16.v v27, (a0)
+; LMULMAX1-NEXT: vdiv.vv v25, v25, v26
+; LMULMAX1-NEXT: vdiv.vv v26, v27, v26
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: ret
%a = load <16 x i16>, <16 x i16>* %x
%b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
store <16 x i16> %b, <16 x i16>* %x
@@ -4289,6 +4270,34 @@ define void @mulhs_v16i16(<16 x i16>* %x) {
}
define void @mulhs_v8i32(<8 x i32>* %x) {
+; LMULMAX2-RV32-LABEL: mulhs_v8i32:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI135_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI135_0)
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 31
+; LMULMAX2-RV32-NEXT: vsra.vi v26, v26, 1
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse32.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhs_v8i32:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle32.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI135_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI135_0)
+; LMULMAX2-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vsra.vi v26, v26, 1
+; LMULMAX2-RV64-NEXT: vsrl.vi v28, v26, 31
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vse32.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
; LMULMAX1-RV32-LABEL: mulhs_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
@@ -4331,6 +4340,62 @@ define void @mulhs_v8i32(<8 x i32>* %x) {
}
define void @mulhs_v4i64(<4 x i64>* %x) {
+; LMULMAX2-RV32-LABEL: mulhs_v4i64:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmul.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_1)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_1)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_2)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_2)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_3)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_3)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhs_v4i64:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX2-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_1)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_1)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vmul.vv v28, v26, v28
+; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_2)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_2)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: addi a1, zero, 63
+; LMULMAX2-RV64-NEXT: vsrl.vx v28, v26, a1
+; LMULMAX2-RV64-NEXT: vsra.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
; LMULMAX1-RV32-LABEL: mulhs_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
@@ -5199,24 +5264,24 @@ define void @add_vi_v4i32(<4 x i32>* %x) {
}
define void @add_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: add_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: add_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vadd.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: add_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vadd.vi v25, v25, -1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5274,26 +5339,26 @@ define void @add_iv_v4i32(<4 x i32>* %x) {
}
define void @add_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: add_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI160_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI160_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: add_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vadd.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: add_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI160_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI160_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vadd.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5450,25 +5515,25 @@ define void @sub_vi_v4i32(<4 x i32>* %x) {
}
define void @sub_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: addi a1, zero, -1
-; LMULMAX1-RV64-NEXT: vsub.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: sub_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sub_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: vsub.vx v25, v25, a1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5526,26 +5591,26 @@ define void @sub_iv_v4i32(<4 x i32>* %x) {
}
define void @sub_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: sub_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI174_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI174_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v26, v25
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: sub_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vrsub.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: sub_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI174_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI174_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v25, v26, v25
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sub_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vrsub.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5795,26 +5860,26 @@ define void @and_vi_v4i32(<4 x i32>* %x) {
}
define void @and_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: and_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI190_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI190_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: and_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vand.vi v25, v25, -2
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: and_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI190_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI190_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: and_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vand.vi v25, v25, -2
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 -2, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5872,26 +5937,26 @@ define void @and_iv_v4i32(<4 x i32>* %x) {
}
define void @and_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: and_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI194_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI194_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: and_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vand.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: and_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI194_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI194_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: and_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vand.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6045,26 +6110,26 @@ define void @or_vi_v4i32(<4 x i32>* %x) {
}
define void @or_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: or_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI204_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI204_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: or_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vor.vi v25, v25, -2
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: or_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI204_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI204_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: or_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vor.vi v25, v25, -2
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 -2, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6122,26 +6187,26 @@ define void @or_iv_v4i32(<4 x i32>* %x) {
}
define void @or_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: or_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI208_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI208_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: or_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vor.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: or_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI208_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI208_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: or_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vor.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6295,24 +6360,24 @@ define void @xor_vi_v4i32(<4 x i32>* %x) {
}
define void @xor_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vxor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vxor.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: xor_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xor_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vxor.vi v25, v25, -1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6370,26 +6435,26 @@ define void @xor_iv_v4i32(<4 x i32>* %x) {
}
define void @xor_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: xor_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI222_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI222_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vxor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: xor_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vxor.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: xor_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI222_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI222_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xor_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vxor.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6543,26 +6608,26 @@ define void @lshr_vi_v4i32(<4 x i32>* %x) {
}
define void @lshr_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: lshr_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI232_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI232_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: lshr_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: lshr_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI232_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI232_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lshr_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsrl.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6668,26 +6733,26 @@ define void @ashr_vi_v4i32(<4 x i32>* %x) {
}
define void @ashr_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: ashr_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI239_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI239_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsra.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ashr_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: ashr_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI239_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI239_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ashr_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsra.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6793,26 +6858,26 @@ define void @shl_vi_v4i32(<4 x i32>* %x) {
}
define void @shl_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: shl_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI246_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI246_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsll.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: shl_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsll.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: shl_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI246_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI246_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsll.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: shl_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsll.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = insertelement <2 x i64> undef, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -7078,33 +7143,33 @@ define void @mulhu_vx_v16i8(<16 x i8>* %x) {
}
define void @mulhu_vx_v8i16(<8 x i16>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 2
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1171
-; LMULMAX1-RV32-NEXT: vmulhu.vx v26, v25, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 2
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1171
-; LMULMAX1-RV64-NEXT: vmulhu.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: lui a1, 2
+; RV32-NEXT: addi a1, a1, 1171
+; RV32-NEXT: vmulhu.vx v26, v25, a1
+; RV32-NEXT: vsub.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v25, v25, 2
+; RV32-NEXT: vse16.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV64-NEXT: vle16.v v25, (a0)
+; RV64-NEXT: lui a1, 2
+; RV64-NEXT: addiw a1, a1, 1171
+; RV64-NEXT: vmulhu.vx v26, v25, a1
+; RV64-NEXT: vsub.vv v25, v25, v26
+; RV64-NEXT: vsrl.vi v25, v25, 1
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vsrl.vi v25, v25, 2
+; RV64-NEXT: vse16.v v25, (a0)
+; RV64-NEXT: ret
%a = load <8 x i16>, <8 x i16>* %x
%b = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, <8 x i16>* %x
@@ -7112,27 +7177,27 @@ define void @mulhu_vx_v8i16(<8 x i16>* %x) {
}
define void @mulhu_vx_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 838861
-; LMULMAX1-RV32-NEXT: addi a1, a1, -819
-; LMULMAX1-RV32-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 838861
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, 838861
+; RV32-NEXT: addi a1, a1, -819
+; RV32-NEXT: vmulhu.vx v25, v25, a1
+; RV32-NEXT: vsrl.vi v25, v25, 2
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, 838861
+; RV64-NEXT: addiw a1, a1, -819
+; RV64-NEXT: vmulhu.vx v25, v25, a1
+; RV64-NEXT: vsrl.vi v25, v25, 2
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
%a = load <4 x i32>, <4 x i32>* %x
%b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
store <4 x i32> %b, <4 x i32>* %x
@@ -7140,41 +7205,41 @@ define void @mulhu_vx_v4i32(<4 x i32>* %x) {
}
define void @mulhu_vx_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI265_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI265_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI265_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI265_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 1026731
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI265_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI265_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulhu.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI265_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI265_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 1026731
+; RV64-NEXT: addiw a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: vmulhu.vx v25, v25, a1
+; RV64-NEXT: vsrl.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = udiv <2 x i64> %a, <i64 3, i64 3>
store <2 x i64> %b, <2 x i64>* %x
@@ -7198,31 +7263,31 @@ define void @mulhs_vx_v16i8(<16 x i8>* %x) {
}
define void @mulhs_vx_v8i16(<8 x i16>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, -1755
-; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 15
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1755
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 15
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, -1755
+; RV32-NEXT: vmulh.vx v25, v25, a1
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vsrl.vi v26, v25, 15
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse16.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV64-NEXT: vle16.v v25, (a0)
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: addiw a1, a1, -1755
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 15
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse16.v v25, (a0)
+; RV64-NEXT: ret
%a = load <8 x i16>, <8 x i16>* %x
%b = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, <8 x i16>* %x
@@ -7230,31 +7295,31 @@ define void @mulhs_vx_v8i16(<8 x i16>* %x) {
}
define void @mulhs_vx_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 629146
-; LMULMAX1-RV32-NEXT: addi a1, a1, -1639
-; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 629146
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1639
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, 629146
+; RV32-NEXT: addi a1, a1, -1639
+; RV32-NEXT: vmulh.vx v25, v25, a1
+; RV32-NEXT: vsrl.vi v26, v25, 31
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, 629146
+; RV64-NEXT: addiw a1, a1, -1639
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 31
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
%a = load <4 x i32>, <4 x i32>* %x
%b = sdiv <4 x i32> %a, <i32 -5, i32 -5, i32 -5, i32 -5>
store <4 x i32> %b, <4 x i32>* %x
@@ -7262,44 +7327,44 @@ define void @mulhs_vx_v4i32(<4 x i32>* %x) {
}
define void @mulhs_vx_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI269_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI269_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI269_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI269_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 21845
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: addi a1, zero, 63
-; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI269_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI269_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulh.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI269_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI269_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v26, v25, v26
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 21845
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1366
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: addi a1, zero, 63
+; RV64-NEXT: vsrl.vx v26, v25, a1
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = sdiv <2 x i64> %a, <i64 3, i64 3>
store <2 x i64> %b, <2 x i64>* %x