[llvm] [LoongArch][NFC] Pre-commit tests for `[x]vmulw{ev/od}` instructions (PR #161346)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 30 02:56:05 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

<details>
<summary>Changes</summary>



---

Patch is 177.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161346.diff


2 Files Affected:

- (added) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll (+3475) 
- (added) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll (+1145) 


``````````diff
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
new file mode 100644
index 0000000000000..a1cad306d260e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
@@ -0,0 +1,3475 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+; Signed widening multiply of the even-indexed i8 lanes (0, 2, ..., 30) of the
+; two <32 x i8> vectors loaded from %a and %b; the <16 x i16> product is stored
+; to %res. The assertions record the current lowering, which moves every lane
+; through a GPR (vpickve2gr.b / ext.w.b / vinsgr2vr.h) before a plain xvmul.h.
+; NOTE(review): presumably the pre-commit baseline for selecting xvmulwev.h.b.
+define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vmulwev_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr3, $a1, 0
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvpermi.d $xr2, $xr3, 14
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 0
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 2
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 4
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 6
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 8
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 10
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 12
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 14
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 0
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 2
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 4
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 6
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 8
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 10
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 12
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 14
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 14
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 7
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 0
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 7
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 0
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 2
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 4
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 6
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 8
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 10
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 12
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 14
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
+; CHECK-NEXT:    xvpermi.q $xr1, $xr3, 2
+; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 2
+; CHECK-NEXT:    xvmul.h $xr0, $xr1, $xr4
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <32 x i8>, ptr %a
+  %vb = load <32 x i8>, ptr %b
+  ; The masks only index the first operand, so the unused second operand is
+  ; poison rather than the deprecated undef.
+  %vas = shufflevector <32 x i8> %va, <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vbs = shufflevector <32 x i8> %vb, <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vae = sext <16 x i8> %vas to <16 x i16>
+  %vbe = sext <16 x i8> %vbs to <16 x i16>
+  %mul = mul <16 x i16> %vae, %vbe
+  store <16 x i16> %mul, ptr %res
+  ret void
+}
+
+; Signed widening multiply of the even-indexed i16 lanes (0, 2, ..., 14) of the
+; two <16 x i16> vectors loaded from %a and %b; the <8 x i32> product is stored
+; to %res. The assertions record the current lowering, which moves every lane
+; through a GPR (vpickve2gr.h / ext.w.h / vinsgr2vr.w) before a plain xvmul.w.
+; NOTE(review): presumably the pre-commit baseline for selecting xvmulwev.w.h.
+define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vmulwev_w_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 14
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr2, 6
+; CHECK-NEXT:    vpickve2gr.h $a2, $vr2, 4
+; CHECK-NEXT:    vpickve2gr.h $a3, $vr2, 2
+; CHECK-NEXT:    vpickve2gr.h $a4, $vr2, 0
+; CHECK-NEXT:    vpickve2gr.h $a5, $vr0, 6
+; CHECK-NEXT:    vpickve2gr.h $a6, $vr0, 4
+; CHECK-NEXT:    vpickve2gr.h $a7, $vr0, 2
+; CHECK-NEXT:    vpickve2gr.h $t0, $vr0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr1, 14
+; CHECK-NEXT:    vpickve2gr.h $t1, $vr0, 6
+; CHECK-NEXT:    vpickve2gr.h $t2, $vr0, 4
+; CHECK-NEXT:    vpickve2gr.h $t3, $vr0, 2
+; CHECK-NEXT:    vpickve2gr.h $t4, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $t5, $vr1, 6
+; CHECK-NEXT:    vpickve2gr.h $t6, $vr1, 4
+; CHECK-NEXT:    vpickve2gr.h $t7, $vr1, 2
+; CHECK-NEXT:    vpickve2gr.h $t8, $vr1, 0
+; CHECK-NEXT:    ext.w.h $t0, $t0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $t0, 0
+; CHECK-NEXT:    ext.w.h $a7, $a7
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a7, 1
+; CHECK-NEXT:    ext.w.h $a6, $a6
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a6, 2
+; CHECK-NEXT:    ext.w.h $a5, $a5
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a5, 3
+; CHECK-NEXT:    ext.w.h $a4, $a4
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a4, 0
+; CHECK-NEXT:    ext.w.h $a3, $a3
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a3, 1
+; CHECK-NEXT:    ext.w.h $a2, $a2
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; CHECK-NEXT:    ext.w.h $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a1, 3
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ext.w.h $a1, $t8
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a1, 0
+; CHECK-NEXT:    ext.w.h $a1, $t7
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a1, 1
+; CHECK-NEXT:    ext.w.h $a1, $t6
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a1, 2
+; CHECK-NEXT:    ext.w.h $a1, $t5
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a1, 3
+; CHECK-NEXT:    ext.w.h $a1, $t4
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a1, 0
+; CHECK-NEXT:    ext.w.h $a1, $t3
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a1, 1
+; CHECK-NEXT:    ext.w.h $a1, $t2
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a1, 2
+; CHECK-NEXT:    ext.w.h $a1, $t1
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a1, 3
+; CHECK-NEXT:    xvpermi.q $xr1, $xr2, 2
+; CHECK-NEXT:    xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %va = load <16 x i16>, ptr %a
+  %vb = load <16 x i16>, ptr %b
+  ; The masks only index the first operand, so the unused second operand is
+  ; poison rather than the deprecated undef.
+  %vas = shufflevector <16 x i16> %va, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vbs = shufflevector <16 x i16> %vb, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vae = sext <8 x i16> %vas to <8 x i32>
+  %vbe = sext <8 x i16> %vbs to <8 x i32>
+  %mul = mul <8 x i32> %vae, %vbe
+  store <8 x i32> %mul, ptr %res
+  ret void
+}
+
+; Signed widening multiply of the even-indexed i32 lanes (0, 2, 4, 6) of the
+; two <8 x i32> vectors loaded from %a and %b; the <4 x i64> product is stored
+; to %res. The assertions record the current lowering, which differs per
+; target. On loongarch32 each lane is read with xvpickve2gr.w and its high word
+; is built with srai.w ..., 31 before being re-inserted as two 32-bit halves.
+; On loongarch64 each lane is read with xvpickve2gr.w and re-inserted with
+; vinsgr2vr.d. Both targets finish with a plain xvmul.d.
+; NOTE(review): presumably the pre-commit baseline for selecting xvmulwev.d.w.
+define void @vmulwev_d_w(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vmulwev_d_w:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 0
+; LA32-NEXT:    xvpickve2gr.w $a3, $xr0, 6
+; LA32-NEXT:    xvpickve2gr.w $a4, $xr0, 4
+; LA32-NEXT:    xvpickve2gr.w $a5, $xr1, 2
+; LA32-NEXT:    xvpickve2gr.w $a6, $xr1, 0
+; LA32-NEXT:    xvpickve2gr.w $a7, $xr1, 6
+; LA32-NEXT:    xvpickve2gr.w $t0, $xr1, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 0
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 1
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 2
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 3
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    srai.w $a2, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 1
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 2
+; LA32-NEXT:    srai.w $a1, $a1, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 3
+; LA32-NEXT:    xvpermi.q $xr1, $xr0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $t0, 0
+; LA32-NEXT:    srai.w $a1, $t0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a1, 1
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a7, 2
+; LA32-NEXT:    srai.w $a1, $a7, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a1, 3
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a6, 0
+; LA32-NEXT:    srai.w $a1, $a6, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a1, 1
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a5, 2
+; LA32-NEXT:    srai.w $a1, $a5, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a1, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr0, 2
+; LA32-NEXT:    xvmul.d $xr0, $xr1, $xr2
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: vmulwev_d_w:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 2
+; LA64-NEXT:    xvpickve2gr.w $a2, $xr0, 0
+; LA64-NEXT:    xvpickve2gr.w $a3, $xr0, 6
+; LA64-NEXT:    xvpickve2gr.w $a4, $xr0, 4
+; LA64-NEXT:    xvpickve2gr.w $a5, $xr1, 2
+; LA64-NEXT:    xvpickve2gr.w $a6, $xr1, 0
+; LA64-NEXT:    xvpickve2gr.w $a7, $xr1, 6
+; LA64-NEXT:    xvpickve2gr.w $t0, $xr1, 4
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a4, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a3, 1
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a2, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a1, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 2
+; LA64-NEXT:    vinsgr2vr.d $vr0, $t0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a7, 1
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a6, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a5, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr0, 2
+; LA64-NEXT:    xvmul.d $xr0, $xr1, $xr2
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <8 x i32>, ptr %a
+  %vb = load <8 x i32>, ptr %b
+  ; The masks only index the first operand, so the unused second operand is
+  ; poison rather than the deprecated undef.
+  %vas = shufflevector <8 x i32> %va, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vbs = shufflevector <8 x i32> %vb, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vae = sext <4 x i32> %vas to <4 x i64>
+  %vbe = sext <4 x i32> %vbs to <4 x i64>
+  %mul = mul <4 x i64> %vae, %vbe
+  store <4 x i64> %mul, ptr %res
+  ret void
+}
+
+; Signed widening multiply of the even-indexed i64 lanes (0 and 2) of the two
+; <4 x i64> vectors loaded from %a and %b; the <2 x i128> product is stored to
+; %res. The assertions record the current, fully scalar lowering. On
+; loongarch64 each lane is read with xvpickve2gr.d and multiplied with
+; mul.d / mulh.d. On loongarch32 the product is assembled from 32-bit
+; mul.w / mulh.wu pieces and needs $fp and $s0-$s4 spilled to the stack.
+; NOTE(review): presumably the pre-commit baseline for selecting xvmulwev.q.d.
+define void @vmulwev_q_d(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: vmulwev_q_d:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    st.w $fp, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s0, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s1, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s2, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s3, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s4, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT:    xvld $xr1, $a2, 0
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 0
+; LA32-NEXT:    xvpickve2gr.w $t3, $xr0, 1
+; LA32-NEXT:    xvpickve2gr.w $a5, $xr0, 5
+; LA32-NEXT:    xvpickve2gr.w $a3, $xr1, 4
+; LA32-NEXT:    xvpickve2gr.w $a4, $xr1, 0
+; LA32-NEXT:    xvpickve2gr.w $t4, $xr1, 1
+; LA32-NEXT:    xvpickve2gr.w $a7, $xr1, 5
+; LA32-NEXT:    srai.w $t1, $a5, 31
+; LA32-NEXT:    srai.w $t5, $t3, 31
+; LA32-NEXT:    srai.w $t0, $a7, 31
+; LA32-NEXT:    srai.w $t6, $t4, 31
+; LA32-NEXT:    mulh.wu $a6, $a2, $a4
+; LA32-NEXT:    mul.w $t2, $t3, $a4
+; LA32-NEXT:    add.w $a6, $t2, $a6
+; LA32-NEXT:    sltu $t2, $a6, $t2
+; LA32-NEXT:    mulh.wu $t7, $t3, $a4
+; LA32-NEXT:    add.w $t7, $t7, $t2
+; LA32-NEXT:    mul.w $t2, $a2, $t4
+; LA32-NEXT:    add.w $a6, $t2, $a6
+; LA32-NEXT:    sltu $t2, $a6, $t2
+; LA32-NEXT:    mulh.wu $t8, $a2, $t4
+; LA32-NEXT:    add.w $t2, $t8, $t2
+; LA32-NEXT:    add.w $t8, $t7, $t2
+; LA32-NEXT:    mul.w $fp, $t3, $t4
+; LA32-NEXT:    add.w $s0, $fp, $t8
+; LA32-NEXT:    mul.w $s1, $a4, $t5
+; LA32-NEXT:    mul.w $s2, $t6, $a2
+; LA32-NEXT:    add.w $s3, $s2, $s1
+; LA32-NEXT:    add.w $t2, $s0, $s3
+; LA32-NEXT:    sltu $s4, $t2, $s0
+; LA32-NEXT:    sltu $fp, $s0, $fp
+; LA32-NEXT:    sltu $t7, $t8, $t7
+; LA32-NEXT:    mulh.wu $t8, $t3, $t4
+; LA32-NEXT:    add.w $t7, $t8, $t7
+; LA32-NEXT:    add.w $t7, $t7, $fp
+; LA32-NEXT:    mulh.wu $t8, $a4, $t5
+; LA32-NEXT:    add.w $t8, $t8, $s1
+; LA32-NEXT:    mul.w $t4, $t4, $t5
+; LA32-NEXT:    add.w $t4, $t8, $t4
+; LA32-NEXT:    mul.w $t3, $t6, $t3
+; LA32-NEXT:    mulh.wu $t5, $t6, $a2
+; LA32-NEXT:    add.w $t3, $t5, $t3
+; LA32-NEXT:    add.w $t3, $t3, $s2
+; LA32-NEXT:    add.w $t3, $t3, $t4
+; LA32-NEXT:    sltu $t4, $s3, $s2
+; LA32-NEXT:    add.w $t3, $t3, $t4
+; LA32-NEXT:    add.w $t3, $t7, $t3
+; LA32-NEXT:    add.w $t3, $t3, $s4
+; LA32-NEXT:    mulh.wu $t4, $a1, $a3
+; LA32-NEXT:    mul.w $t5, $a5, $a3
+; LA32-NEXT:    add.w $t4, $t5, $t4
+; LA32-NEXT:    sltu $t5, $t4, $t5
+; LA32-NEXT:    mulh.wu $t6, $a5, $a3
+; LA32-NEXT:    add.w $t5, $t6, $t5
+; LA32-NEXT:    mul.w $t6, $a1, $a7
+; LA32-NEXT:    add.w $t4, $t6, $t4
+; LA32-NEXT:    sltu $t6, $t4, $t6
+; LA32-NEXT:    mulh.wu $t7, $a1, $a7
+; LA32-NEXT:    add.w $t6, $t7, $t6
+; LA32-NEXT:    add.w $t6, $t5, $t6
+; LA32-NEXT:    mul.w $t7, $a5, $a7
+; LA32-NEXT:    add.w $t8, $t7, $t6
+; LA32-NEXT:    mul.w $fp, $a3, $t1
+; LA32-NEXT:    mul.w $s0, $t0, $a1
+; LA32-NEXT:    add.w $s1, $s0, $fp
+; LA32-NEXT:    add.w $s2, $t8, $s1
+; LA32-NEXT:    sltu $s3, $s2, $t8
+; LA32-NEXT:    sltu $t7, $t8, $t7
+; LA32-NEXT:    sltu $t5, $t6, $t5
+; LA32-NEXT:    mulh.wu $t6, $a5, $a7
+; LA32-NEXT:    add.w $t5, $t6, $t5
+; LA32-NEXT:    add.w $t5, $t5, $t7
+; LA32-NEXT:    mulh.wu $t6, $a3, $t1
+; LA32-NEXT:    add.w $t6, $t6, $fp
+; LA32-NEXT:    mul.w $a7, $a7, $t1
+; LA32-NEXT:    add.w $a7, $t6, $a7
+; LA32-NEXT:    mul.w $a5, $t0, $a5
+; LA32-NEXT:    mulh.wu $t0, $t0, $a1
+; LA32-NEXT:    add.w $a5, $t0, $a5
+; LA32-NEXT:    add.w $a5, $a5, $s0
+; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    sltu $a7, $s1, $s0
+; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    add.w $a5, $t5, $a5
+; LA32-NEXT:    add.w $a5, $a5, $s3
+; LA32-NEXT:    mul.w $a2, $a2, $a4
+; LA32-NEXT:    mul.w $a1, $a1, $a3
+; LA32-NEXT:    st.w $a1, $a0, 16
+; LA32-NEXT:    st.w $a2, $a0, 0
+; LA32-NEXT:    st.w $t4, $a0, 20
+; LA32-NEXT:    st.w $a6, $a0, 4
+; LA32-NEXT:    st.w $s2, $a0, 24
+; LA32-NEXT:    st.w $t2, $a0, 8
+; LA32-NEXT:    st.w $a5, $a0, 28
+; LA32-NEXT:    st.w $t3, $a0, 12
+; LA32-NEXT:    ld.w $s4, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s3, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s2, $sp, 16 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s1, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s0, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: vmulwev_q_d:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvld $xr1, $a2, 0
+; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 2
+; LA64-NEXT:    xvpickve2gr.d $a2, $xr0, 0
+; LA64-NEXT:    xvpickve2gr.d $a3, $xr1, 2
+; LA64-NEXT:    xvpickve2gr.d $a4, $xr1, 0
+; LA64-NEXT:    mul.d $a5, $a2, $a4
+; LA64-NEXT:    mulh.d $a2, $a2, $a4
+; LA64-NEXT:    mul.d $a4, $a1, $a3
+; LA64-NEXT:    mulh.d $a1, $a1, $a3
+; LA64-NEXT:    st.d $a1, $a0, 24
+; LA64-NEXT:    st.d $a4, $a0, 16
+; LA64-NEXT:    st.d $a2, $a0, 8
+; LA64-NEXT:    st.d $a5, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %va = load <4 x i64>, ptr %a
+  %vb = load <4 x i64>, ptr %b
+  ; The masks only index the first operand, so the unused second operand is
+  ; poison rather than the deprecated undef.
+  %vas = shufflevector <4 x i64> %va, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+  %vbs = shufflevector <4 x i64> %vb, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+  %vae = sext <2 x i64> %vas to <2 x i128>
+  %vbe = sext <2 x i64> %vbs to <2 x i128>
+  %mul = mul <2 x i128> %vae, %vbe
+  store <2 x i128> %mul, ptr %res
+  ret void
+}
+
+define void @vmulwod_h_b(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: vmulwod_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr3, $a1, 0
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvpermi.d $xr2, $xr3, 14
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 1
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 3
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 5
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 7
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 9
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 11
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 13
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr3, 15
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 1
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 3
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 5
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 7
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 9
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 11
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 13
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr2, 15
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 14
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr3, $a1, 7
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr4, $a1, 6
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    ext.w.b $a1, $a1
+; CHECK-NEXT:    vinsgr2vr.h $vr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/161346


More information about the llvm-commits mailing list