[llvm] [DAG] expandCLMUL - unroll vector clmul if vector multiplies are not supported (PR #182041)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 18 07:18:15 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

Fixes the PowerPC cases reported in #<!-- -->182039.

I'm hoping #<!-- -->177566 can be adapted to improve upon this.

---

Patch is 307.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182041.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4) 
- (added) llvm/test/CodeGen/PowerPC/clmul-vector.ll (+8874) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e4b4d80896fa7..3c7b46a9021da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8439,6 +8439,10 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
   unsigned BW = VT.getScalarSizeInBits();
   unsigned Opcode = Node->getOpcode();
 
+  // Scalarize if the vector multiplication is unlikely to work.
+  if (VT.isVector() && !isOperationLegalOrCustom(ISD::MUL, VT))
+    return DAG.UnrollVectorOp(Node);
+
   switch (Opcode) {
   case ISD::CLMUL: {
     // NOTE: If you change this expansion, please update the cost model
diff --git a/llvm/test/CodeGen/PowerPC/clmul-vector.ll b/llvm/test/CodeGen/PowerPC/clmul-vector.ll
new file mode 100644
index 0000000000000..9089dca5b0ed7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/clmul-vector.ll
@@ -0,0 +1,8874 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64-- | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,LE
+
+define <16 x i8> @clmul_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; BE-LABEL: clmul_v16i8:
+; BE:       # %bb.0:
+; BE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; BE-NEXT:    vspltisb 4, 2
+; BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    lvx 10, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; BE-NEXT:    vspltisb 5, 1
+; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; BE-NEXT:    vspltisb 0, 4
+; BE-NEXT:    vand 5, 3, 5
+; BE-NEXT:    vspltisb 6, 8
+; BE-NEXT:    vspltisb 8, -1
+; BE-NEXT:    vmuloub 9, 2, 4
+; BE-NEXT:    vmuleub 4, 2, 4
+; BE-NEXT:    vand 1, 3, 0
+; BE-NEXT:    vperm 4, 4, 9, 10
+; BE-NEXT:    vmuloub 9, 2, 5
+; BE-NEXT:    vmuleub 5, 2, 5
+; BE-NEXT:    vand 7, 3, 6
+; BE-NEXT:    vaddubm 6, 6, 6
+; BE-NEXT:    vperm 5, 5, 9, 10
+; BE-NEXT:    vmuloub 9, 2, 1
+; BE-NEXT:    vmuleub 1, 2, 1
+; BE-NEXT:    vperm 1, 1, 9, 10
+; BE-NEXT:    vmuloub 9, 2, 7
+; BE-NEXT:    vmuleub 7, 2, 7
+; BE-NEXT:    vand 6, 3, 6
+; BE-NEXT:    vperm 7, 7, 9, 10
+; BE-NEXT:    vmuloub 9, 2, 6
+; BE-NEXT:    vmuleub 6, 2, 6
+; BE-NEXT:    vperm 6, 6, 9, 10
+; BE-NEXT:    lvx 9, 0, 3
+; BE-NEXT:    vslb 0, 0, 0
+; BE-NEXT:    vslb 8, 8, 8
+; BE-NEXT:    vand 0, 3, 0
+; BE-NEXT:    vand 8, 3, 8
+; BE-NEXT:    vand 3, 3, 9
+; BE-NEXT:    vmuloub 9, 2, 0
+; BE-NEXT:    vmuleub 0, 2, 0
+; BE-NEXT:    vxor 4, 5, 4
+; BE-NEXT:    vperm 0, 0, 9, 10
+; BE-NEXT:    vmuloub 9, 2, 8
+; BE-NEXT:    vmuleub 8, 2, 8
+; BE-NEXT:    vmuloub 5, 2, 3
+; BE-NEXT:    vmuleub 2, 2, 3
+; BE-NEXT:    vxor 3, 4, 1
+; BE-NEXT:    vxor 3, 3, 7
+; BE-NEXT:    vperm 2, 2, 5, 10
+; BE-NEXT:    vxor 3, 3, 6
+; BE-NEXT:    vxor 2, 3, 2
+; BE-NEXT:    vperm 8, 8, 9, 10
+; BE-NEXT:    vxor 2, 2, 0
+; BE-NEXT:    vxor 2, 2, 8
+; BE-NEXT:    blr
+;
+; LE-LABEL: clmul_v16i8:
+; LE:       # %bb.0:
+; LE-NEXT:    vspltisb 4, 2
+; LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; LE-NEXT:    vspltisb 5, 1
+; LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; LE-NEXT:    xxland 36, 35, 36
+; LE-NEXT:    xxland 37, 35, 37
+; LE-NEXT:    vspltisb 0, 4
+; LE-NEXT:    vspltisb 1, 8
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    vmuloub 7, 2, 4
+; LE-NEXT:    vmuleub 4, 2, 4
+; LE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; LE-NEXT:    xxswapd 38, 0
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    vperm 4, 4, 7, 6
+; LE-NEXT:    vmuloub 7, 2, 5
+; LE-NEXT:    vmuleub 5, 2, 5
+; LE-NEXT:    vperm 5, 5, 7, 6
+; LE-NEXT:    xxland 39, 35, 32
+; LE-NEXT:    vslb 0, 0, 0
+; LE-NEXT:    vmuloub 8, 2, 7
+; LE-NEXT:    vmuleub 7, 2, 7
+; LE-NEXT:    xxland 32, 35, 32
+; LE-NEXT:    vperm 7, 7, 8, 6
+; LE-NEXT:    xxland 40, 35, 33
+; LE-NEXT:    vaddubm 1, 1, 1
+; LE-NEXT:    vmuloub 9, 2, 8
+; LE-NEXT:    vmuleub 8, 2, 8
+; LE-NEXT:    xxland 33, 35, 33
+; LE-NEXT:    vperm 8, 8, 9, 6
+; LE-NEXT:    vmuloub 9, 2, 1
+; LE-NEXT:    vmuleub 1, 2, 1
+; LE-NEXT:    vperm 1, 1, 9, 6
+; LE-NEXT:    xxland 41, 35, 0
+; LE-NEXT:    xxlxor 0, 37, 36
+; LE-NEXT:    vmuloub 10, 2, 9
+; LE-NEXT:    vmuleub 9, 2, 9
+; LE-NEXT:    xxlxor 0, 0, 39
+; LE-NEXT:    xxlxor 0, 0, 40
+; LE-NEXT:    xxlxor 0, 0, 33
+; LE-NEXT:    vperm 9, 9, 10, 6
+; LE-NEXT:    vmuloub 10, 2, 0
+; LE-NEXT:    vmuleub 0, 2, 0
+; LE-NEXT:    xxlxor 0, 0, 41
+; LE-NEXT:    vperm 0, 0, 10, 6
+; LE-NEXT:    xxleqv 42, 42, 42
+; LE-NEXT:    vslb 10, 10, 10
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 35, 35, 42
+; LE-NEXT:    vmuloub 10, 2, 3
+; LE-NEXT:    vmuleub 2, 2, 3
+; LE-NEXT:    vperm 2, 2, 10, 6
+; LE-NEXT:    xxlxor 34, 0, 34
+; LE-NEXT:    blr
+  %res = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @clmul_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; BE-LABEL: clmul_v8i16:
+; BE:       # %bb.0:
+; BE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; BE-NEXT:    vspltish 6, 2
+; BE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; BE-NEXT:    vand 4, 3, 6
+; BE-NEXT:    lvx 13, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; BE-NEXT:    vspltish 7, 1
+; BE-NEXT:    lvx 14, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI1_2@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI1_2@toc@l
+; BE-NEXT:    vspltish 8, 4
+; BE-NEXT:    lvx 15, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI1_3@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI1_3@toc@l
+; BE-NEXT:    vspltish 9, 8
+; BE-NEXT:    vand 5, 3, 7
+; BE-NEXT:    lvx 16, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI1_4@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI1_4@toc@l
+; BE-NEXT:    vspltisb 12, -1
+; BE-NEXT:    lvx 17, 0, 3
+; BE-NEXT:    vand 0, 3, 8
+; BE-NEXT:    vand 1, 3, 9
+; BE-NEXT:    vslh 10, 8, 8
+; BE-NEXT:    vsldoi 7, 7, 7, 1
+; BE-NEXT:    vsldoi 6, 6, 6, 1
+; BE-NEXT:    vsldoi 8, 8, 8, 1
+; BE-NEXT:    vslh 11, 9, 9
+; BE-NEXT:    vadduhm 9, 9, 9
+; BE-NEXT:    vslh 12, 12, 12
+; BE-NEXT:    vand 9, 3, 9
+; BE-NEXT:    vand 10, 3, 10
+; BE-NEXT:    vand 7, 3, 7
+; BE-NEXT:    vand 6, 3, 6
+; BE-NEXT:    vand 8, 3, 8
+; BE-NEXT:    vand 11, 3, 11
+; BE-NEXT:    vand 12, 3, 12
+; BE-NEXT:    vand 13, 3, 13
+; BE-NEXT:    vand 14, 3, 14
+; BE-NEXT:    vand 15, 3, 15
+; BE-NEXT:    vand 16, 3, 16
+; BE-NEXT:    vand 3, 3, 17
+; BE-NEXT:    vxor 17, 17, 17
+; BE-NEXT:    vmladduhm 4, 2, 4, 17
+; BE-NEXT:    vmladduhm 5, 2, 5, 17
+; BE-NEXT:    vmladduhm 0, 2, 0, 17
+; BE-NEXT:    vmladduhm 1, 2, 1, 17
+; BE-NEXT:    vmladduhm 9, 2, 9, 17
+; BE-NEXT:    vmladduhm 10, 2, 10, 17
+; BE-NEXT:    vmladduhm 7, 2, 7, 17
+; BE-NEXT:    vmladduhm 6, 2, 6, 17
+; BE-NEXT:    vmladduhm 8, 2, 8, 17
+; BE-NEXT:    vmladduhm 11, 2, 11, 17
+; BE-NEXT:    vmladduhm 12, 2, 12, 17
+; BE-NEXT:    vmladduhm 13, 2, 13, 17
+; BE-NEXT:    vmladduhm 14, 2, 14, 17
+; BE-NEXT:    vmladduhm 15, 2, 15, 17
+; BE-NEXT:    vmladduhm 16, 2, 16, 17
+; BE-NEXT:    vmladduhm 2, 2, 3, 17
+; BE-NEXT:    vxor 3, 5, 4
+; BE-NEXT:    vxor 3, 3, 0
+; BE-NEXT:    vxor 3, 3, 1
+; BE-NEXT:    vxor 3, 3, 9
+; BE-NEXT:    vxor 3, 3, 13
+; BE-NEXT:    vxor 3, 3, 10
+; BE-NEXT:    vxor 3, 3, 14
+; BE-NEXT:    vxor 3, 3, 7
+; BE-NEXT:    vxor 3, 3, 6
+; BE-NEXT:    vxor 3, 3, 8
+; BE-NEXT:    vxor 3, 3, 11
+; BE-NEXT:    vxor 3, 3, 15
+; BE-NEXT:    vxor 3, 3, 16
+; BE-NEXT:    vxor 2, 3, 2
+; BE-NEXT:    vxor 2, 2, 12
+; BE-NEXT:    blr
+;
+; LE-LABEL: clmul_v8i16:
+; LE:       # %bb.0:
+; LE-NEXT:    vspltish 5, 2
+; LE-NEXT:    vspltish 0, 1
+; LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; LE-NEXT:    xxland 41, 35, 37
+; LE-NEXT:    vspltish 1, 4
+; LE-NEXT:    vspltish 4, 8
+; LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    vsldoi 6, 0, 0, 1
+; LE-NEXT:    xxland 32, 35, 32
+; LE-NEXT:    vsldoi 7, 5, 5, 1
+; LE-NEXT:    vxor 5, 5, 5
+; LE-NEXT:    vmladduhm 9, 2, 9, 5
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; LE-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; LE-NEXT:    vsldoi 8, 1, 1, 1
+; LE-NEXT:    xxlxor 0, 32, 41
+; LE-NEXT:    xxland 32, 35, 33
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 36
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    vadduhm 0, 4, 4
+; LE-NEXT:    vslh 4, 4, 4
+; LE-NEXT:    xxland 32, 35, 32
+; LE-NEXT:    xxland 36, 35, 36
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    vmladduhm 4, 2, 4, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 1
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    addis 3, 2, .LCPI1_2@toc@ha
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    addi 3, 3, .LCPI1_2@toc@l
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    vslh 0, 1, 1
+; LE-NEXT:    xxland 32, 35, 32
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 1
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    addis 3, 2, .LCPI1_3@toc@ha
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    addi 3, 3, .LCPI1_3@toc@l
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 38
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 39
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxland 32, 35, 40
+; LE-NEXT:    vmladduhm 0, 2, 0, 5
+; LE-NEXT:    xxlxor 0, 0, 32
+; LE-NEXT:    xxlxor 0, 0, 36
+; LE-NEXT:    xxland 36, 35, 1
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    addis 3, 2, .LCPI1_4@toc@ha
+; LE-NEXT:    vmladduhm 4, 2, 4, 5
+; LE-NEXT:    addi 3, 3, .LCPI1_4@toc@l
+; LE-NEXT:    xxlxor 0, 0, 36
+; LE-NEXT:    xxland 36, 35, 1
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    vmladduhm 4, 2, 4, 5
+; LE-NEXT:    xxlxor 0, 0, 36
+; LE-NEXT:    xxland 36, 35, 1
+; LE-NEXT:    vmladduhm 4, 2, 4, 5
+; LE-NEXT:    xxlxor 0, 0, 36
+; LE-NEXT:    xxleqv 36, 36, 36
+; LE-NEXT:    vslh 4, 4, 4
+; LE-NEXT:    xxland 35, 35, 36
+; LE-NEXT:    vmladduhm 2, 2, 3, 5
+; LE-NEXT:    xxlxor 34, 0, 34
+; LE-NEXT:    blr
+  %res = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @clmul_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; BE-LABEL: clmul_v4i32:
+; BE:       # %bb.0:
+; BE-NEXT:    stdu 1, -1184(1)
+; BE-NEXT:    li 3, 992
+; BE-NEXT:    vspltisw 9, 4
+; BE-NEXT:    stvx 20, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1008
+; BE-NEXT:    vand 4, 3, 9
+; BE-NEXT:    vspltisw 6, 8
+; BE-NEXT:    stvx 21, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1024
+; BE-NEXT:    vspltisw 11, 1
+; BE-NEXT:    stvx 22, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1040
+; BE-NEXT:    vand 1, 3, 11
+; BE-NEXT:    vspltisw 8, 2
+; BE-NEXT:    stvx 23, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1056
+; BE-NEXT:    vspltisb 17, -1
+; BE-NEXT:    stvx 24, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1072
+; BE-NEXT:    stvx 25, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1088
+; BE-NEXT:    vsldoi 15, 11, 11, 1
+; BE-NEXT:    stvx 26, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1104
+; BE-NEXT:    stvx 27, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1120
+; BE-NEXT:    stvx 28, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1136
+; BE-NEXT:    vslw 18, 6, 6
+; BE-NEXT:    stvx 29, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1152
+; BE-NEXT:    stvx 30, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 1168
+; BE-NEXT:    vsldoi 5, 11, 11, 2
+; BE-NEXT:    stvx 31, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 976
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 960
+; BE-NEXT:    vand 4, 3, 6
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 624
+; BE-NEXT:    vsldoi 13, 6, 6, 2
+; BE-NEXT:    vsldoi 4, 11, 11, 3
+; BE-NEXT:    vsldoi 11, 6, 6, 3
+; BE-NEXT:    vadduwm 6, 6, 6
+; BE-NEXT:    vand 12, 3, 6
+; BE-NEXT:    stvx 12, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 752
+; BE-NEXT:    vand 6, 3, 18
+; BE-NEXT:    stvx 6, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 864
+; BE-NEXT:    vsldoi 19, 8, 8, 2
+; BE-NEXT:    vand 5, 3, 5
+; BE-NEXT:    stvx 5, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 592
+; BE-NEXT:    vsldoi 0, 9, 9, 2
+; BE-NEXT:    vand 5, 3, 19
+; BE-NEXT:    stvx 5, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 896
+; BE-NEXT:    vslw 10, 9, 9
+; BE-NEXT:    vsldoi 31, 9, 9, 1
+; BE-NEXT:    vsldoi 9, 9, 9, 3
+; BE-NEXT:    vand 0, 3, 0
+; BE-NEXT:    stvx 0, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 944
+; BE-NEXT:    vand 23, 3, 13
+; BE-NEXT:    vand 13, 3, 4
+; BE-NEXT:    vand 4, 3, 9
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_1@toc@l
+; BE-NEXT:    vand 25, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 928
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_2@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_3@toc@l
+; BE-NEXT:    vand 16, 3, 10
+; BE-NEXT:    vand 10, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_4@toc@l
+; BE-NEXT:    vand 30, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 768
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_5@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_5@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 704
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_6@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_6@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_7@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_7@toc@l
+; BE-NEXT:    vand 27, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_8@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_8@toc@l
+; BE-NEXT:    vand 22, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_9@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_9@toc@l
+; BE-NEXT:    vand 21, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI2_10@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_10@toc@l
+; BE-NEXT:    vand 20, 3, 4
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 496
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_11@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_11@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 448
+; BE-NEXT:    vand 4, 3, 4
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    addis 3, 2, .LCPI2_12@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI2_12@toc@l
+; BE-NEXT:    lvx 4, 0, 3
+; BE-NEXT:    li 3, 368
+; BE-NEXT:    vand 7, 3, 8
+; BE-NEXT:    vsldoi 14, 8, 8, 1
+; BE-NEXT:    vsldoi 8, 8, 8, 3
+; BE-NEXT:    vslw 17, 17, 17
+; BE-NEXT:    vand 15, 3, 15
+; BE-NEXT:    vand 14, 3, 14
+; BE-NEXT:    vand 24, 3, 31
+; BE-NEXT:    vand 26, 3, 8
+; BE-NEXT:    vand 11, 3, 11
+; BE-NEXT:    vand 9, 3, 17
+; BE-NEXT:    vand 3, 3, 4
+; BE-NEXT:    stvx 3, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    vspltisw 3, -16
+; BE-NEXT:    li 3, 912
+; BE-NEXT:    vmulouh 6, 2, 7
+; BE-NEXT:    vrlw 7, 7, 3
+; BE-NEXT:    vmulouh 8, 2, 1
+; BE-NEXT:    vrlw 1, 1, 3
+; BE-NEXT:    vxor 0, 0, 0
+; BE-NEXT:    vmsumuhm 7, 2, 7, 0
+; BE-NEXT:    vmsumuhm 1, 2, 1, 0
+; BE-NEXT:    vslw 7, 7, 3
+; BE-NEXT:    vadduwm 6, 6, 7
+; BE-NEXT:    vslw 1, 1, 3
+; BE-NEXT:    vadduwm 1, 8, 1
+; BE-NEXT:    vxor 4, 1, 6
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 976
+; BE-NEXT:    lvx 28, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 816
+; BE-NEXT:    vrlw 1, 28, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 960
+; BE-NEXT:    lvx 29, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 848
+; BE-NEXT:    vrlw 1, 29, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 880
+; BE-NEXT:    vrlw 1, 12, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 832
+; BE-NEXT:    vrlw 1, 16, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 800
+; BE-NEXT:    vrlw 1, 15, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 784
+; BE-NEXT:    vrlw 1, 14, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 736
+; BE-NEXT:    vrlw 1, 24, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 752
+; BE-NEXT:    lvx 17, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 720
+; BE-NEXT:    vrlw 1, 17, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 864
+; BE-NEXT:    vmr 31, 16
+; BE-NEXT:    lvx 16, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 688
+; BE-NEXT:    vrlw 1, 16, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 672
+; BE-NEXT:    vrlw 1, 5, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 896
+; BE-NEXT:    vmr 19, 15
+; BE-NEXT:    lvx 15, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 656
+; BE-NEXT:    vrlw 1, 15, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 640
+; BE-NEXT:    vrlw 1, 23, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 608
+; BE-NEXT:    vrlw 1, 13, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 576
+; BE-NEXT:    vrlw 1, 26, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 944
+; BE-NEXT:    lvx 12, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 544
+; BE-NEXT:    vrlw 1, 12, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 528
+; BE-NEXT:    vrlw 1, 11, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 512
+; BE-NEXT:    vrlw 1, 9, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 480
+; BE-NEXT:    vrlw 1, 25, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 928
+; BE-NEXT:    lvx 7, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 464
+; BE-NEXT:    vrlw 1, 7, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 432
+; BE-NEXT:    vrlw 1, 10, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 400
+; BE-NEXT:    vrlw 1, 30, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 768
+; BE-NEXT:    vmr 18, 14
+; BE-NEXT:    vmr 14, 23
+; BE-NEXT:    vmr 23, 26
+; BE-NEXT:    vmr 26, 30
+; BE-NEXT:    lvx 30, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 352
+; BE-NEXT:    vrlw 1, 30, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 704
+; BE-NEXT:    vmr 6, 25
+; BE-NEXT:    vmr 25, 10
+; BE-NEXT:    lvx 10, 1, 3 # 16-byte Folded Reload
+; BE-NEXT:    li 3, 320
+; BE-NEXT:    vrlw 1, 10, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 288
+; BE-NEXT:    vrlw 1, 27, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 256
+; BE-NEXT:    vrlw 1, 22, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 224
+; BE-NEXT:    vrlw 1, 21, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT:    li 3, 208
+; BE-NEXT:    vrlw 1, 20, 3
+; BE-NEXT:    vmsumuhm 4, 2, 1, 0
+; BE-NEXT:    stvx 4, 1, 3 # 16-byte Folded S...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/182041


More information about the llvm-commits mailing list