[llvm] [DAG] expandCLMUL - unroll vector clmul if vector multiplies are not supported (PR #182041)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 18 07:18:15 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Fixes PowerPC cases reported on #182039
I'm hoping #177566 can be adapted to improve upon this.
---
Patch is 307.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182041.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4)
- (added) llvm/test/CodeGen/PowerPC/clmul-vector.ll (+8874)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e4b4d80896fa7..3c7b46a9021da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8439,6 +8439,10 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
unsigned BW = VT.getScalarSizeInBits();
unsigned Opcode = Node->getOpcode();
+ // Scalarize if the vector multiplication is unlikely to work.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.UnrollVectorOp(Node);
+
switch (Opcode) {
case ISD::CLMUL: {
// NOTE: If you change this expansion, please update the cost model
diff --git a/llvm/test/CodeGen/PowerPC/clmul-vector.ll b/llvm/test/CodeGen/PowerPC/clmul-vector.ll
new file mode 100644
index 0000000000000..9089dca5b0ed7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/clmul-vector.ll
@@ -0,0 +1,8874 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64-- | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,LE
+
+define <16 x i8> @clmul_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; BE-LABEL: clmul_v16i8:
+; BE: # %bb.0:
+; BE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
+; BE-NEXT: vspltisb 4, 2
+; BE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: lvx 10, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; BE-NEXT: vspltisb 5, 1
+; BE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
+; BE-NEXT: vspltisb 0, 4
+; BE-NEXT: vand 5, 3, 5
+; BE-NEXT: vspltisb 6, 8
+; BE-NEXT: vspltisb 8, -1
+; BE-NEXT: vmuloub 9, 2, 4
+; BE-NEXT: vmuleub 4, 2, 4
+; BE-NEXT: vand 1, 3, 0
+; BE-NEXT: vperm 4, 4, 9, 10
+; BE-NEXT: vmuloub 9, 2, 5
+; BE-NEXT: vmuleub 5, 2, 5
+; BE-NEXT: vand 7, 3, 6
+; BE-NEXT: vaddubm 6, 6, 6
+; BE-NEXT: vperm 5, 5, 9, 10
+; BE-NEXT: vmuloub 9, 2, 1
+; BE-NEXT: vmuleub 1, 2, 1
+; BE-NEXT: vperm 1, 1, 9, 10
+; BE-NEXT: vmuloub 9, 2, 7
+; BE-NEXT: vmuleub 7, 2, 7
+; BE-NEXT: vand 6, 3, 6
+; BE-NEXT: vperm 7, 7, 9, 10
+; BE-NEXT: vmuloub 9, 2, 6
+; BE-NEXT: vmuleub 6, 2, 6
+; BE-NEXT: vperm 6, 6, 9, 10
+; BE-NEXT: lvx 9, 0, 3
+; BE-NEXT: vslb 0, 0, 0
+; BE-NEXT: vslb 8, 8, 8
+; BE-NEXT: vand 0, 3, 0
+; BE-NEXT: vand 8, 3, 8
+; BE-NEXT: vand 3, 3, 9
+; BE-NEXT: vmuloub 9, 2, 0
+; BE-NEXT: vmuleub 0, 2, 0
+; BE-NEXT: vxor 4, 5, 4
+; BE-NEXT: vperm 0, 0, 9, 10
+; BE-NEXT: vmuloub 9, 2, 8
+; BE-NEXT: vmuleub 8, 2, 8
+; BE-NEXT: vmuloub 5, 2, 3
+; BE-NEXT: vmuleub 2, 2, 3
+; BE-NEXT: vxor 3, 4, 1
+; BE-NEXT: vxor 3, 3, 7
+; BE-NEXT: vperm 2, 2, 5, 10
+; BE-NEXT: vxor 3, 3, 6
+; BE-NEXT: vxor 2, 3, 2
+; BE-NEXT: vperm 8, 8, 9, 10
+; BE-NEXT: vxor 2, 2, 0
+; BE-NEXT: vxor 2, 2, 8
+; BE-NEXT: blr
+;
+; LE-LABEL: clmul_v16i8:
+; LE: # %bb.0:
+; LE-NEXT: vspltisb 4, 2
+; LE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
+; LE-NEXT: vspltisb 5, 1
+; LE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
+; LE-NEXT: xxland 36, 35, 36
+; LE-NEXT: xxland 37, 35, 37
+; LE-NEXT: vspltisb 0, 4
+; LE-NEXT: vspltisb 1, 8
+; LE-NEXT: lxvd2x 0, 0, 3
+; LE-NEXT: vmuloub 7, 2, 4
+; LE-NEXT: vmuleub 4, 2, 4
+; LE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; LE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
+; LE-NEXT: xxswapd 38, 0
+; LE-NEXT: lxvd2x 0, 0, 3
+; LE-NEXT: vperm 4, 4, 7, 6
+; LE-NEXT: vmuloub 7, 2, 5
+; LE-NEXT: vmuleub 5, 2, 5
+; LE-NEXT: vperm 5, 5, 7, 6
+; LE-NEXT: xxland 39, 35, 32
+; LE-NEXT: vslb 0, 0, 0
+; LE-NEXT: vmuloub 8, 2, 7
+; LE-NEXT: vmuleub 7, 2, 7
+; LE-NEXT: xxland 32, 35, 32
+; LE-NEXT: vperm 7, 7, 8, 6
+; LE-NEXT: xxland 40, 35, 33
+; LE-NEXT: vaddubm 1, 1, 1
+; LE-NEXT: vmuloub 9, 2, 8
+; LE-NEXT: vmuleub 8, 2, 8
+; LE-NEXT: xxland 33, 35, 33
+; LE-NEXT: vperm 8, 8, 9, 6
+; LE-NEXT: vmuloub 9, 2, 1
+; LE-NEXT: vmuleub 1, 2, 1
+; LE-NEXT: vperm 1, 1, 9, 6
+; LE-NEXT: xxland 41, 35, 0
+; LE-NEXT: xxlxor 0, 37, 36
+; LE-NEXT: vmuloub 10, 2, 9
+; LE-NEXT: vmuleub 9, 2, 9
+; LE-NEXT: xxlxor 0, 0, 39
+; LE-NEXT: xxlxor 0, 0, 40
+; LE-NEXT: xxlxor 0, 0, 33
+; LE-NEXT: vperm 9, 9, 10, 6
+; LE-NEXT: vmuloub 10, 2, 0
+; LE-NEXT: vmuleub 0, 2, 0
+; LE-NEXT: xxlxor 0, 0, 41
+; LE-NEXT: vperm 0, 0, 10, 6
+; LE-NEXT: xxleqv 42, 42, 42
+; LE-NEXT: vslb 10, 10, 10
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 35, 35, 42
+; LE-NEXT: vmuloub 10, 2, 3
+; LE-NEXT: vmuleub 2, 2, 3
+; LE-NEXT: vperm 2, 2, 10, 6
+; LE-NEXT: xxlxor 34, 0, 34
+; LE-NEXT: blr
+ %res = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @clmul_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; BE-LABEL: clmul_v8i16:
+; BE: # %bb.0:
+; BE-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
+; BE-NEXT: vspltish 6, 2
+; BE-NEXT: addi 3, 3, .LCPI1_0 at toc@l
+; BE-NEXT: vand 4, 3, 6
+; BE-NEXT: lvx 13, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI1_1 at toc@l
+; BE-NEXT: vspltish 7, 1
+; BE-NEXT: lvx 14, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI1_2 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI1_2 at toc@l
+; BE-NEXT: vspltish 8, 4
+; BE-NEXT: lvx 15, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI1_3 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI1_3 at toc@l
+; BE-NEXT: vspltish 9, 8
+; BE-NEXT: vand 5, 3, 7
+; BE-NEXT: lvx 16, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI1_4 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI1_4 at toc@l
+; BE-NEXT: vspltisb 12, -1
+; BE-NEXT: lvx 17, 0, 3
+; BE-NEXT: vand 0, 3, 8
+; BE-NEXT: vand 1, 3, 9
+; BE-NEXT: vslh 10, 8, 8
+; BE-NEXT: vsldoi 7, 7, 7, 1
+; BE-NEXT: vsldoi 6, 6, 6, 1
+; BE-NEXT: vsldoi 8, 8, 8, 1
+; BE-NEXT: vslh 11, 9, 9
+; BE-NEXT: vadduhm 9, 9, 9
+; BE-NEXT: vslh 12, 12, 12
+; BE-NEXT: vand 9, 3, 9
+; BE-NEXT: vand 10, 3, 10
+; BE-NEXT: vand 7, 3, 7
+; BE-NEXT: vand 6, 3, 6
+; BE-NEXT: vand 8, 3, 8
+; BE-NEXT: vand 11, 3, 11
+; BE-NEXT: vand 12, 3, 12
+; BE-NEXT: vand 13, 3, 13
+; BE-NEXT: vand 14, 3, 14
+; BE-NEXT: vand 15, 3, 15
+; BE-NEXT: vand 16, 3, 16
+; BE-NEXT: vand 3, 3, 17
+; BE-NEXT: vxor 17, 17, 17
+; BE-NEXT: vmladduhm 4, 2, 4, 17
+; BE-NEXT: vmladduhm 5, 2, 5, 17
+; BE-NEXT: vmladduhm 0, 2, 0, 17
+; BE-NEXT: vmladduhm 1, 2, 1, 17
+; BE-NEXT: vmladduhm 9, 2, 9, 17
+; BE-NEXT: vmladduhm 10, 2, 10, 17
+; BE-NEXT: vmladduhm 7, 2, 7, 17
+; BE-NEXT: vmladduhm 6, 2, 6, 17
+; BE-NEXT: vmladduhm 8, 2, 8, 17
+; BE-NEXT: vmladduhm 11, 2, 11, 17
+; BE-NEXT: vmladduhm 12, 2, 12, 17
+; BE-NEXT: vmladduhm 13, 2, 13, 17
+; BE-NEXT: vmladduhm 14, 2, 14, 17
+; BE-NEXT: vmladduhm 15, 2, 15, 17
+; BE-NEXT: vmladduhm 16, 2, 16, 17
+; BE-NEXT: vmladduhm 2, 2, 3, 17
+; BE-NEXT: vxor 3, 5, 4
+; BE-NEXT: vxor 3, 3, 0
+; BE-NEXT: vxor 3, 3, 1
+; BE-NEXT: vxor 3, 3, 9
+; BE-NEXT: vxor 3, 3, 13
+; BE-NEXT: vxor 3, 3, 10
+; BE-NEXT: vxor 3, 3, 14
+; BE-NEXT: vxor 3, 3, 7
+; BE-NEXT: vxor 3, 3, 6
+; BE-NEXT: vxor 3, 3, 8
+; BE-NEXT: vxor 3, 3, 11
+; BE-NEXT: vxor 3, 3, 15
+; BE-NEXT: vxor 3, 3, 16
+; BE-NEXT: vxor 2, 3, 2
+; BE-NEXT: vxor 2, 2, 12
+; BE-NEXT: blr
+;
+; LE-LABEL: clmul_v8i16:
+; LE: # %bb.0:
+; LE-NEXT: vspltish 5, 2
+; LE-NEXT: vspltish 0, 1
+; LE-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
+; LE-NEXT: xxland 41, 35, 37
+; LE-NEXT: vspltish 1, 4
+; LE-NEXT: vspltish 4, 8
+; LE-NEXT: addi 3, 3, .LCPI1_0 at toc@l
+; LE-NEXT: lxvd2x 1, 0, 3
+; LE-NEXT: vsldoi 6, 0, 0, 1
+; LE-NEXT: xxland 32, 35, 32
+; LE-NEXT: vsldoi 7, 5, 5, 1
+; LE-NEXT: vxor 5, 5, 5
+; LE-NEXT: vmladduhm 9, 2, 9, 5
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; LE-NEXT: addi 3, 3, .LCPI1_1 at toc@l
+; LE-NEXT: vsldoi 8, 1, 1, 1
+; LE-NEXT: xxlxor 0, 32, 41
+; LE-NEXT: xxland 32, 35, 33
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 36
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: vadduhm 0, 4, 4
+; LE-NEXT: vslh 4, 4, 4
+; LE-NEXT: xxland 32, 35, 32
+; LE-NEXT: xxland 36, 35, 36
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: vmladduhm 4, 2, 4, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 1
+; LE-NEXT: lxvd2x 1, 0, 3
+; LE-NEXT: addis 3, 2, .LCPI1_2 at toc@ha
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: addi 3, 3, .LCPI1_2 at toc@l
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: vslh 0, 1, 1
+; LE-NEXT: xxland 32, 35, 32
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 1
+; LE-NEXT: lxvd2x 1, 0, 3
+; LE-NEXT: addis 3, 2, .LCPI1_3 at toc@ha
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: addi 3, 3, .LCPI1_3 at toc@l
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 38
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 39
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxland 32, 35, 40
+; LE-NEXT: vmladduhm 0, 2, 0, 5
+; LE-NEXT: xxlxor 0, 0, 32
+; LE-NEXT: xxlxor 0, 0, 36
+; LE-NEXT: xxland 36, 35, 1
+; LE-NEXT: lxvd2x 1, 0, 3
+; LE-NEXT: addis 3, 2, .LCPI1_4 at toc@ha
+; LE-NEXT: vmladduhm 4, 2, 4, 5
+; LE-NEXT: addi 3, 3, .LCPI1_4 at toc@l
+; LE-NEXT: xxlxor 0, 0, 36
+; LE-NEXT: xxland 36, 35, 1
+; LE-NEXT: lxvd2x 1, 0, 3
+; LE-NEXT: vmladduhm 4, 2, 4, 5
+; LE-NEXT: xxlxor 0, 0, 36
+; LE-NEXT: xxland 36, 35, 1
+; LE-NEXT: vmladduhm 4, 2, 4, 5
+; LE-NEXT: xxlxor 0, 0, 36
+; LE-NEXT: xxleqv 36, 36, 36
+; LE-NEXT: vslh 4, 4, 4
+; LE-NEXT: xxland 35, 35, 36
+; LE-NEXT: vmladduhm 2, 2, 3, 5
+; LE-NEXT: xxlxor 34, 0, 34
+; LE-NEXT: blr
+ %res = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @clmul_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; BE-LABEL: clmul_v4i32:
+; BE: # %bb.0:
+; BE-NEXT: stdu 1, -1184(1)
+; BE-NEXT: li 3, 992
+; BE-NEXT: vspltisw 9, 4
+; BE-NEXT: stvx 20, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1008
+; BE-NEXT: vand 4, 3, 9
+; BE-NEXT: vspltisw 6, 8
+; BE-NEXT: stvx 21, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1024
+; BE-NEXT: vspltisw 11, 1
+; BE-NEXT: stvx 22, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1040
+; BE-NEXT: vand 1, 3, 11
+; BE-NEXT: vspltisw 8, 2
+; BE-NEXT: stvx 23, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1056
+; BE-NEXT: vspltisb 17, -1
+; BE-NEXT: stvx 24, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1072
+; BE-NEXT: stvx 25, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1088
+; BE-NEXT: vsldoi 15, 11, 11, 1
+; BE-NEXT: stvx 26, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1104
+; BE-NEXT: stvx 27, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1120
+; BE-NEXT: stvx 28, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1136
+; BE-NEXT: vslw 18, 6, 6
+; BE-NEXT: stvx 29, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1152
+; BE-NEXT: stvx 30, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 1168
+; BE-NEXT: vsldoi 5, 11, 11, 2
+; BE-NEXT: stvx 31, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 976
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 960
+; BE-NEXT: vand 4, 3, 6
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 624
+; BE-NEXT: vsldoi 13, 6, 6, 2
+; BE-NEXT: vsldoi 4, 11, 11, 3
+; BE-NEXT: vsldoi 11, 6, 6, 3
+; BE-NEXT: vadduwm 6, 6, 6
+; BE-NEXT: vand 12, 3, 6
+; BE-NEXT: stvx 12, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 752
+; BE-NEXT: vand 6, 3, 18
+; BE-NEXT: stvx 6, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 864
+; BE-NEXT: vsldoi 19, 8, 8, 2
+; BE-NEXT: vand 5, 3, 5
+; BE-NEXT: stvx 5, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 592
+; BE-NEXT: vsldoi 0, 9, 9, 2
+; BE-NEXT: vand 5, 3, 19
+; BE-NEXT: stvx 5, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 896
+; BE-NEXT: vslw 10, 9, 9
+; BE-NEXT: vsldoi 31, 9, 9, 1
+; BE-NEXT: vsldoi 9, 9, 9, 3
+; BE-NEXT: vand 0, 3, 0
+; BE-NEXT: stvx 0, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 944
+; BE-NEXT: vand 23, 3, 13
+; BE-NEXT: vand 13, 3, 4
+; BE-NEXT: vand 4, 3, 9
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_0 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_0 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_1 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_1 at toc@l
+; BE-NEXT: vand 25, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 928
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_2 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_2 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_3 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_3 at toc@l
+; BE-NEXT: vand 16, 3, 10
+; BE-NEXT: vand 10, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_4 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_4 at toc@l
+; BE-NEXT: vand 30, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 768
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_5 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_5 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 704
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_6 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_6 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_7 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_7 at toc@l
+; BE-NEXT: vand 27, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_8 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_8 at toc@l
+; BE-NEXT: vand 22, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_9 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_9 at toc@l
+; BE-NEXT: vand 21, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: addis 3, 2, .LCPI2_10 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_10 at toc@l
+; BE-NEXT: vand 20, 3, 4
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 496
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_11 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_11 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 448
+; BE-NEXT: vand 4, 3, 4
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: addis 3, 2, .LCPI2_12 at toc@ha
+; BE-NEXT: addi 3, 3, .LCPI2_12 at toc@l
+; BE-NEXT: lvx 4, 0, 3
+; BE-NEXT: li 3, 368
+; BE-NEXT: vand 7, 3, 8
+; BE-NEXT: vsldoi 14, 8, 8, 1
+; BE-NEXT: vsldoi 8, 8, 8, 3
+; BE-NEXT: vslw 17, 17, 17
+; BE-NEXT: vand 15, 3, 15
+; BE-NEXT: vand 14, 3, 14
+; BE-NEXT: vand 24, 3, 31
+; BE-NEXT: vand 26, 3, 8
+; BE-NEXT: vand 11, 3, 11
+; BE-NEXT: vand 9, 3, 17
+; BE-NEXT: vand 3, 3, 4
+; BE-NEXT: stvx 3, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: vspltisw 3, -16
+; BE-NEXT: li 3, 912
+; BE-NEXT: vmulouh 6, 2, 7
+; BE-NEXT: vrlw 7, 7, 3
+; BE-NEXT: vmulouh 8, 2, 1
+; BE-NEXT: vrlw 1, 1, 3
+; BE-NEXT: vxor 0, 0, 0
+; BE-NEXT: vmsumuhm 7, 2, 7, 0
+; BE-NEXT: vmsumuhm 1, 2, 1, 0
+; BE-NEXT: vslw 7, 7, 3
+; BE-NEXT: vadduwm 6, 6, 7
+; BE-NEXT: vslw 1, 1, 3
+; BE-NEXT: vadduwm 1, 8, 1
+; BE-NEXT: vxor 4, 1, 6
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 976
+; BE-NEXT: lvx 28, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 816
+; BE-NEXT: vrlw 1, 28, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 960
+; BE-NEXT: lvx 29, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 848
+; BE-NEXT: vrlw 1, 29, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 880
+; BE-NEXT: vrlw 1, 12, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 832
+; BE-NEXT: vrlw 1, 16, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 800
+; BE-NEXT: vrlw 1, 15, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 784
+; BE-NEXT: vrlw 1, 14, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 736
+; BE-NEXT: vrlw 1, 24, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 752
+; BE-NEXT: lvx 17, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 720
+; BE-NEXT: vrlw 1, 17, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 864
+; BE-NEXT: vmr 31, 16
+; BE-NEXT: lvx 16, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 688
+; BE-NEXT: vrlw 1, 16, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 672
+; BE-NEXT: vrlw 1, 5, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 896
+; BE-NEXT: vmr 19, 15
+; BE-NEXT: lvx 15, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 656
+; BE-NEXT: vrlw 1, 15, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 640
+; BE-NEXT: vrlw 1, 23, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 608
+; BE-NEXT: vrlw 1, 13, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 576
+; BE-NEXT: vrlw 1, 26, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 944
+; BE-NEXT: lvx 12, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 544
+; BE-NEXT: vrlw 1, 12, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 528
+; BE-NEXT: vrlw 1, 11, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 512
+; BE-NEXT: vrlw 1, 9, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 480
+; BE-NEXT: vrlw 1, 25, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 928
+; BE-NEXT: lvx 7, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 464
+; BE-NEXT: vrlw 1, 7, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 432
+; BE-NEXT: vrlw 1, 10, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 400
+; BE-NEXT: vrlw 1, 30, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 768
+; BE-NEXT: vmr 18, 14
+; BE-NEXT: vmr 14, 23
+; BE-NEXT: vmr 23, 26
+; BE-NEXT: vmr 26, 30
+; BE-NEXT: lvx 30, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 352
+; BE-NEXT: vrlw 1, 30, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 704
+; BE-NEXT: vmr 6, 25
+; BE-NEXT: vmr 25, 10
+; BE-NEXT: lvx 10, 1, 3 # 16-byte Folded Reload
+; BE-NEXT: li 3, 320
+; BE-NEXT: vrlw 1, 10, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 288
+; BE-NEXT: vrlw 1, 27, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 256
+; BE-NEXT: vrlw 1, 22, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 224
+; BE-NEXT: vrlw 1, 21, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded Spill
+; BE-NEXT: li 3, 208
+; BE-NEXT: vrlw 1, 20, 3
+; BE-NEXT: vmsumuhm 4, 2, 1, 0
+; BE-NEXT: stvx 4, 1, 3 # 16-byte Folded S...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182041
More information about the llvm-commits mailing list