[llvm] [PPC] Fix suspicious AltiVec VAVG patterns (PR #176891)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 21 07:56:10 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/176891
>From 88d437e05e3dd3478bacdcf66b14e927b1929165 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 20 Jan 2026 10:23:08 +0000
Subject: [PATCH 1/2] [PPC] Fix suspicious AltiVec VAVG patterns
The existing ((X+Y+1)>>1) patterns didn't correctly handle overflow the way the VAVG instructions would
Remove the old patterns and correctly mark the altivec VAVGS/VAVGU patterns as matching the ISD::AVGCEIL opcodes - we can leave the generic DAG folds to handle everything else
I've updated the vavg.ll tests to correctly match ISD::AVGCEILS/U patterns and added the old tests as negative "overflow" patterns that shouldn't fold to VAVG instructions
Fixes #174718
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +
llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 24 +-
llvm/test/CodeGen/PowerPC/vavg.ll | 275 ++++++++++++--------
3 files changed, 183 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 641b7804097f8..1d41e59d6d4f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -804,6 +804,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasAltivec()) {
for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ setOperationAction(ISD::AVGCEILS, VT, Legal);
+ setOperationAction(ISD::AVGCEILU, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index fe1eea2b33615..5ee97326715c5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1168,18 +1168,18 @@ def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)),
(VSEL $vC, $vB, $vA)>;
// Vector Integer Average Instructions
-def : Pat<(v4i32 (sra (sub v4i32:$vA, (vnot v4i32:$vB)),
- (v4i32 (immEQOneV)))), (v4i32 (VAVGSW $vA, $vB))>;
-def : Pat<(v8i16 (sra (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
- (v8i16 (immEQOneV)))), (v8i16 (VAVGSH $vA, $vB))>;
-def : Pat<(v16i8 (sra (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
- (v16i8 (immEQOneV)))), (v16i8 (VAVGSB $vA, $vB))>;
-def : Pat<(v4i32 (srl (sub v4i32:$vA, (vnot v4i32:$vB)),
- (v4i32 (immEQOneV)))), (v4i32 (VAVGUW $vA, $vB))>;
-def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
- (v8i16 (immEQOneV)))), (v8i16 (VAVGUH $vA, $vB))>;
-def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
- (v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+def : Pat<(v4i32 (avgceils v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VAVGSW $src1, $src2))>;
+def : Pat<(v8i16 (avgceils v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VAVGSH $src1, $src2))>;
+def : Pat<(v16i8 (avgceils v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VAVGSB $src1, $src2))>;
+def : Pat<(v4i32 (avgceilu v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VAVGUW $src1, $src2))>;
+def : Pat<(v8i16 (avgceilu v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VAVGUH $src1, $src2))>;
+def : Pat<(v16i8 (avgceilu v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VAVGUB $src1, $src2))>;
def : Pat<(v16i8 (shl v16i8:$vA, (v16i8 (immEQOneV)))),
(v16i8 (VADDUBM $vA, $vA))>;
diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index 31a876116523b..bc6fa43dfbf7a 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -2,136 +2,154 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck -check-prefix=CHECK-P8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck -check-prefix=CHECK-P7 %s
+
define <8 x i16> @test_v8i16(<8 x i16> %m, <8 x i16> %n) {
; CHECK-P9-LABEL: test_v8i16:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavguh 2, 3, 2
+; CHECK-P9-NEXT: vavguh 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v8i16:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavguh 2, 3, 2
+; CHECK-P8-NEXT: vavguh 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v8i16:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavguh 2, 3, 2
+; CHECK-P7-NEXT: vavguh 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %add1 = add <8 x i16> %add, %n
- %shr = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %shr
+ %xm = zext <8 x i16> %m to <8 x i17>
+ %xn = zext <8 x i16> %n to <8 x i17>
+ %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+ %add1 = add <8 x i17> %add, %xn
+ %shr = lshr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+ %tr = trunc <8 x i17> %shr to <8 x i16>
+ ret <8 x i16> %tr
}
define <8 x i16> @test_v8i16_sign(<8 x i16> %m, <8 x i16> %n) {
; CHECK-P9-LABEL: test_v8i16_sign:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavgsh 2, 3, 2
+; CHECK-P9-NEXT: vavgsh 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v8i16_sign:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavgsh 2, 3, 2
+; CHECK-P8-NEXT: vavgsh 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v8i16_sign:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavgsh 2, 3, 2
+; CHECK-P7-NEXT: vavgsh 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %add1 = add <8 x i16> %add, %n
- %shr = ashr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %shr
+ %xm = sext <8 x i16> %m to <8 x i17>
+ %xn = sext <8 x i16> %n to <8 x i17>
+ %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+ %add1 = add <8 x i17> %add, %xn
+ %shr = ashr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+ %tr = trunc <8 x i17> %shr to <8 x i16>
+ ret <8 x i16> %tr
}
define <4 x i32> @test_v4i32(<4 x i32> %m, <4 x i32> %n) {
; CHECK-P9-LABEL: test_v4i32:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavguw 2, 3, 2
+; CHECK-P9-NEXT: vavguw 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v4i32:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavguw 2, 3, 2
+; CHECK-P8-NEXT: vavguw 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v4i32:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavguw 2, 3, 2
+; CHECK-P7-NEXT: vavguw 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
- %add1 = add <4 x i32> %add, %n
- %shr = lshr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %shr
+ %xm = zext <4 x i32> %m to <4 x i33>
+ %xn = zext <4 x i32> %n to <4 x i33>
+ %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
+ %add1 = add <4 x i33> %add, %xn
+ %shr = lshr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
+ %tr = trunc <4 x i33> %shr to <4 x i32>
+ ret <4 x i32> %tr
}
define <4 x i32> @test_v4i32_sign(<4 x i32> %m, <4 x i32> %n) {
; CHECK-P9-LABEL: test_v4i32_sign:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavgsw 2, 3, 2
+; CHECK-P9-NEXT: vavgsw 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v4i32_sign:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavgsw 2, 3, 2
+; CHECK-P8-NEXT: vavgsw 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v4i32_sign:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavgsw 2, 3, 2
+; CHECK-P7-NEXT: vavgsw 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
- %add1 = add <4 x i32> %add, %n
- %shr = ashr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %shr
+ %xm = sext <4 x i32> %m to <4 x i33>
+ %xn = sext <4 x i32> %n to <4 x i33>
+ %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
+ %add1 = add <4 x i33> %add, %xn
+ %shr = ashr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
+ %tr = trunc <4 x i33> %shr to <4 x i32>
+ ret <4 x i32> %tr
}
define <16 x i8> @test_v16i8(<16 x i8> %m, <16 x i8> %n) {
; CHECK-P9-LABEL: test_v16i8:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavgub 2, 3, 2
+; CHECK-P9-NEXT: vavgub 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v16i8:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavgub 2, 3, 2
+; CHECK-P8-NEXT: vavgub 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v16i8:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavgub 2, 3, 2
+; CHECK-P7-NEXT: vavgub 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %add1 = add <16 x i8> %add, %n
- %shr = lshr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- ret <16 x i8> %shr
+ %xm = zext <16 x i8> %m to <16 x i9>
+ %xn = zext <16 x i8> %n to <16 x i9>
+ %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+ %add1 = add <16 x i9> %add, %xn
+ %shr = lshr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+ %tr = trunc <16 x i9> %shr to <16 x i8>
+ ret <16 x i8> %tr
}
define <16 x i8> @test_v16i8_sign(<16 x i8> %m, <16 x i8> %n) {
; CHECK-P9-LABEL: test_v16i8_sign:
-; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: vavgsb 2, 3, 2
+; CHECK-P9: # %bb.0:
+; CHECK-P9-NEXT: vavgsb 2, 2, 3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_v16i8_sign:
-; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: vavgsb 2, 3, 2
+; CHECK-P8: # %bb.0:
+; CHECK-P8-NEXT: vavgsb 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v16i8_sign:
-; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: vavgsb 2, 3, 2
+; CHECK-P7: # %bb.0:
+; CHECK-P7-NEXT: vavgsb 2, 2, 3
; CHECK-P7-NEXT: blr
-entry:
- %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %add1 = add <16 x i8> %add, %n
- %shr = ashr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- ret <16 x i8> %shr
+ %xm = sext <16 x i8> %m to <16 x i9>
+ %xn = sext <16 x i8> %n to <16 x i9>
+ %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+ %add1 = add <16 x i9> %add, %xn
+ %shr = ashr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+ %tr = trunc <16 x i9> %shr to <16 x i8>
+ ret <16 x i8> %tr
}
define <8 x i16> @test_v8i16_sign_negative(<8 x i16> %m, <8 x i16> %n) {
@@ -241,32 +259,113 @@ entry:
ret <4 x i32> %shr
}
+define <8 x i16> @test_v8i16_overflow(<8 x i16> %m, <8 x i16> %n) {
+; CHECK-P9-LABEL: test_v8i16_overflow:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xxlnor 34, 34, 34
+; CHECK-P9-NEXT: vspltish 4, 1
+; CHECK-P9-NEXT: vsubuhm 2, 3, 2
+; CHECK-P9-NEXT: vsrh 2, 2, 4
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P8-LABEL: test_v8i16_overflow:
+; CHECK-P8: # %bb.0: # %entry
+; CHECK-P8-NEXT: xxlnor 34, 34, 34
+; CHECK-P8-NEXT: vspltish 4, 1
+; CHECK-P8-NEXT: vsubuhm 2, 3, 2
+; CHECK-P8-NEXT: vsrh 2, 2, 4
+; CHECK-P8-NEXT: blr
+;
+; CHECK-P7-LABEL: test_v8i16_overflow:
+; CHECK-P7: # %bb.0: # %entry
+; CHECK-P7-NEXT: xxlnor 34, 34, 34
+; CHECK-P7-NEXT: vspltish 4, 1
+; CHECK-P7-NEXT: vsubuhm 2, 3, 2
+; CHECK-P7-NEXT: vsrh 2, 2, 4
+; CHECK-P7-NEXT: blr
+entry:
+ %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %add1 = add <8 x i16> %add, %n
+ %shr = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %shr
+}
+
+define <4 x i32> @test_v4i32_overflow(<4 x i32> %m, <4 x i32> %n) {
+; CHECK-P9-LABEL: test_v4i32_overflow:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xxlnor 34, 34, 34
+; CHECK-P9-NEXT: vspltisw 4, 1
+; CHECK-P9-NEXT: vsubuwm 2, 3, 2
+; CHECK-P9-NEXT: vsrw 2, 2, 4
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P8-LABEL: test_v4i32_overflow:
+; CHECK-P8: # %bb.0: # %entry
+; CHECK-P8-NEXT: xxlnor 34, 34, 34
+; CHECK-P8-NEXT: vspltisw 4, 1
+; CHECK-P8-NEXT: vsubuwm 2, 3, 2
+; CHECK-P8-NEXT: vsrw 2, 2, 4
+; CHECK-P8-NEXT: blr
+;
+; CHECK-P7-LABEL: test_v4i32_overflow:
+; CHECK-P7: # %bb.0: # %entry
+; CHECK-P7-NEXT: xxlnor 34, 34, 34
+; CHECK-P7-NEXT: vspltisw 4, 1
+; CHECK-P7-NEXT: vsubuwm 2, 3, 2
+; CHECK-P7-NEXT: vsrw 2, 2, 4
+; CHECK-P7-NEXT: blr
+entry:
+ %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
+ %add1 = add <4 x i32> %add, %n
+ %shr = lshr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %shr
+}
+
+define <16 x i8> @test_v16i8_overflow(<16 x i8> %m, <16 x i8> %n) {
+; CHECK-P9-LABEL: test_v16i8_overflow:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xxlnor 34, 34, 34
+; CHECK-P9-NEXT: xxspltib 36, 1
+; CHECK-P9-NEXT: vsububm 2, 3, 2
+; CHECK-P9-NEXT: vsrb 2, 2, 4
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P8-LABEL: test_v16i8_overflow:
+; CHECK-P8: # %bb.0: # %entry
+; CHECK-P8-NEXT: xxlnor 34, 34, 34
+; CHECK-P8-NEXT: vspltisb 4, 1
+; CHECK-P8-NEXT: vsububm 2, 3, 2
+; CHECK-P8-NEXT: vsrb 2, 2, 4
+; CHECK-P8-NEXT: blr
+;
+; CHECK-P7-LABEL: test_v16i8_overflow:
+; CHECK-P7: # %bb.0: # %entry
+; CHECK-P7-NEXT: xxlnor 34, 34, 34
+; CHECK-P7-NEXT: vspltisb 4, 1
+; CHECK-P7-NEXT: vsububm 2, 3, 2
+; CHECK-P7-NEXT: vsrb 2, 2, 4
+; CHECK-P7-NEXT: blr
+entry:
+ %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %add1 = add <16 x i8> %add, %n
+ %shr = lshr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %shr
+}
+
define <16 x i8> @test_avgceilu_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-P9-LABEL: test_avgceilu_v16i8:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxlor 36, 35, 34
-; CHECK-P9-NEXT: xxlxor 34, 35, 34
-; CHECK-P9-NEXT: xxspltib 35, 1
-; CHECK-P9-NEXT: vsrb 2, 2, 3
-; CHECK-P9-NEXT: vsububm 2, 4, 2
+; CHECK-P9-NEXT: vavgub 2, 3, 2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_avgceilu_v16i8:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: vspltisb 4, 1
-; CHECK-P8-NEXT: xxlor 37, 35, 34
-; CHECK-P8-NEXT: xxlxor 34, 35, 34
-; CHECK-P8-NEXT: vsrb 2, 2, 4
-; CHECK-P8-NEXT: vsububm 2, 5, 2
+; CHECK-P8-NEXT: vavgub 2, 3, 2
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_avgceilu_v16i8:
; CHECK-P7: # %bb.0:
-; CHECK-P7-NEXT: vspltisb 4, 1
-; CHECK-P7-NEXT: xxlor 37, 35, 34
-; CHECK-P7-NEXT: xxlxor 34, 35, 34
-; CHECK-P7-NEXT: vsrb 2, 2, 4
-; CHECK-P7-NEXT: vsububm 2, 5, 2
+; CHECK-P7-NEXT: vavgub 2, 3, 2
; CHECK-P7-NEXT: blr
%a_shr_1 = lshr <16 x i8> %a, splat (i8 1)
%b_shr_1 = lshr <16 x i8> %b, splat (i8 1)
@@ -280,29 +379,17 @@ define <16 x i8> @test_avgceilu_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @test_avgceilu_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-P9-LABEL: test_avgceilu_v8i16:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxlor 36, 35, 34
-; CHECK-P9-NEXT: xxlxor 34, 35, 34
-; CHECK-P9-NEXT: vspltish 3, 1
-; CHECK-P9-NEXT: vsrh 2, 2, 3
-; CHECK-P9-NEXT: vsubuhm 2, 4, 2
+; CHECK-P9-NEXT: vavguh 2, 3, 2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_avgceilu_v8i16:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: vspltish 4, 1
-; CHECK-P8-NEXT: xxlor 37, 35, 34
-; CHECK-P8-NEXT: xxlxor 34, 35, 34
-; CHECK-P8-NEXT: vsrh 2, 2, 4
-; CHECK-P8-NEXT: vsubuhm 2, 5, 2
+; CHECK-P8-NEXT: vavguh 2, 3, 2
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_avgceilu_v8i16:
; CHECK-P7: # %bb.0:
-; CHECK-P7-NEXT: vspltish 4, 1
-; CHECK-P7-NEXT: xxlor 37, 35, 34
-; CHECK-P7-NEXT: xxlxor 34, 35, 34
-; CHECK-P7-NEXT: vsrh 2, 2, 4
-; CHECK-P7-NEXT: vsubuhm 2, 5, 2
+; CHECK-P7-NEXT: vavguh 2, 3, 2
; CHECK-P7-NEXT: blr
%a_shr_1 = lshr <8 x i16> %a, splat (i16 1)
%b_shr_1 = lshr <8 x i16> %b, splat (i16 1)
@@ -316,29 +403,17 @@ define <8 x i16> @test_avgceilu_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i8> @test_avgceils_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-P9-LABEL: test_avgceils_v16i8:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxlor 36, 35, 34
-; CHECK-P9-NEXT: xxlxor 34, 35, 34
-; CHECK-P9-NEXT: xxspltib 35, 1
-; CHECK-P9-NEXT: vsrab 2, 2, 3
-; CHECK-P9-NEXT: vsububm 2, 4, 2
+; CHECK-P9-NEXT: vavgsb 2, 3, 2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_avgceils_v16i8:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: vspltisb 4, 1
-; CHECK-P8-NEXT: xxlor 37, 35, 34
-; CHECK-P8-NEXT: xxlxor 34, 35, 34
-; CHECK-P8-NEXT: vsrab 2, 2, 4
-; CHECK-P8-NEXT: vsububm 2, 5, 2
+; CHECK-P8-NEXT: vavgsb 2, 3, 2
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_avgceils_v16i8:
; CHECK-P7: # %bb.0:
-; CHECK-P7-NEXT: vspltisb 4, 1
-; CHECK-P7-NEXT: xxlor 37, 35, 34
-; CHECK-P7-NEXT: xxlxor 34, 35, 34
-; CHECK-P7-NEXT: vsrab 2, 2, 4
-; CHECK-P7-NEXT: vsububm 2, 5, 2
+; CHECK-P7-NEXT: vavgsb 2, 3, 2
; CHECK-P7-NEXT: blr
%a_shr_1 = ashr <16 x i8> %a, splat (i8 1)
%b_shr_1 = ashr <16 x i8> %b, splat (i8 1)
@@ -352,29 +427,17 @@ define <16 x i8> @test_avgceils_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @test_avgceils_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-P9-LABEL: test_avgceils_v8i16:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxlor 36, 35, 34
-; CHECK-P9-NEXT: xxlxor 34, 35, 34
-; CHECK-P9-NEXT: vspltish 3, 1
-; CHECK-P9-NEXT: vsrah 2, 2, 3
-; CHECK-P9-NEXT: vsubuhm 2, 4, 2
+; CHECK-P9-NEXT: vavgsh 2, 3, 2
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_avgceils_v8i16:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: vspltish 4, 1
-; CHECK-P8-NEXT: xxlor 37, 35, 34
-; CHECK-P8-NEXT: xxlxor 34, 35, 34
-; CHECK-P8-NEXT: vsrah 2, 2, 4
-; CHECK-P8-NEXT: vsubuhm 2, 5, 2
+; CHECK-P8-NEXT: vavgsh 2, 3, 2
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_avgceils_v8i16:
; CHECK-P7: # %bb.0:
-; CHECK-P7-NEXT: vspltish 4, 1
-; CHECK-P7-NEXT: xxlor 37, 35, 34
-; CHECK-P7-NEXT: xxlxor 34, 35, 34
-; CHECK-P7-NEXT: vsrah 2, 2, 4
-; CHECK-P7-NEXT: vsubuhm 2, 5, 2
+; CHECK-P7-NEXT: vavgsh 2, 3, 2
; CHECK-P7-NEXT: blr
%a_shr_1 = ashr <8 x i16> %a, splat (i16 1)
%b_shr_1 = ashr <8 x i16> %b, splat (i16 1)
>From e4a0366a7a9b956385aa67c2da0f08eccd9827f2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 21 Jan 2026 15:55:53 +0000
Subject: [PATCH 2/2] Bump width
---
llvm/test/CodeGen/PowerPC/vavg.ll | 72 +++++++++++++++----------------
1 file changed, 36 insertions(+), 36 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index bc6fa43dfbf7a..e473cbcd3f281 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -19,12 +19,12 @@ define <8 x i16> @test_v8i16(<8 x i16> %m, <8 x i16> %n) {
; CHECK-P7-NEXT: vavguh 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %xm = zext <8 x i16> %m to <8 x i17>
- %xn = zext <8 x i16> %n to <8 x i17>
- %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
- %add1 = add <8 x i17> %add, %xn
- %shr = lshr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
- %tr = trunc <8 x i17> %shr to <8 x i16>
+ %xm = zext <8 x i16> %m to <8 x i18>
+ %xn = zext <8 x i16> %n to <8 x i18>
+ %add = add <8 x i18> %xm, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+ %add1 = add <8 x i18> %add, %xn
+ %shr = lshr <8 x i18> %add1, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+ %tr = trunc <8 x i18> %shr to <8 x i16>
ret <8 x i16> %tr
}
@@ -44,12 +44,12 @@ define <8 x i16> @test_v8i16_sign(<8 x i16> %m, <8 x i16> %n) {
; CHECK-P7-NEXT: vavgsh 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %xm = sext <8 x i16> %m to <8 x i17>
- %xn = sext <8 x i16> %n to <8 x i17>
- %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
- %add1 = add <8 x i17> %add, %xn
- %shr = ashr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
- %tr = trunc <8 x i17> %shr to <8 x i16>
+ %xm = sext <8 x i16> %m to <8 x i18>
+ %xn = sext <8 x i16> %n to <8 x i18>
+ %add = add <8 x i18> %xm, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+ %add1 = add <8 x i18> %add, %xn
+ %shr = ashr <8 x i18> %add1, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+ %tr = trunc <8 x i18> %shr to <8 x i16>
ret <8 x i16> %tr
}
@@ -69,12 +69,12 @@ define <4 x i32> @test_v4i32(<4 x i32> %m, <4 x i32> %n) {
; CHECK-P7-NEXT: vavguw 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %xm = zext <4 x i32> %m to <4 x i33>
- %xn = zext <4 x i32> %n to <4 x i33>
- %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
- %add1 = add <4 x i33> %add, %xn
- %shr = lshr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
- %tr = trunc <4 x i33> %shr to <4 x i32>
+ %xm = zext <4 x i32> %m to <4 x i34>
+ %xn = zext <4 x i32> %n to <4 x i34>
+ %add = add <4 x i34> %xm, <i34 1, i34 1, i34 1, i34 1>
+ %add1 = add <4 x i34> %add, %xn
+ %shr = lshr <4 x i34> %add1, <i34 1, i34 1, i34 1, i34 1>
+ %tr = trunc <4 x i34> %shr to <4 x i32>
ret <4 x i32> %tr
}
@@ -94,12 +94,12 @@ define <4 x i32> @test_v4i32_sign(<4 x i32> %m, <4 x i32> %n) {
; CHECK-P7-NEXT: vavgsw 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %xm = sext <4 x i32> %m to <4 x i33>
- %xn = sext <4 x i32> %n to <4 x i33>
- %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
- %add1 = add <4 x i33> %add, %xn
- %shr = ashr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
- %tr = trunc <4 x i33> %shr to <4 x i32>
+ %xm = sext <4 x i32> %m to <4 x i34>
+ %xn = sext <4 x i32> %n to <4 x i34>
+ %add = add <4 x i34> %xm, <i34 1, i34 1, i34 1, i34 1>
+ %add1 = add <4 x i34> %add, %xn
+ %shr = ashr <4 x i34> %add1, <i34 1, i34 1, i34 1, i34 1>
+ %tr = trunc <4 x i34> %shr to <4 x i32>
ret <4 x i32> %tr
}
@@ -119,12 +119,12 @@ define <16 x i8> @test_v16i8(<16 x i8> %m, <16 x i8> %n) {
; CHECK-P7-NEXT: vavgub 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
- %xm = zext <16 x i8> %m to <16 x i9>
- %xn = zext <16 x i8> %n to <16 x i9>
- %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
- %add1 = add <16 x i9> %add, %xn
- %shr = lshr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
- %tr = trunc <16 x i9> %shr to <16 x i8>
+ %xm = zext <16 x i8> %m to <16 x i10>
+ %xn = zext <16 x i8> %n to <16 x i10>
+ %add = add <16 x i10> %xm, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+ %add1 = add <16 x i10> %add, %xn
+ %shr = lshr <16 x i10> %add1, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+ %tr = trunc <16 x i10> %shr to <16 x i8>
ret <16 x i8> %tr
}
@@ -143,12 +143,12 @@ define <16 x i8> @test_v16i8_sign(<16 x i8> %m, <16 x i8> %n) {
; CHECK-P7: # %bb.0:
; CHECK-P7-NEXT: vavgsb 2, 2, 3
; CHECK-P7-NEXT: blr
- %xm = sext <16 x i8> %m to <16 x i9>
- %xn = sext <16 x i8> %n to <16 x i9>
- %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
- %add1 = add <16 x i9> %add, %xn
- %shr = ashr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
- %tr = trunc <16 x i9> %shr to <16 x i8>
+ %xm = sext <16 x i8> %m to <16 x i10>
+ %xn = sext <16 x i8> %n to <16 x i10>
+ %add = add <16 x i10> %xm, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+ %add1 = add <16 x i10> %add, %xn
+ %shr = ashr <16 x i10> %add1, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+ %tr = trunc <16 x i10> %shr to <16 x i8>
ret <16 x i8> %tr
}
More information about the llvm-commits
mailing list