[llvm] ad8e75c - [ARM] Fix for matching reductions that are both sext and zext.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 16 15:12:12 PDT 2021
Author: David Green
Date: 2021-07-16T23:11:42+01:00
New Revision: ad8e75caa2ebface54c92d7e4d7dc21c3166b6c6
URL: https://github.com/llvm/llvm-project/commit/ad8e75caa2ebface54c92d7e4d7dc21c3166b6c6
DIFF: https://github.com/llvm/llvm-project/commit/ad8e75caa2ebface54c92d7e4d7dc21c3166b6c6.diff
LOG: [ARM] Fix for matching reductions that are both sext and zext.
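Fix a mistake in the reduction matching: the check rejected the pattern only
when _neither_ multiply operand had the expected extend opcode, instead of
requiring that _both_ operands do. As a result, a multiply of one sext and one
zext operand was incorrectly matched as a single-signedness VMLAV reduction.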
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index eeb8885c0febd..261be2ce74025 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -16063,7 +16063,7 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return false;
SDValue ExtA = Mul->getOperand(0);
SDValue ExtB = Mul->getOperand(1);
- if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
+ if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
return false;
A = ExtA->getOperand(0);
B = ExtB->getOperand(0);
@@ -16097,7 +16097,7 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return false;
SDValue ExtA = Mul->getOperand(0);
SDValue ExtB = Mul->getOperand(1);
- if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
+ if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
return false;
A = ExtA->getOperand(0);
B = ExtB->getOperand(0);
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index b02c8b4c23498..ba5cf0881ce34 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -695,8 +695,22 @@ entry:
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_szext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i16_szext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmlav.s8 r0, q0, q1
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vstrw.32 q1, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: vldrb.u16 q0, [r0, #8]
+; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
+; CHECK-NEXT: vldrb.s16 q2, [r1]
+; CHECK-NEXT: vmul.i16 q0, q1, q0
+; CHECK-NEXT: vldrb.u16 q1, [r0]
+; CHECK-NEXT: vmul.i16 q1, q2, q1
+; CHECK-NEXT: vadd.i16 q0, q1, q0
+; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: bx lr
entry:
%xx = sext <16 x i8> %x to <16 x i16>
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
index ec29c4ddf9024..dc42ecdf9bb81 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
@@ -1112,9 +1112,63 @@ entry:
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_szext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i16_szext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.i8 eq, q2, zr
-; CHECK-NEXT: vmlavt.s8 r0, q0, q1
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vstrw.32 q1, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: vcmp.i8 eq, q2, zr
+; CHECK-NEXT: vmov.i8 q0, #0x0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vldrb.u16 q2, [r0]
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: vldrb.s16 q3, [r1]
+; CHECK-NEXT: vmov.u8 r2, q0[0]
+; CHECK-NEXT: vmov.16 q1[0], r2
+; CHECK-NEXT: vmov.u8 r2, q0[1]
+; CHECK-NEXT: vmov.16 q1[1], r2
+; CHECK-NEXT: vmov.u8 r2, q0[2]
+; CHECK-NEXT: vmov.16 q1[2], r2
+; CHECK-NEXT: vmov.u8 r2, q0[3]
+; CHECK-NEXT: vmov.16 q1[3], r2
+; CHECK-NEXT: vmov.u8 r2, q0[4]
+; CHECK-NEXT: vmov.16 q1[4], r2
+; CHECK-NEXT: vmov.u8 r2, q0[5]
+; CHECK-NEXT: vmov.16 q1[5], r2
+; CHECK-NEXT: vmov.u8 r2, q0[6]
+; CHECK-NEXT: vmov.16 q1[6], r2
+; CHECK-NEXT: vmov.u8 r2, q0[7]
+; CHECK-NEXT: vmov.16 q1[7], r2
+; CHECK-NEXT: vmov.u8 r2, q0[8]
+; CHECK-NEXT: vcmp.i16 ne, q1, zr
+; CHECK-NEXT: vmov.i32 q1, #0x0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmult.i16 q1, q3, q2
+; CHECK-NEXT: vmov.16 q2[0], r2
+; CHECK-NEXT: vmov.u8 r2, q0[9]
+; CHECK-NEXT: vmov.16 q2[1], r2
+; CHECK-NEXT: vmov.u8 r2, q0[10]
+; CHECK-NEXT: vmov.16 q2[2], r2
+; CHECK-NEXT: vmov.u8 r2, q0[11]
+; CHECK-NEXT: vmov.16 q2[3], r2
+; CHECK-NEXT: vmov.u8 r2, q0[12]
+; CHECK-NEXT: vmov.16 q2[4], r2
+; CHECK-NEXT: vmov.u8 r2, q0[13]
+; CHECK-NEXT: vmov.16 q2[5], r2
+; CHECK-NEXT: vmov.u8 r2, q0[14]
+; CHECK-NEXT: vmov.16 q2[6], r2
+; CHECK-NEXT: vmov.u8 r2, q0[15]
+; CHECK-NEXT: vmov.16 q2[7], r2
+; CHECK-NEXT: vldrb.u16 q0, [r0, #8]
+; CHECK-NEXT: vcmp.i16 ne, q2, zr
+; CHECK-NEXT: vldrb.s16 q2, [r1, #8]
+; CHECK-NEXT: vmul.i16 q0, q2, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vaddt.i16 q1, q1, q0
+; CHECK-NEXT: vaddv.u16 r0, q1
; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: add sp, #32
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <16 x i8> %b, zeroinitializer
More information about the llvm-commits
mailing list