[llvm] 602f81e - [AArch64] Fix zero element TBL indices
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu May 19 05:54:47 PDT 2022
Author: David Green
Date: 2022-05-19T13:54:35+01:00
New Revision: 602f81ec336330f97e22442b98035c6f007cac6d
URL: https://github.com/llvm/llvm-project/commit/602f81ec336330f97e22442b98035c6f007cac6d
DIFF: https://github.com/llvm/llvm-project/commit/602f81ec336330f97e22442b98035c6f007cac6d.diff
LOG: [AArch64] Fix zero element TBL indices
A TBL instruction fills lanes whose index is out of range with 0s, something
used in D121139 to turn a tbl2 with a zero input into a tbl1. This works OK for
v16i8, but for v8i8 the input is still treated as a v16i8, so indices that
were meant to be out-of-range (like a lane index of 8) would end up loading values
from the top half of the input register. Clean this up by detecting the
out-of-range indices and replacing them with an index (255) that really is out of range.
There is also a fix for the swapped indices of 64-bit input vectors, which
could be incorrectly adjusted if the zero vector was the first operand.
Fixes #55545
Differential Revision: https://reviews.llvm.org/D125865
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8e504173e261..503ac7cb359e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9939,30 +9939,34 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
Swap = true;
}
+ // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
+ // out of range values with 0s. We do need to make sure that any out-of-range
+ // values are really out-of-range for a v16i8 vector.
+ bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
+ MVT IndexVT = MVT::v8i8;
+ unsigned IndexLen = 8;
+ if (Op.getValueSizeInBits() == 128) {
+ IndexVT = MVT::v16i8;
+ IndexLen = 16;
+ }
+
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
if (Swap)
- Offset = Offset < 16 ? Offset + 16 : Offset - 16;
+ Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
+ if (IsUndefOrZero && Offset >= IndexLen)
+ Offset = 255;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
- MVT IndexVT = MVT::v8i8;
- unsigned IndexLen = 8;
- if (Op.getValueSizeInBits() == 128) {
- IndexVT = MVT::v16i8;
- IndexLen = 16;
- }
-
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
- // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
- // out of range values with 0s.
- if (V2.isUndef() || isZerosVector(V2.getNode())) {
+ if (IsUndefOrZero) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 6aa07e7846a6..069767da7b49 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -909,9 +909,9 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: .LCPI90_0:
; CHECK-NEXT: .byte 0
-; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 2
-; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 6
@@ -930,14 +930,14 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
}
; CHECK-LABEL: .LCPI91_0:
-; CHECK-NEXT: .byte 24
-; CHECK-NEXT: .byte 16
-; CHECK-NEXT: .byte 26
-; CHECK-NEXT: .byte 17
-; CHECK-NEXT: .byte 28
-; CHECK-NEXT: .byte 29
-; CHECK-NEXT: .byte 30
-; CHECK-NEXT: .byte 31
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 5
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .byte 7
define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
; CHECK: // %bb.0:
@@ -984,12 +984,12 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: .LCPI93_0:
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 1
-; CHECK-NEXT: .byte 16
-; CHECK-NEXT: .byte 17
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 5
-; CHECK-NEXT: .byte 18
-; CHECK-NEXT: .byte 19
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 10
@@ -1011,12 +1011,12 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) {
; CHECK: .byte 0
; CHECK: .byte 1
-; CHECK: .byte 16
-; CHECK: .byte 17
+; CHECK: .byte 255
+; CHECK: .byte 255
; CHECK: .byte 4
; CHECK: .byte 5
-; CHECK: .byte 18
-; CHECK: .byte 19
+; CHECK: .byte 255
+; CHECK: .byte 255
; CHECK: .byte 8
; CHECK: .byte 9
; CHECK: .byte 10
More information about the llvm-commits
mailing list