[llvm] 602f81e - [AArch64] Fix zero element TBL indices
    David Green via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu May 19 05:54:47 PDT 2022
    
    
  
Author: David Green
Date: 2022-05-19T13:54:35+01:00
New Revision: 602f81ec336330f97e22442b98035c6f007cac6d
URL: https://github.com/llvm/llvm-project/commit/602f81ec336330f97e22442b98035c6f007cac6d
DIFF: https://github.com/llvm/llvm-project/commit/602f81ec336330f97e22442b98035c6f007cac6d.diff
LOG: [AArch64] Fix zero element TBL indices
A TBL instruction fills lanes whose index is out of range with zeros, a
property used in D121139 to turn a tbl2 with a zero input into a tbl1. This
works for v16i8, but for v8i8 the input is still treated as a v16i8, so
indices that were meant to be out of range (like a lane index of 8) would end
up loading values from the top half of the input register. Clean this up by
detecting the out-of-range indices and replacing them with an index (255)
that really is out of range for a v16i8 table.
There is also a fix for the swapped indices of 64-bit input vectors, which
could be adjusted incorrectly if the zero vector was the first operand.
Fixes #55545
Differential Revision: https://reviews.llvm.org/D125865
Added: 
    
Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
Removed: 
    
################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8e504173e261..503ac7cb359e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9939,30 +9939,34 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
     Swap = true;
   }
 
+  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
+  // out of range values with 0s. We do need to make sure that any out-of-range
+  // values are really out-of-range for a v16i8 vector.
+  bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
+  MVT IndexVT = MVT::v8i8;
+  unsigned IndexLen = 8;
+  if (Op.getValueSizeInBits() == 128) {
+    IndexVT = MVT::v16i8;
+    IndexLen = 16;
+  }
+
   SmallVector<SDValue, 8> TBLMask;
   for (int Val : ShuffleMask) {
     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
       unsigned Offset = Byte + Val * BytesPerElt;
       if (Swap)
-        Offset = Offset < 16 ? Offset + 16 : Offset - 16;
+        Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
+      if (IsUndefOrZero && Offset >= IndexLen)
+        Offset = 255;
       TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
     }
   }
 
-  MVT IndexVT = MVT::v8i8;
-  unsigned IndexLen = 8;
-  if (Op.getValueSizeInBits() == 128) {
-    IndexVT = MVT::v16i8;
-    IndexLen = 16;
-  }
-
   SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
   SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
 
   SDValue Shuffle;
-  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
-  // out of range values with 0s.
-  if (V2.isUndef() || isZerosVector(V2.getNode())) {
+  if (IsUndefOrZero) {
     if (IndexLen == 8)
       V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
     Shuffle = DAG.getNode(
diff  --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 6aa07e7846a6..069767da7b49 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -909,9 +909,9 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) {
 
 ; CHECK-LABEL: .LCPI90_0:
 ; CHECK-NEXT: .byte   0
-; CHECK-NEXT: .byte   8
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   2
-; CHECK-NEXT: .byte   9
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   4
 ; CHECK-NEXT: .byte   5
 ; CHECK-NEXT: .byte   6
@@ -930,14 +930,14 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
 }
 
 ; CHECK-LABEL: .LCPI91_0:
-; CHECK-NEXT: .byte   24
-; CHECK-NEXT: .byte   16
-; CHECK-NEXT: .byte   26
-; CHECK-NEXT: .byte   17
-; CHECK-NEXT: .byte   28
-; CHECK-NEXT: .byte   29
-; CHECK-NEXT: .byte   30
-; CHECK-NEXT: .byte   31
+; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   2
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   4
+; CHECK-NEXT: .byte   5
+; CHECK-NEXT: .byte   6
+; CHECK-NEXT: .byte   7
 define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK:       // %bb.0:
@@ -984,12 +984,12 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: .LCPI93_0:
 ; CHECK-NEXT: .byte   0
 ; CHECK-NEXT: .byte   1
-; CHECK-NEXT: .byte   16
-; CHECK-NEXT: .byte   17
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   4
 ; CHECK-NEXT: .byte   5
-; CHECK-NEXT: .byte   18
-; CHECK-NEXT: .byte   19
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   8
 ; CHECK-NEXT: .byte   9
 ; CHECK-NEXT: .byte   10
@@ -1011,12 +1011,12 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) {
 
 ; CHECK: .byte   0
 ; CHECK: .byte   1
-; CHECK: .byte   16
-; CHECK: .byte   17
+; CHECK: .byte   255
+; CHECK: .byte   255
 ; CHECK: .byte   4
 ; CHECK: .byte   5
-; CHECK: .byte   18
-; CHECK: .byte   19
+; CHECK: .byte   255
+; CHECK: .byte   255
 ; CHECK: .byte   8
 ; CHECK: .byte   9
 ; CHECK: .byte   10
        
    
    
More information about the llvm-commits
mailing list