[llvm] 1993f95 - [ValueTracking][SVE] Fix getOffsetFromIndex for scalable vector.

Fri Mar 20 14:48:43 PDT 2020

Author: Huihui Zhang
Date: 2020-03-20T14:48:29-07:00
New Revision: 1993f95f2b1eb2d8da7f1a01e977d8fe06314bcf

URL: https://github.com/llvm/llvm-project/commit/1993f95f2b1eb2d8da7f1a01e977d8fe06314bcf
DIFF: https://github.com/llvm/llvm-project/commit/1993f95f2b1eb2d8da7f1a01e977d8fe06314bcf.diff

LOG: [ValueTracking][SVE] Fix getOffsetFromIndex for scalable vector.

Summary:
Return None if the GEP index type is a scalable vector. The size of a scalable
vector is multiplied by a runtime constant, so it is not known at compile time.

Avoid transforming:
  %a = bitcast i8* %p to <vscale x 16 x i8>*
  %tmp0 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i64 0
  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %tmp0
  %tmp1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i64 1
  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %tmp1

into:
  %a = bitcast i8* %p to <vscale x 16 x i8>*
  %tmp0 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i64 0
  %1 = bitcast <vscale x 16 x i8>* %tmp0 to i8*
  call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 32, i1 false)

Reviewers: sdesmalen, efriedma, apazos, reames

Reviewed By: sdesmalen

Subscribers: tschuett, hiraditya, rkruppe, arphaman, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76464

Added: 
    llvm/test/Transforms/MemCpyOpt/vscale-memset.ll

Modified: 
    llvm/lib/Analysis/ValueTracking.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 773f79c3c4f4..3e62c45d445b 100644

--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6211,10 +6211,12 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
       continue;
     }
 
-    // Otherwise, we have a sequential type like an array or vector.  Multiply
-    // the index by the ElementSize.
-    uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
-    Offset += Size * OpC->getSExtValue();
+    // Otherwise, we have a sequential type like an array or fixed-length
+    // vector. Multiply the index by the ElementSize.
+    TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+    if (Size.isScalable())
+      return None;
+    Offset += Size.getFixedSize() * OpC->getSExtValue();
   }
 
   return Offset;

diff  --git a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
new file mode 100644
index 000000000000..256bd8518dc1
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -dce -S | FileCheck %s
+
+; Negative test
+; Check that this test is not transformed into a memset and does not cause the compiler warning:
+; warning: Compiler has made implicit assumption that TypeSize is not scalable. This may or may not lead to broken code.
+
+define void @foo(i8* %p) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[A]], i64 0
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[A]], i64 1
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[TMP1]]
+; CHECK-NEXT:    ret void
+;
+  %a = bitcast i8* %p to <vscale x 16 x i8>*
+  %tmp0 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i64 0
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %tmp0
+  %tmp1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i64 1
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %tmp1
+  ret void
+}
+
+; Positive test
+
+define void @memset_vscale_index_zero(i8* %p, i8 %z) {
+; CHECK-LABEL: @memset_vscale_index_zero(
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[A]], i32 0, i32 0
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP0]], i8 [[Z:%.*]], i64 17, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = bitcast i8* %p to <vscale x 16 x i8>*
+  %tmp0 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 0
+  store i8 %z, i8* %tmp0
+  %tmp1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 1
+  store i8 %z, i8* %tmp1
+  %tmp2 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 2
+  store i8 %z, i8* %tmp2
+  %tmp3 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 3
+  store i8 %z, i8* %tmp3
+  %tmp4 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 4
+  store i8 %z, i8* %tmp4
+  %tmp5 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 5
+  store i8 %z, i8* %tmp5
+  %tmp6 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 6
+  store i8 %z, i8* %tmp6
+  %tmp7 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 7
+  store i8 %z, i8* %tmp7
+  %tmp8 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 8
+  store i8 %z, i8* %tmp8
+  %tmp9 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 9
+  store i8 %z, i8* %tmp9
+  %tmp10 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 10
+  store i8 %z, i8* %tmp10
+  %tmp11 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 11
+  store i8 %z, i8* %tmp11
+  %tmp12 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 12
+  store i8 %z, i8* %tmp12
+  %tmp13 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 13
+  store i8 %z, i8* %tmp13
+  %tmp14 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 14
+  store i8 %z, i8* %tmp14
+  %tmp15 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 15
+  store i8 %z, i8* %tmp15
+  %tmp16 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 0, i32 16
+  store i8 %z, i8* %tmp16
+  ret void
+}
+
+define void @memset_vscale_index_nonzero(i8* %p, i8 %z) {
+; CHECK-LABEL: @memset_vscale_index_nonzero(
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[A]], i32 1, i32 0
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP0]], i8 [[Z:%.*]], i64 17, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = bitcast i8* %p to <vscale x 16 x i8>*
+  %tmp0 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 0
+  store i8 %z, i8* %tmp0
+  %tmp1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 1
+  store i8 %z, i8* %tmp1
+  %tmp2 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 2
+  store i8 %z, i8* %tmp2
+  %tmp3 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 3
+  store i8 %z, i8* %tmp3
+  %tmp4 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 4
+  store i8 %z, i8* %tmp4
+  %tmp5 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 5
+  store i8 %z, i8* %tmp5
+  %tmp6 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 6
+  store i8 %z, i8* %tmp6
+  %tmp7 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 7
+  store i8 %z, i8* %tmp7
+  %tmp8 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 8
+  store i8 %z, i8* %tmp8
+  %tmp9 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 9
+  store i8 %z, i8* %tmp9
+  %tmp10 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 10
+  store i8 %z, i8* %tmp10
+  %tmp11 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 11
+  store i8 %z, i8* %tmp11
+  %tmp12 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 12
+  store i8 %z, i8* %tmp12
+  %tmp13 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 13
+  store i8 %z, i8* %tmp13
+  %tmp14 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 14
+  store i8 %z, i8* %tmp14
+  %tmp15 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 15
+  store i8 %z, i8* %tmp15
+  %tmp16 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %a, i32 1, i32 16
+  store i8 %z, i8* %tmp16
+  ret void
+}