[llvm] a4753f5 - [IR] Avoid creation of GEPs into vectors (in one place)
Jannik Silvanus via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 23 04:25:56 PST 2023
Author: Jannik Silvanus
Date: 2023-01-23T13:25:39+01:00
New Revision: a4753f5dc0a9bccf3706a82cacbd046c272eb814
URL: https://github.com/llvm/llvm-project/commit/a4753f5dc0a9bccf3706a82cacbd046c272eb814
DIFF: https://github.com/llvm/llvm-project/commit/a4753f5dc0a9bccf3706a82cacbd046c272eb814.diff
LOG: [IR] Avoid creation of GEPs into vectors (in one place)
The method DataLayout::getGEPIndexForOffset(Type *&ElemTy, APInt &Offset)
makes it possible to generate GEP indices for a given byte-based offset.
This allows generating "natural" GEPs that follow the given type structure
when the byte offset happens to match a nested element object.
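As an illustration, here is a minimal sketch (the type and offset are made
up for this example, not taken from the patch): a byte offset of 8 into
[4 x i32] lands exactly on element index 2, so instead of a raw byte-based
GEP the helper lets callers emit the structured form:

define ptr @natural_gep(ptr %p) {
  ; Byte offset 8 into [4 x i32] corresponds to element 2 (2 * 4 bytes),
  ; so it can be expressed as a "natural" GEP into the array type ...
  %q = getelementptr [4 x i32], ptr %p, i64 0, i64 2
  ; ... instead of the equivalent byte-based form:
  ;   %q = getelementptr i8, ptr %p, i64 8
  ret ptr %q
}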
With opaque pointers and a general move towards byte-based GEPs [1],
this function may become questionable in the future.
This patch avoids the creation of GEPs into vectors in routines that use
DataLayout::getGEPIndexForOffset by not returning an index in that case;
callers then fall back to plain byte-based (i8) GEPs, as the updated tests show.
The reason is that A) GEPs into vectors have been discouraged for a long
time [2], and B) GEPs into vectors are currently broken if the element
type is overaligned [1]. This is also demonstrated by a lit test where
InstCombine previously replaced valid loads with poison. Note that
the result of InstCombine on that test is *still* invalid, because
padding bytes are assumed.
Moreover, GEPs into vectors may be outright forbidden in the future [1].
[1]: https://discourse.llvm.org/t/67497
[2]: https://llvm.org/docs/GetElementPtr.html
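To make the overalignment issue concrete, here is a hedged sketch (the
datalayout string and types are assumptions chosen for illustration, not
taken from the test): vector elements are bit-packed in memory regardless
of the element type's alignment, while a GEP into a vector steps by the
element's alloc size, so the two disagree once the element is overaligned:

target datalayout = "i16:32"

define ptr @vector_gep(ptr %p) {
  ; In memory, <4 x i16> is packed: element 1 starts at byte offset 2.
  ; With i16 overaligned to 32 bits, however, this GEP steps by the
  ; 4-byte alloc size and addresses byte offset 4 instead.
  %q = getelementptr <4 x i16>, ptr %p, i64 0, i64 1
  ret ptr %q
}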
The test case is new. It will be precommitted if this patch is accepted.
Differential Revision: https://reviews.llvm.org/D142146
Added:
Modified:
llvm/lib/IR/DataLayout.cpp
llvm/test/Transforms/InstCombine/load-gep-overalign.ll
llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
Removed:
################################################################################
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index b4d40e6fec82c..289d525d1f517 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -940,13 +940,10 @@ std::optional<APInt> DataLayout::getGEPIndexForOffset(Type *&ElemTy,
}
if (auto *VecTy = dyn_cast<VectorType>(ElemTy)) {
- ElemTy = VecTy->getElementType();
- unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedValue();
- // GEPs over non-multiple of 8 size vector elements are invalid.
- if (ElemSizeInBits % 8 != 0)
- return std::nullopt;
-
- return getElementIndex(TypeSize::Fixed(ElemSizeInBits / 8), Offset);
+ // Vector GEPs are partially broken (e.g. for overaligned element types),
+ // and may be forbidden in the future, so avoid generating GEPs into
+ // vectors. See https://discourse.llvm.org/t/67497
+ return std::nullopt;
}
if (auto *STy = dyn_cast<StructType>(ElemTy)) {
diff --git a/llvm/test/Transforms/InstCombine/load-gep-overalign.ll b/llvm/test/Transforms/InstCombine/load-gep-overalign.ll
index afc3feda51561..70d51191793e6 100644
--- a/llvm/test/Transforms/InstCombine/load-gep-overalign.ll
+++ b/llvm/test/Transforms/InstCombine/load-gep-overalign.ll
@@ -12,9 +12,7 @@ define void @test_vector_load_i8() {
; OVERALIGNED and NATURAL should have the same result, because the layout of vectors ignores
; element type alignment, and thus the representation of @foo is the same in both cases.
;
-; TODO: The OVERALIGNED result is incorrect.
-; First, for nonzero even indices, the valid load is replaced by poison.
-; Second, the remaining bytes at indices >= 2 are also incorrect, as apparently padding bytes
+; TODO: The OVERALIGNED result is incorrect, as apparently padding bytes
; are assumed as they would appear in an array. In vectors, there is no padding.
;
; NATURAL-LABEL: @test_vector_load_i8(
@@ -31,11 +29,11 @@ define void @test_vector_load_i8() {
; OVERALIGNED-LABEL: @test_vector_load_i8(
; OVERALIGNED-NEXT: call void @report(i64 0, i8 1)
; OVERALIGNED-NEXT: call void @report(i64 1, i8 35)
-; OVERALIGNED-NEXT: call void @report(i64 2, i8 poison)
+; OVERALIGNED-NEXT: call void @report(i64 2, i8 0)
; OVERALIGNED-NEXT: call void @report(i64 3, i8 0)
-; OVERALIGNED-NEXT: call void @report(i64 4, i8 poison)
+; OVERALIGNED-NEXT: call void @report(i64 4, i8 69)
; OVERALIGNED-NEXT: call void @report(i64 5, i8 103)
-; OVERALIGNED-NEXT: call void @report(i64 6, i8 poison)
+; OVERALIGNED-NEXT: call void @report(i64 6, i8 0)
; OVERALIGNED-NEXT: call void @report(i64 7, i8 0)
; OVERALIGNED-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
index 7da58137ab87d..d48f656089be1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
@@ -188,7 +188,7 @@ define void @multiply_can_hoist_multiple_insts2(ptr noalias %A, ptr %B, ptr %C)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP3]], i64 1
; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <1 x double>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD13]], <1 x double> [[COL_LOAD14]], <1 x double> [[TMP9]])
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr <4 x double>, ptr [[C]], i64 42, i64 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[C]], i64 1352
; CHECK-NEXT: store <1 x double> [[TMP12]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <1 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
@@ -199,7 +199,7 @@ define void @multiply_can_hoist_multiple_insts2(ptr noalias %A, ptr %B, ptr %C)
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP3]], i64 3
; CHECK-NEXT: [[COL_LOAD25:%.*]] = load <1 x double>, ptr [[TMP17]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD24]], <1 x double> [[COL_LOAD25]], <1 x double> [[TMP15]])
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <4 x double>, ptr [[C]], i64 42, i64 2
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[C]], i64 1360
; CHECK-NEXT: store <1 x double> [[TMP18]], ptr [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[A]], i64 1
; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, ptr [[TMP20]], align 8
@@ -211,7 +211,7 @@ define void @multiply_can_hoist_multiple_insts2(ptr noalias %A, ptr %B, ptr %C)
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP3]], i64 3
; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <1 x double>, ptr [[TMP24]], align 8
; CHECK-NEXT: [[TMP25:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[COL_LOAD36]], <1 x double> [[TMP22]])
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <4 x double>, ptr [[C]], i64 42, i64 3
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[C]], i64 1368
; CHECK-NEXT: store <1 x double> [[TMP25]], ptr [[TMP26]], align 8
; CHECK-NEXT: ret void
;
@@ -248,7 +248,7 @@ define void @multiply_dont_hoist_phi(ptr noalias %A, ptr %B, ptr %C) {
; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD1]], <2 x double> [[SPLAT_SPLAT7]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr <4 x double>, ptr [[C:%.*]], i64 26
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[GEP_1]], align 8
-; CHECK-NEXT: [[VEC_GEP14:%.*]] = getelementptr <4 x double>, ptr [[C]], i64 26, i64 2
+; CHECK-NEXT: [[VEC_GEP14:%.*]] = getelementptr i8, ptr [[C]], i64 848
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[VEC_GEP14]], align 8
; CHECK-NEXT: ret void
;