[llvm] 69b0ed9 - [InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)

Thu Jun 17 03:46:31 PDT 2021

Author: hyeongyukim
Date: 2021-06-17T19:46:17+09:00
New Revision: 69b0ed9a0a6aeab0b1c93d0a76c66a63c1d6f410

URL: https://github.com/llvm/llvm-project/commit/69b0ed9a0a6aeab0b1c93d0a76c66a63c1d6f410
DIFF: https://github.com/llvm/llvm-project/commit/69b0ed9a0a6aeab0b1c93d0a76c66a63c1d6f410.diff

LOG: [InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)

As noted in PR45210: https://bugs.llvm.org/show_bug.cgi?id=45210
...the bug is triggered as Eli say when sext(idx) * ElementSize overflows.

```
   // assume that GV is an array of 4-byte elements
   GEP = gep GV, 0, Idx // this is accessing Idx * 4
   L = load GEP
   ICI = icmp eq L, value
 =>
   ICI = icmp eq Idx, NewIdx
```

The foldCmpLoadFromIndexedGlobal function simplifies GEP+load operation to icmp.
And there is a problem because Idx * ElementSize can overflow.

Let's assume that the wanted value is at offset 0.
Then, there are actually four possible values for Idx to match offset 0: 0x00..00, 0x40..00, 0x80..00, 0xC0..00.
We should return true for all these values, but currently, the new icmp only returns true for 0x00..00.

This problem can be solved by masking off (trailing zeros of ElementSize) bits from Idx.

```
   ...
 =>
   Idx' = and Idx, 0x3F..FF
   ICI = icmp eq Idx', NewIdx
```

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D99481

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/load-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9e4d88899b19e..189b49e584c2a 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -279,9 +279,28 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
   }
 
+  // If inbounds keyword is not present, Idx * ElementSize can overflow.
+  // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+  // Then, there are two possible values for Idx to match offset 0:
+  // 0x00..00, 0x80..00.
+  // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
+  // comparison is false if Idx was 0x80..00.
+  // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
+  unsigned ElementSize =
+      DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+  auto MaskIdx = [&](Value* Idx){
+    if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) {
+      Value *Mask = ConstantInt::get(Idx->getType(), -1);
+      Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
+      Idx = Builder.CreateAnd(Idx, Mask);
+    }
+    return Idx;
+  };
+
   // If the comparison is only true for one or two elements, emit direct
   // comparisons.
   if (SecondTrueElement != Overdefined) {
+    Idx = MaskIdx(Idx);
     // None true -> false.
     if (FirstTrueElement == Undefined)
       return replaceInstUsesWith(ICI, Builder.getFalse());
@@ -302,6 +321,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
   // If the comparison is only false for one or two elements, emit direct
   // comparisons.
   if (SecondFalseElement != Overdefined) {
+    Idx = MaskIdx(Idx);
     // None false -> true.
     if (FirstFalseElement == Undefined)
       return replaceInstUsesWith(ICI, Builder.getTrue());
@@ -323,6 +343,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
   // where it is true, emit the range check.
   if (TrueRangeEnd != Overdefined) {
     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+    Idx = MaskIdx(Idx);
 
     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
     if (FirstTrueElement) {
@@ -338,6 +359,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
   // False range check.
   if (FalseRangeEnd != Overdefined) {
     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+    Idx = MaskIdx(Idx);
     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
     if (FirstFalseElement) {
       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
@@ -364,6 +386,7 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
       Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
 
     if (Ty) {
+      Idx = MaskIdx(Idx);
       Value *V = Builder.CreateIntCast(Idx, Ty, false);
       V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
       V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);

diff  --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 84f692dd4595d..92bf377d016c8 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -2,22 +2,22 @@
 ; RUN: opt -instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
 
 @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
-                                     i16 73, i16 82, i16 69, i16 68, i16 0]
+  i16 73, i16 82, i16 69, i16 68, i16 0]
 
 @G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
-                                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+  i16 73, i16 82, i16 69, i16 68, i16 0]
 
 @GD = internal constant [6 x double]
-   [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
+  [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
 %Foo = type { i32, i32, i32, i32 }
 
 @GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
 
 @GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
-                                             %Foo { i32 5, i32 4, i32 6, i32 11 },
-                                             %Foo { i32 6, i32 5, i32 9, i32 20 },
-                                             %Foo { i32 12, i32 3, i32 9, i32 8 } ]
+  %Foo { i32 5, i32 4, i32 6, i32 11 },
+  %Foo { i32 6, i32 5, i32 9, i32 20 },
+  %Foo { i32 12, i32 3, i32 9, i32 8 } ]
 
 
 define i1 @test1(i32 %X) {
@@ -33,7 +33,8 @@ define i1 @test1(i32 %X) {
 
 define i1 @test1_noinbounds(i32 %X) {
 ; CHECK-LABEL: @test1_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
@@ -44,8 +45,8 @@ define i1 @test1_noinbounds(i32 %X) {
 
 define i1 @test1_noinbounds_i64(i64 %X) {
 ; CHECK-LABEL: @test1_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
@@ -56,8 +57,8 @@ define i1 @test1_noinbounds_i64(i64 %X) {
 
 define i1 @test1_noinbounds_as1(i32 %x) {
 ; CHECK-LABEL: @test1_noinbounds_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 32767
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
@@ -260,7 +261,8 @@ define i1 @test10_struct_arr(i32 %x) {
 
 define i1 @test10_struct_arr_noinbounds(i32 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
@@ -294,7 +296,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
@@ -305,8 +309,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2