[llvm] 4f2fd38 - [InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)

Sun May 30 22:28:12 PDT 2021

Author: Hyeongyu Kim
Date: 2021-05-31T14:08:20+09:00
New Revision: 4f2fd3818b0eb26806f366bc37369349aeedcaf9

URL: https://github.com/llvm/llvm-project/commit/4f2fd3818b0eb26806f366bc37369349aeedcaf9
DIFF: https://github.com/llvm/llvm-project/commit/4f2fd3818b0eb26806f366bc37369349aeedcaf9.diff

LOG: [InstCombine] Fix miscompile on GEP+load to icmp fold (PR45210)

As noted in PR45210: https://bugs.llvm.org/show_bug.cgi?id=45210
...the bug is triggered as Eli say when sext(idx) * ElementSize overflows.

```
   // assume that GV is an array of 4-byte elements
   GEP = gep GV, 0, Idx // this is accessing Idx * 4
   L = load GEP
   ICI = icmp eq L, value
 =>
   ICI = icmp eq Idx, NewIdx
```

The foldCmpLoadFromIndexedGlobal function simplifies GEP+load operation to icmp.
And there is a problem because Idx * ElementSize can overflow.

Let's assume that the wanted value is at offset 0.
Then, there are actually four possible values for Idx to match offset 0: 0x00..00, 0x40..00, 0x80..00, 0xC0..00.
We should return true for all these values, but currently, the new icmp only returns true for 0x00..00.

This problem can be solved by masking off (trailing zeros of ElementSize) bits from Idx.

```
   ...
 =>
   Idx' = and Idx, 0x3F..FF
   ICI = icmp eq Idx', NewIdx
```

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D99481

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/load-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9e4d88899b19..5e60edd89d2a 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -269,14 +269,30 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
-  // If the index is larger than the pointer size of the target, truncate the
-  // index down like the GEP would do implicitly.  We don't have to do this for
-  // an inbounds GEP because the index can't be out of range.
   if (!GEP->isInBounds()) {
+    // If the index is larger than the pointer size of the target, truncate the
+    // index down like the GEP would do implicitly.  We don't have to do this
+    // for an inbounds GEP because the index can't be out of range.
     Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
     if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize)
       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
+
+    unsigned ElementSize =
+        DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+
+    // If inbounds keyword is not present, Idx * ElementSize can overflow.
+    // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+    // Then, there are two possible values for Idx to match offset 0:
+    // 0x00..00, 0x80..00.
+    // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
+    // comparison is false if Idx was 0x80..00.
+    // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
+    if (countTrailingZeros(ElementSize) != 0) {
+      Value *Mask = ConstantInt::getSigned(Idx->getType(), -1);
+      Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
+      Idx = Builder.CreateAnd(Idx, Mask);
+    }
   }
 
   // If the comparison is only true for one or two elements, emit direct

diff  --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 84f692dd4595..b56cfdd6c556 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -33,7 +33,8 @@ define i1 @test1(i32 %X) {
 
 define i1 @test1_noinbounds(i32 %X) {
 ; CHECK-LABEL: @test1_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
@@ -44,8 +45,8 @@ define i1 @test1_noinbounds(i32 %X) {
 
 define i1 @test1_noinbounds_i64(i64 %X) {
 ; CHECK-LABEL: @test1_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
@@ -54,10 +55,14 @@ define i1 @test1_noinbounds_i64(i64 %X) {
   ret i1 %R
 }
 
+; When x is 0x8009, x * 2(ElementSize) overflows and it becomes 0x0012.
+; It is the same location as when x is 0x0009, So we need to return
+; true if x AND 0x7FFF is 0x0009.
+
 define i1 @test1_noinbounds_as1(i32 %x) {
 ; CHECK-LABEL: @test1_noinbounds_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 32767
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
@@ -260,7 +265,8 @@ define i1 @test10_struct_arr(i32 %x) {
 
 define i1 @test10_struct_arr_noinbounds(i32 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
@@ -294,7 +300,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
@@ -305,8 +313,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2