[PATCH] D70844: [InstCombine] Fix big-endian miscompile of (bitcast (zext/trunc (bitcast)))

Fri Nov 29 01:44:49 PST 2019

bjope created this revision.
bjope added reviewers: spatel, lattner.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

optimizeVectorResize is rewriting patterns like:

  %1 = bitcast vector %src to integer
  %2 = trunc/zext %1
  %dst = bitcast %2 to vector

Since bitcasting between integer and vector types gives
different integer values depending on endianness, we need
to take endianness into account. As it happens, the old
implementation only gave the correct result for little-endian
targets.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70844

Files:
  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
  llvm/test/Transforms/InstCombine/cast.ll


Index: llvm/test/Transforms/InstCombine/cast.ll
===================================================================

--- llvm/test/Transforms/InstCombine/cast.ll
+++ llvm/test/Transforms/InstCombine/cast.ll
@@ -824,7 +824,7 @@
 
 define <3 x i32> @test60(<4 x i32> %call4) {
 ; CHECK-LABEL: @test60(
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <3 x i32> [[P10]]
 ;
   %p11 = bitcast <4 x i32> %call4 to i128
@@ -836,7 +836,7 @@
 
 define <4 x i32> @test61(<3 x i32> %call4) {
 ; CHECK-LABEL: @test61(
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
 ;
   %p11 = bitcast <3 x i32> %call4 to i96
@@ -848,7 +848,7 @@
 define <4 x i32> @test62(<3 x float> %call4) {
 ; CHECK-LABEL: @test62(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
 ;
   %p11 = bitcast <3 x float> %call4 to i96
Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1820,7 +1820,11 @@
 }
 
 /// This input value (which is known to have vector type) is being zero extended
-/// or truncated to the specified vector type.
+/// or truncated to the specified vector type. Since the zext/trunc is done
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
+/// endianness will impact which end of the vector that is extended or
+/// truncated.
+///
 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
 ///
 /// The source and destination vector types may have different element types.
@@ -1850,25 +1854,36 @@
   SmallVector<uint32_t, 16> ShuffleMask;
   Value *V2;
 
-  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
-    // If we're shrinking the number of elements, just shuffle in the low
+  bool IsBigEndian = IC.getDataLayout().isBigEndian();
+  unsigned SrcElts = SrcTy->getNumElements();
+  unsigned DestElts = DestTy->getNumElements();
+
+  if (SrcElts > DestElts) {
+    // If we're shrinking the number of elements, just shuffle in the low/high
     // elements from the input and use undef as the second shuffle input.
     V2 = UndefValue::get(SrcTy);
-    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
-      ShuffleMask.push_back(i);
+    unsigned FirstElt = IsBigEndian ? SrcElts - DestElts : 0;
+    for (unsigned i = 0, e = DestElts; i != e; ++i)
+      ShuffleMask.push_back(FirstElt + i);
 
   } else {
     // If we're increasing the number of elements, shuffle in all of the
     // elements from InVal and fill the rest of the result elements with zeros
     // from a constant zero.
+
+    // The excess elements reference the first element of the zero input (V2).
     V2 = Constant::getNullValue(SrcTy);
-    unsigned SrcElts = SrcTy->getNumElements();
+
+    if (IsBigEndian)
+      for (unsigned i = 0, e = DestElts - SrcElts; i != e; ++i)
+        ShuffleMask.push_back(SrcElts);
+
     for (unsigned i = 0, e = SrcElts; i != e; ++i)
       ShuffleMask.push_back(i);
 
-    // The excess elements reference the first element of the zero input.
-    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
-      ShuffleMask.push_back(SrcElts);
+    if (!IsBigEndian)
+      for (unsigned i = 0, e = DestElts - SrcElts; i != e; ++i)
+        ShuffleMask.push_back(SrcElts);
   }
 
   return new ShuffleVectorInst(InVal, V2,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D70844.231494.patch
Type: text/x-patch
Size: 4273 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191129/ca3d2649/attachment.bin>