[llvm] 6d949a9 - [InstCombine] restrict funnel shift match to avoid miscompile
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue May 18 10:39:02 PDT 2021
Author: Sanjay Patel
Date: 2021-05-18T13:32:07-04:00
New Revision: 6d949a9c8fa440b0b91c9a96a30a9b7c1b7cf1e1
URL: https://github.com/llvm/llvm-project/commit/6d949a9c8fa440b0b91c9a96a30a9b7c1b7cf1e1
DIFF: https://github.com/llvm/llvm-project/commit/6d949a9c8fa440b0b91c9a96a30a9b7c1b7cf1e1.diff
LOG: [InstCombine] restrict funnel shift match to avoid miscompile
As noted in the post-commit discussion for:
https://reviews.llvm.org/rGabd7529625a73f405e40a63dcc446c41d51a219e
...that change exposed a logic hole that allows a miscompile
if the shift amount could exceed the narrow width:
https://alive2.llvm.org/ce/z/-i_CiM
https://alive2.llvm.org/ce/z/NaYz28
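To make the hole concrete, here is a sketch distilled from the
funnel.ll test updated below (the inline result comments are
illustrative). With %shamt = 8, every wide operation is well defined
and the result is the low 8 bits of %x:

  %masky = and i32 %y, 255
  %t4 = sub i32 8, %shamt        ; 0
  %t5 = shl i32 %x, %t4          ; %x
  %t6 = lshr i32 %masky, %shamt  ; 0
  %t7 = or i32 %t5, %t6          ; %x
  %t8 = trunc i32 %t7 to i8      ; low 8 bits of %x

The narrowed form, fshr(trunc %x, trunc %y, trunc %shamt), takes its
shift amount modulo 8, so an amount of 8 becomes 0 and it returns the
low 8 bits of %y instead.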
The restriction isn't necessary for a rotate (same operand for
both shifts), so we should adjust the matching for the shift
value as a follow-up enhancement:
https://alive2.llvm.org/ce/z/ahuuQb
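For contrast, the rotate pattern from the rotate.ll test below shifts
the same value on both sides, so the boundary amount is harmless: with
%shamt = 8 the wide sequence and a narrow rotate both produce the low
8 bits of %x (again a sketch mirroring the test):

  %maskx = and i32 %x, 255
  %t4 = sub i32 8, %shamt
  %t5 = shl i32 %x, %t4
  %t6 = lshr i32 %maskx, %shamt
  %t7 = or i32 %t5, %t6
  %t8 = trunc i32 %t7 to i8      ; == fshr(trunc %x, trunc %x, 8) == trunc %x

For a true funnel shift, the patch therefore demands proof that the
shift amount is already less than the narrow width: MaskedValueIsZero
must show that no bit at or above Log2_32(NarrowWidth) of the shift
amount can be set.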
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/test/Transforms/InstCombine/funnel.ll
llvm/test/Transforms/InstCombine/rotate.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e6b6936e373b7..785ee240d15b7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -526,6 +526,7 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
   // even with non-power-of-2 sizes, but it is not a likely scenario.
   Type *DestTy = Trunc.getType();
   unsigned NarrowWidth = DestTy->getScalarSizeInBits();
+  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
   if (!isPowerOf2_32(NarrowWidth))
     return nullptr;

@@ -556,8 +557,13 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
   auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
     // The shift amounts may add up to the narrow bit width:
     // (shl ShVal0, L) | (lshr ShVal1, Width - L)
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
-      return L;
+    // If this is a funnel shift (different operands are shifted), then the
+    // shift amount can not over-shift (create poison) in the narrow type.
+    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
+    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
+    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
+        return L;

     // The following patterns currently only work for rotation patterns.
     // TODO: Add more general funnel-shift compatible patterns.
@@ -592,7 +598,6 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
   // The right-shifted value must have high zeros in the wide type (for example
   // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
   // truncated, so those do not matter.
-  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
   APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
   if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll
index a064b6f1df834..5402020436649 100644
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -354,12 +354,16 @@ define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %sham
ret <2 x i64> %r
}
+; Negative test - an oversized shift in the narrow type would produce the wrong value.
+
define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %y, i32 %shamt) {
; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[Y:%.*]] to i8
-; CHECK-NEXT: [[T8:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP2]], i8 [[TMP3]], i8 [[TMP1]])
+; CHECK-NEXT: [[MASKY:%.*]] = and i32 [[Y:%.*]], 255
+; CHECK-NEXT: [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[X:%.*]], [[T4]]
+; CHECK-NEXT: [[T6:%.*]] = lshr i32 [[MASKY]], [[SHAMT]]
+; CHECK-NEXT: [[T7:%.*]] = or i32 [[T5]], [[T6]]
+; CHECK-NEXT: [[T8:%.*]] = trunc i32 [[T7]] to i8
; CHECK-NEXT: ret i8 [[T8]]
;
%masky = and i32 %y, 255
@@ -371,12 +375,17 @@ define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %y, i32 %shamt) {
ret i8 %t8
}
+; Negative test - an oversized shift in the narrow type would produce the wrong value.
+
define i8 @unmasked_shlop_insufficient_mask_shift_amount(i16 %x, i16 %y, i16 %shamt) {
; CHECK-LABEL: @unmasked_shlop_insufficient_mask_shift_amount(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[SHAMT:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 [[Y:%.*]] to i8
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[X:%.*]] to i8
-; CHECK-NEXT: [[T8:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP2]], i8 [[TMP3]], i8 [[TMP1]])
+; CHECK-NEXT: [[SHM:%.*]] = and i16 [[SHAMT:%.*]], 15
+; CHECK-NEXT: [[MASKX:%.*]] = and i16 [[X:%.*]], 255
+; CHECK-NEXT: [[T4:%.*]] = sub nsw i16 8, [[SHM]]
+; CHECK-NEXT: [[T5:%.*]] = shl i16 [[Y:%.*]], [[T4]]
+; CHECK-NEXT: [[T6:%.*]] = lshr i16 [[MASKX]], [[SHM]]
+; CHECK-NEXT: [[T7:%.*]] = or i16 [[T5]], [[T6]]
+; CHECK-NEXT: [[T8:%.*]] = trunc i16 [[T7]] to i8
; CHECK-NEXT: ret i8 [[T8]]
;
%shm = and i16 %shamt, 15
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 443d0c5fba61c..6a6337b8f309e 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -937,12 +937,16 @@ define i32 @rotateright32_doubleand1(i32 %v, i16 %r) {
ret i32 %or
}
+; TODO: This should be a rotate (funnel-shift).
+
define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %shamt) {
; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[X]] to i8
-; CHECK-NEXT: [[T8:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP2]], i8 [[TMP3]], i8 [[TMP1]])
+; CHECK-NEXT: [[MASKX:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[X]], [[T4]]
+; CHECK-NEXT: [[T6:%.*]] = lshr i32 [[MASKX]], [[SHAMT]]
+; CHECK-NEXT: [[T7:%.*]] = or i32 [[T5]], [[T6]]
+; CHECK-NEXT: [[T8:%.*]] = trunc i32 [[T7]] to i8
; CHECK-NEXT: ret i8 [[T8]]
;
%maskx = and i32 %x, 255