[llvm] r318323 - [InstCombine] trunc (binop X, C) --> binop (trunc X, C')
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 8 06:20:45 PST 2017
I just started looking at the example: it hangs somewhere in the backend,
not in instcombine (IR below). Does that affect the decision about
reverting?
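
For reference, here is a minimal IR sketch of the two folds that r318323
added (my own illustration, not taken from the commit; it assumes the
narrow type is desirable per the datalayout):

; trunc (binop X, C) --> binop (trunc X, C')
%b = and i32 %x, 255
%t = trunc i32 %b to i16
; ...becomes:
%n = trunc i32 %x to i16
%t = and i16 %n, 255

; trunc (binop (ext X), Y) --> binop X, (trunc Y)
%wx = zext i16 %x to i32
%b = add i32 %wx, %y
%t = trunc i32 %b to i16
; ...becomes:
%ny = trunc i32 %y to i16
%t = add i16 %x, %ny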
$ ./llc -o - inf.ll -mtriple=x86_64
.text
.file "inf.c"
^C
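
(One way to localize this: rerun the same command with the standard
-print-after-all option; the last pass printed before the output stalls
should be the one that is stuck. Something like:

$ ./llc -o - inf.ll -mtriple=x86_64 -print-after-all 2>&1 | tail

I haven't narrowed it down that far yet.)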
$ cat inf.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--linux-gnu"
%struct.c = type { i32, [0 x i8] }
@d = common local_unnamed_addr global i32 0, align 4
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @msan.module_ctor, i8* null }]
@__msan_retval_tls = external thread_local(initialexec) global [100 x i64]
@__msan_retval_origin_tls = external thread_local(initialexec) global i32
@__msan_param_tls = external thread_local(initialexec) global [100 x i64]
@__msan_param_origin_tls = external thread_local(initialexec) global [200 x i32]
@__msan_va_arg_tls = external thread_local(initialexec) global [100 x i64]
@__msan_va_arg_overflow_size_tls = external thread_local(initialexec) global i64
@__msan_origin_tls = external thread_local(initialexec) global i32
; Function Attrs: norecurse nounwind sanitize_memory
define i32 @e(%struct.c* %f) local_unnamed_addr #0 {
entry:
%0 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i64 0, i64 0), align 8
%1 = ptrtoint %struct.c* %f to i64
%_msprop = trunc i64 %0 to i32
%2 = trunc i64 %1 to i32
store i32 0, i32* inttoptr (i64 xor (i64 ptrtoint (i32* @d to i64), i64 87960930222080) to i32*), align 4
store i32 0, i32* @d, align 4, !tbaa !2
%3 = icmp eq i32 %_msprop, 0
br i1 %3, label %5, label %4, !prof !6
; <label>:4: ; preds = %entry
call void @__msan_warning_noreturn() #1
call void asm sideeffect "", ""() #1
unreachable
; <label>:5: ; preds = %entry
%cmp9 = icmp sgt i32 %2, 0
br i1 %cmp9, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %5
%_msprop23 = trunc i64 %0 to i8
%6 = trunc i64 %1 to i8
%arrayidx.phi.trans.insert = getelementptr inbounds %struct.c, %struct.c* %f, i64 0, i32 1, i64 0
%_mscmp54 = icmp eq i64 %0, 0
br i1 %_mscmp54, label %8, label %7, !prof !6
; <label>:7: ; preds = %for.body.lr.ph
call void @__msan_warning_noreturn() #1
call void asm sideeffect "", ""() #1
unreachable
; <label>:8: ; preds = %for.body.lr.ph
%.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1, !tbaa !7
%9 = ptrtoint i8* %arrayidx.phi.trans.insert to i64
%10 = xor i64 %9, 87960930222080
%11 = inttoptr i64 %10 to i8*
%_msld = load i8, i8* %11, align 1
%wide.trip.count = and i64 %1, 4294967295
br i1 false, label %12, label %13, !prof !8
; <label>:12: ; preds = %8
unreachable
; <label>:13: ; preds = %8
%14 = icmp ult i64 %wide.trip.count, 32
br i1 %14, label %for.body.preheader, label %vector.ph
vector.ph: ; preds = %13
%n.mod.vf = and i64 %1, 31
%n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
%cast.crd = trunc i64 %n.vec to i8
%15 = mul i8 %cast.crd, %6
%_msprop33 = or i8 %_msld, %_msprop23
%ind.end = add i8 %.pre, %15
%_msprop34 = insertelement <16 x i8> undef, i8 %_msld, i32 0
%.splatinsert = insertelement <16 x i8> undef, i8 %.pre, i32 0
%_msprop35 = shufflevector <16 x i8> %_msprop34, <16 x i8> undef, <16 x i32> zeroinitializer
%.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
%_msprop36 = insertelement <16 x i8> undef, i8 %_msprop23, i32 0
%.splatinsert12 = insertelement <16 x i8> undef, i8 %6, i32 0
%_msprop37 = shufflevector <16 x i8> %_msprop36, <16 x i8> undef, <16 x i32> zeroinitializer
%.splat13 = shufflevector <16 x i8> %.splatinsert12, <16 x i8> undef, <16 x i32> zeroinitializer
%msprop_mul_cst = mul <16 x i8> %_msprop37, <i8 0, i8 1, i8 2, i8 1, i8 4, i8 1, i8 2, i8 1, i8 8, i8 1, i8 2, i8 1, i8 4, i8 1, i8 2, i8 1>
%16 = mul <16 x i8> %.splat13, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
%_msprop38 = or <16 x i8> %_msprop35, %msprop_mul_cst
%induction = add <16 x i8> %.splat, %16
%17 = shl i8 %_msprop23, 4
%18 = shl i8 %6, 4
%_msprop39 = insertelement <16 x i8> undef, i8 %17, i32 0
%.splatinsert14 = insertelement <16 x i8> undef, i8 %18, i32 0
%_msprop40 = shufflevector <16 x i8> %_msprop39, <16 x i8> undef, <16 x i32> zeroinitializer
%.splat15 = shufflevector <16 x i8> %.splatinsert14, <16 x i8> undef, <16 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %37, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %37 ]
%_msphi_s46 = phi <16 x i8> [ %_msprop38, %vector.ph ], [ %_msprop47, %37 ]
%vec.ind = phi <16 x i8> [ %induction, %vector.ph ], [ %vec.ind.next, %37 ]
%_msprop47 = or <16 x i8> %_msphi_s46, %_msprop40
%step.add = add <16 x i8> %vec.ind, %.splat15
%_msprop49 = or <16 x i8> %_msprop47, %_msprop37
%19 = add <16 x i8> %step.add, %.splat13
%20 = or i64 %index, 1
%21 = getelementptr inbounds %struct.c, %struct.c* %f, i64 0, i32 1, i64 %20
br i1 false, label %22, label %23, !prof !8
; <label>:22: ; preds = %vector.body
unreachable
; <label>:23: ; preds = %vector.body
%24 = ptrtoint i8* %21 to i64
%25 = xor i64 %24, 87960930222080
%26 = inttoptr i64 %25 to <16 x i8>*
%27 = bitcast i8* %21 to <16 x i8>*
%28 = add <16 x i8> %vec.ind, %.splat13
%_msprop48 = or <16 x i8> %_msphi_s46, %_msprop37
store <16 x i8> %_msprop48, <16 x i8>* %26, align 1
store <16 x i8> %28, <16 x i8>* %27, align 1, !tbaa !7
%29 = getelementptr i8, i8* %21, i64 16
br i1 false, label %30, label %31, !prof !8
; <label>:30: ; preds = %23
unreachable
; <label>:31: ; preds = %23
%32 = ptrtoint i8* %29 to i64
%33 = xor i64 %32, 87960930222080
%34 = inttoptr i64 %33 to <16 x i8>*
%35 = bitcast i8* %29 to <16 x i8>*
store <16 x i8> %_msprop49, <16 x i8>* %34, align 1
store <16 x i8> %19, <16 x i8>* %35, align 1, !tbaa !7
%index.next = add i64 %index, 32
br i1 false, label %36, label %37, !prof !8
; <label>:36: ; preds = %31
unreachable
; <label>:37: ; preds = %31
%38 = icmp eq i64 %index.next, %n.vec
%vec.ind.next = add <16 x i8> %step.add, %.splat15
br i1 %38, label %middle.block, label %vector.body, !llvm.loop !9
middle.block: ; preds = %37
br i1 false, label %39, label %40, !prof !8
; <label>:39: ; preds = %middle.block
unreachable
; <label>:40: ; preds = %middle.block
%41 = icmp eq i64 %n.mod.vf, 0
br i1 %41, label %for.cond.for.end_crit_edge, label %for.body.preheader
for.body.preheader: ; preds = %40, %13
%_msphi_s = phi i8 [ %_msld, %13 ], [ %_msprop33, %40 ]
%.ph = phi i8 [ %.pre, %13 ], [ %ind.end, %40 ]
%indvars.iv.ph = phi i64 [ 0, %13 ], [ %n.vec, %40 ]
br label %for.body
for.body: ; preds = %for.body.preheader, %49
%_msphi_s25 = phi i8 [ %_msprop27, %49 ], [ %_msphi_s, %for.body.preheader ]
%42 = phi i8 [ %conv1, %49 ], [ %.ph, %for.body.preheader ]
%indvars.iv = phi i64 [ %indvars.iv.next, %49 ], [ %indvars.iv.ph, %for.body.preheader ]
%_msprop27 = or i8 %_msphi_s25, %_msprop23
%conv1 = add i8 %42, %6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx5 = getelementptr inbounds %struct.c, %struct.c* %f, i64 0, i32 1, i64 %indvars.iv.next
br i1 false, label %43, label %44, !prof !8
; <label>:43: ; preds = %for.body
unreachable
; <label>:44: ; preds = %for.body
%45 = ptrtoint i8* %arrayidx5 to i64
%46 = xor i64 %45, 87960930222080
%47 = inttoptr i64 %46 to i8*
store i8 %_msprop27, i8* %47, align 1
store i8 %conv1, i8* %arrayidx5, align 1, !tbaa !7
br i1 false, label %48, label %49, !prof !8
; <label>:48: ; preds = %44
unreachable
; <label>:49: ; preds = %44
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.cond.for.end_crit_edge.loopexit, label %for.body, !llvm.loop !11
for.cond.for.end_crit_edge.loopexit: ; preds = %49
br label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.cond.for.end_crit_edge.loopexit, %40
store i32 0, i32* inttoptr (i64 xor (i64 ptrtoint (i32* @d to i64), i64 87960930222080) to i32*), align 4
store i32 %2, i32* @d, align 4, !tbaa !2
br label %for.end
for.end: ; preds = %for.cond.for.end_crit_edge, %5
store i32 -1, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8
ret i32 undef
}
declare void @__msan_init() local_unnamed_addr
define internal void @msan.module_ctor() {
tail call void @__msan_init()
ret void
}
declare void @__msan_warning_noreturn()
declare void @__msan_maybe_warning_1(i8, i32)
declare void @__msan_maybe_store_origin_1(i8, i8*, i32)
declare void @__msan_maybe_warning_2(i16, i32)
declare void @__msan_maybe_store_origin_2(i16, i8*, i32)
declare void @__msan_maybe_warning_4(i32, i32)
declare void @__msan_maybe_store_origin_4(i32, i8*, i32)
declare void @__msan_maybe_warning_8(i64, i32)
declare void @__msan_maybe_store_origin_8(i64, i8*, i32)
declare void @__msan_set_alloca_origin4(i8*, i64, i8*, i64)
declare void @__msan_poison_stack(i8*, i64)
declare i32 @__msan_chain_origin(i32)
declare i8* @__msan_memmove(i8*, i8*, i64)
declare i8* @__msan_memcpy(i8*, i8*, i64)
declare i8* @__msan_memset(i8*, i32, i64)
attributes #0 = { norecurse nounwind sanitize_memory "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (trunk 320064) (llvm/trunk 320063)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!"branch_weights", i32 1000, i32 1}
!7 = !{!4, !4, i64 0}
!8 = !{!"branch_weights", i32 1, i32 1000}
!9 = distinct !{!9, !10}
!10 = !{!"llvm.loop.isvectorized", i32 1}
!11 = distinct !{!11, !12, !10}
!12 = !{!"llvm.loop.unroll.runtime.disable"}
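Since the loop is in llc, my guess at the failure mode is a pair of
backend combines undoing each other, so the worklist never empties. A
hypothetical sketch of such a cycle (not confirmed as the culprit here):

; combine 1 (narrowing): trunc (add (zext X), Y) --> add X, (trunc Y)
; combine 2 (widening):  add X, (trunc Y) --> trunc (add (zext X), Y)
; Each fold re-creates the other's input, so iteration never terminates.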
On Fri, Dec 8, 2017 at 3:24 AM, Chandler Carruth <chandlerc at gmail.com>
wrote:
> I think this is still enough of a test case to revert while we sort it out.
>
> On Fri, Dec 8, 2017 at 4:38 AM David Blaikie <dblaikie at gmail.com> wrote:
>
>> Haven't looked too far yet, but here's my current repro:
>>
>> typedef struct {
>> int a;
>> char b[];
>> } c;
>> d;
>> e(c *f) {
>> int g, h = g = f;
>> d = 0;
>> for (; d < h; d++)
>> f->b[d + 1] = f->b[d] + g;
>> }
>>
>> clang -cc1 -emit-obj -triple x86_64-linux-gnu -target-feature +sse4.2 -O2 -w -fsanitize=memory -vectorize-loops -o /dev/null foo.ii -x c
>>
>> Appears to run forever (or at least a very long time).
>>
>> Interestingly, a nearby test case seems to fail with or without the patch:
>>
>> typedef struct {
>> int a;
>> char b[];
>> } c;
>> d;
>> e(c *f) {
>> int g = f;
>> for (; d; d++)
>> f->b[d + 1] = f->b[d] + g;
>> }
>>
>> So I guess this might be one of those unfortunate cases of an
>> optimization exposing an existing bug more often, but still...
>>
>> On Thu, Dec 7, 2017 at 1:56 PM Sanjay Patel <spatel at rotateright.com>
>> wrote:
>>
>>> Thanks for letting me know.
>>>
>>> This might be a good time to check out (though I haven't gotten to it
>>> yet):
>>> https://github.com/rutgers-apl/alive-loops
>>>
>>> This was mentioned on llvm-dev:
>>> http://lists.llvm.org/pipermail/llvm-dev/2017-September/117466.html
>>>
>>> On Thu, Dec 7, 2017 at 12:48 PM, Chandler Carruth <chandlerc at gmail.com>
>>> wrote:
>>>
>>>> FYI, we've root-caused a compile timeout to this revision. It seems
>>>> quite likely this transform is fighting another instcombine fold.
>>>>
>>>> We're still working on a test case, but wanted to go ahead and give a
>>>> heads-up in case you can spot the place where we reverse this transform.
>>>>
>>>>
>>>> On Wed, Nov 15, 2017 at 8:12 PM Sanjay Patel via llvm-commits <
>>>> llvm-commits at lists.llvm.org> wrote:
>>>>
>>>>> Author: spatel
>>>>> Date: Wed Nov 15 11:12:01 2017
>>>>> New Revision: 318323
>>>>>
>>>>> URL: http://llvm.org/viewvc/llvm-project?rev=318323&view=rev
>>>>> Log:
>>>>> [InstCombine] trunc (binop X, C) --> binop (trunc X, C')
>>>>>
>>>>> Note that one-use and shouldChangeType() are checked ahead of the
>>>>> switch.
>>>>>
>>>>> Without the narrowing folds, we can produce inferior vector code as
>>>>> shown in PR35299:
>>>>> https://bugs.llvm.org/show_bug.cgi?id=35299
>>>>>
>>>>>
>>>>> Modified:
>>>>> llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
>>>>> llvm/trunk/test/Transforms/InstCombine/pr33765.ll
>>>>> llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll
>>>>>
>>>>> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=318323&r1=318322&r2=318323&view=diff
>>>>> ==============================================================================
>>>>> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
>>>>> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Wed Nov 15 11:12:01 2017
>>>>> @@ -545,6 +545,8 @@ Instruction *InstCombiner::narrowBinOp(T
>>>>> if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
>>>>> return nullptr;
>>>>>
>>>>> + Value *BinOp0 = BinOp->getOperand(0);
>>>>> + Value *BinOp1 = BinOp->getOperand(1);
>>>>> switch (BinOp->getOpcode()) {
>>>>> case Instruction::And:
>>>>> case Instruction::Or:
>>>>> @@ -552,20 +554,31 @@ Instruction *InstCombiner::narrowBinOp(T
>>>>> case Instruction::Add:
>>>>> case Instruction::Mul: {
>>>>> Constant *C;
>>>>> - if (match(BinOp->getOperand(1), m_Constant(C))) {
>>>>> + if (match(BinOp1, m_Constant(C))) {
>>>>> // trunc (binop X, C) --> binop (trunc X, C')
>>>>> Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
>>>>> - Value *TruncX = Builder.CreateTrunc(BinOp->getOperand(0), DestTy);
>>>>> + Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
>>>>> return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
>>>>> }
>>>>> + Value *X;
>>>>> + if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
>>>>> + // trunc (binop (ext X), Y) --> binop X, (trunc Y)
>>>>> + Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
>>>>> + return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
>>>>> + }
>>>>> + if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
>>>>> + // trunc (binop Y, (ext X)) --> binop (trunc Y), X
>>>>> + Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
>>>>> + return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
>>>>> + }
>>>>> break;
>>>>> }
>>>>> case Instruction::Sub: {
>>>>> Constant *C;
>>>>> - if (match(BinOp->getOperand(0), m_Constant(C))) {
>>>>> + if (match(BinOp0, m_Constant(C))) {
>>>>> // trunc (binop C, X) --> binop (trunc C', X)
>>>>> Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
>>>>> - Value *TruncX = Builder.CreateTrunc(BinOp->getOperand(1), DestTy);
>>>>> + Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
>>>>> return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
>>>>> }
>>>>> break;
>>>>>
>>>>> Modified: llvm/trunk/test/Transforms/InstCombine/pr33765.ll
>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/pr33765.ll?rev=318323&r1=318322&r2=318323&view=diff
>>>>> ==============================================================================
>>>>> --- llvm/trunk/test/Transforms/InstCombine/pr33765.ll (original)
>>>>> +++ llvm/trunk/test/Transforms/InstCombine/pr33765.ll Wed Nov 15 11:12:01 2017
>>>>> @@ -10,9 +10,8 @@ define void @patatino(i8 %beth) {
>>>>> ; CHECK: if.then9:
>>>>> ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
>>>>> ; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2
>>>>> -; CHECK-NEXT: [[CONV131:%.*]] = zext i16 [[TINKY]] to i32
>>>>> -; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], [[CONV131]]
>>>>> -; CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[AND]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[MUL]] to i16
>>>>> +; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]]
>>>>> ; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2
>>>>> ; CHECK-NEXT: ret void
>>>>> ;
>>>>>
>>>>> Modified: llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll
>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll?rev=318323&r1=318322&r2=318323&view=diff
>>>>> ==============================================================================
>>>>> --- llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll (original)
>>>>> +++ llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll Wed Nov 15 11:12:01 2017
>>>>> @@ -2,9 +2,8 @@
>>>>>
>>>>> define i16 @narrow_sext_and(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_and(
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = and i32 [[X321]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = sext i16 %x16 to i32
>>>>> @@ -15,9 +14,8 @@ define i16 @narrow_sext_and(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_zext_and(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_and(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = and i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = zext i16 %x16 to i32
>>>>> @@ -28,9 +26,8 @@ define i16 @narrow_zext_and(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_sext_or(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_or(
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = or i32 [[X321]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = sext i16 %x16 to i32
>>>>> @@ -41,9 +38,8 @@ define i16 @narrow_sext_or(i16 %x16, i32
>>>>>
>>>>> define i16 @narrow_zext_or(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_or(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = or i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = zext i16 %x16 to i32
>>>>> @@ -54,9 +50,8 @@ define i16 @narrow_zext_or(i16 %x16, i32
>>>>>
>>>>> define i16 @narrow_sext_xor(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_xor(
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X321]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = sext i16 %x16 to i32
>>>>> @@ -67,9 +62,8 @@ define i16 @narrow_sext_xor(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_zext_xor(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_xor(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = zext i16 %x16 to i32
>>>>> @@ -80,9 +74,8 @@ define i16 @narrow_zext_xor(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_sext_add(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_add(
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = add i32 [[X321]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = sext i16 %x16 to i32
>>>>> @@ -93,9 +86,8 @@ define i16 @narrow_sext_add(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_zext_add(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_add(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = add i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = zext i16 %x16 to i32
>>>>> @@ -106,9 +98,8 @@ define i16 @narrow_zext_add(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_sext_mul(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_mul(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = sext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = mul i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = sext i16 %x16 to i32
>>>>> @@ -119,9 +110,8 @@ define i16 @narrow_sext_mul(i16 %x16, i3
>>>>>
>>>>> define i16 @narrow_zext_mul(i16 %x16, i32 %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_mul(
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>> -; CHECK-NEXT: [[B:%.*]] = mul i32 [[X32]], %y32
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>> +; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>> ;
>>>>> %x32 = zext i16 %x16 to i32
>>>>> @@ -130,15 +120,14 @@ define i16 @narrow_zext_mul(i16 %x16, i3
>>>>> ret i16 %r
>>>>> }
>>>>>
>>>>> -; Verify that the commuted patterns work. The div is to ensure that complexity-based
>>>>> +; Verify that the commuted patterns work. The div is to ensure that complexity-based
>>>>> ; canonicalization doesn't swap the binop operands. Use vector types to show those work too.
>>>>>
>>>>> define <2 x i16> @narrow_sext_and_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_and_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[Y32OP0]], [[X321]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -151,9 +140,8 @@ define <2 x i16> @narrow_sext_and_commut
>>>>> define <2 x i16> @narrow_zext_and_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_and_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -166,9 +154,8 @@ define <2 x i16> @narrow_zext_and_commut
>>>>> define <2 x i16> @narrow_sext_or_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_or_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[Y32OP0]], [[X321]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -181,9 +168,8 @@ define <2 x i16> @narrow_sext_or_commute
>>>>> define <2 x i16> @narrow_zext_or_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_or_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -196,9 +182,8 @@ define <2 x i16> @narrow_zext_or_commute
>>>>> define <2 x i16> @narrow_sext_xor_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_xor_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> [[Y32OP0]], [[X321]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -211,9 +196,8 @@ define <2 x i16> @narrow_sext_xor_commut
>>>>> define <2 x i16> @narrow_zext_xor_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_xor_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -226,9 +210,8 @@ define <2 x i16> @narrow_zext_xor_commut
>>>>> define <2 x i16> @narrow_sext_add_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_add_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[Y32OP0]], [[X321]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -241,9 +224,8 @@ define <2 x i16> @narrow_sext_add_commut
>>>>> define <2 x i16> @narrow_zext_add_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_add_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -256,9 +238,8 @@ define <2 x i16> @narrow_zext_add_commut
>>>>> define <2 x i16> @narrow_sext_mul_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_sext_mul_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = sext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = mul <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> @@ -271,9 +252,8 @@ define <2 x i16> @narrow_sext_mul_commut
>>>>> define <2 x i16> @narrow_zext_mul_commute(<2 x i16> %x16, <2 x i32> %y32) {
>>>>> ; CHECK-LABEL: @narrow_zext_mul_commute(
>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>> -; CHECK-NEXT: [[B:%.*]] = mul <2 x i32> [[Y32OP0]], [[X32]]
>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
>>>>> +; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>> ;
>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>