[llvm-bugs] [Bug 28515] New: loop vectorizer miscompiles n-ary array concatenation w/ avx2

via llvm-bugs llvm-bugs at lists.llvm.org
Mon Jul 11 17:40:43 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=28515

            Bug ID: 28515
           Summary: loop vectorizer miscompiles n-ary array concatenation
                    w/ avx2
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: david.majnemer at gmail.com
                CC: elena.demikhovsky at intel.com, llvm-bugs at lists.llvm.org,
                    mkuper at google.com, mssimpso at codeaurora.org,
                    wmi at google.com
    Classification: Unclassified

Consider the following IR:
; Module header: data layout and target triple (x86_64, Linux, GNU environment).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Source arrays for the concatenation: 2 + 19 + 16 + 1 = 38 i32 elements.
; @0, @1 and @2 are read by @concat; @3 is not referenced there -- its single
; value, 5555, appears as an immediate operand of the phi in @concat.
@0 = private constant [2 x i32] [i32 0, i32 1]
@1 = private constant [19 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32
47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32
57, i32 58, i32 59, i32 60]
@2 = private constant [16 x i32] [i32 -200, i32 -199, i32 -198, i32 -197, i32
-196, i32 -195, i32 -194, i32 -193, i32 -192, i32 -191, i32 -190, i32 -189, i32
-188, i32 -187, i32 -186, i32 -185]
@3 = private constant [1 x i32] [i32 5555]

; @concat(i8* %retval)
;
; Treats %retval as a [38 x i32] array and writes all 38 elements, one per
; loop iteration. For index i (0..37) the stored value is:
;   i in [0, 2)    -> @0[i]
;   i in [2, 21)   -> @1[i - 2]
;   i in [21, 37)  -> @2[i - 21]
;   otherwise (37) -> the constant 5555
; The range tests use unsigned compares on the rebased index, so each one is a
; single "0 <= idx < length" check.
define void @concat(i8* nocapture %retval) local_unnamed_addr #0 {
entry:
  ; View the raw output pointer as the 38-element destination array.
  %0 = bitcast i8* %retval to [38 x i32]*
  br label %loop_body.dim.0

loop_body.dim.0:                                  ; preds = %entry, %loop_header.dim.0
  ; Induction variable: 0 on entry, %19 (i + 1) on the back edge.
  %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %19, %loop_header.dim.0 ]
  ; First segment: i < 2 selects @0.
  %1 = icmp ult i64 %invar_address.dim.0.01, 2
  br i1 %1, label %2, label %5

; <label>:2:                                      ; preds = %loop_body.dim.0
  ; Load @0[i].
  %3 = getelementptr inbounds [2 x i32], [2 x i32]* @0, i64 0, i64 %invar_address.dim.0.01
  %4 = load i32, i32* %3, align 4
  br label %loop_header.dim.0

; <label>:5:                                      ; preds = %loop_body.dim.0
  ; Second segment: rebase by 2 and test (i - 2) < 19 to select @1.
  %6 = add nsw i64 %invar_address.dim.0.01, -2
  %7 = icmp ult i64 %6, 19
  br i1 %7, label %8, label %11

; <label>:8:                                      ; preds = %5
  ; Load @1[i - 2].
  %9 = getelementptr inbounds [19 x i32], [19 x i32]* @1, i64 0, i64 %6
  %10 = load i32, i32* %9, align 4
  br label %loop_header.dim.0

; <label>:11:                                     ; preds = %5
  ; Third segment: rebase by 21 and test (i - 21) < 16 to select @2.
  ; When the test fails, control goes straight to the store with 5555.
  %12 = add nsw i64 %invar_address.dim.0.01, -21
  %13 = icmp ult i64 %12, 16
  br i1 %13, label %14, label %loop_header.dim.0

; <label>:14:                                     ; preds = %11
  ; Load @2[i - 21].
  %15 = getelementptr inbounds [16 x i32], [16 x i32]* @2, i64 0, i64 %12
  %16 = load i32, i32* %15, align 4
  br label %loop_header.dim.0

loop_header.dim.0:                                ; preds = %11, %14, %8, %2
  ; Merge the value chosen for this index; the edge from %11 carries 5555.
  %17 = phi i32 [ %4, %2 ], [ %10, %8 ], [ %16, %14 ], [ 5555, %11 ]
  ; Store it to destination element i.
  %18 = getelementptr inbounds [38 x i32], [38 x i32]* %0, i64 0, i64 %invar_address.dim.0.01
  store i32 %17, i32* %18, align 4
  ; Advance and exit once all 38 elements have been written.
  %19 = add nuw nsw i64 %invar_address.dim.0.01, 1
  %exitcond = icmp eq i64 %19, 38
  br i1 %exitcond, label %loop_exit.dim.0, label %loop_body.dim.0

loop_exit.dim.0:                                  ; preds = %loop_header.dim.0
  ret void
}

; Attribute group #0, applied to @concat: targets the "haswell" CPU with an
; explicit feature list that includes +avx2.
attributes #0 = { "target-cpu"="haswell"
"target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
}

This IR is quite simple: it logically concatenates @0, @1, @2 and @3 by loading
from @0, @1 and @2 (the single element of @3 appears as the constant 5555) and
storing each value to the argument pointer "retval".

However, the generated IR after optimizations (-O3) is dramatically altered.

Some snippets:
  br i1 undef, label %31, label %loop_header.dim.0.epil

  %wide.masked.load8 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x
i32>* bitcast (i32* getelementptr ([19 x i32], [19 x i32]* @1, i64
242720316759336205, i64 7) to <8 x i32>*)

This is likely due to the loop vectorizer.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160712/add7c296/attachment.html>


More information about the llvm-bugs mailing list