[llvm-bugs] [Bug 48579] New: Can we vectorize calls inside nested for loops

Tue Dec 22 16:00:11 PST 2020

https://bugs.llvm.org/show_bug.cgi?id=48579

            Bug ID: 48579
           Summary: Can we vectorize calls inside nested for loops
           Product: new-bugs
           Version: unspecified
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: hiraditya at msn.com
                CC: htmldeveloper at gmail.com, llvm-bugs at lists.llvm.org

$ ./bin/opt -S -loop-vectorize -O3 test.ll

LV: Vector loop of width 2 costs: 65.
LV: Selecting VF: 1.
LV: Vectorization is possible but not beneficial.
LV: Interleaving is not beneficial.

The program has nested loops and there is a call in the inner loop.

$ cat test.ll

```
; RUN: opt -loop-vectorize -O3 < %s
; REQUIRES: asserts
; ModuleID = 'test.ll'
; Target configuration for this module: the data layout string and the
; arm64 Apple iOS triple it was generated for.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios8.0.0"

; Numbered (unnamed) struct types used by the module.
;
; Only a small part of this hierarchy is touched by @foo:
;   %0, field 33            -> %26
;   %26, field 0            -> [1 x %27]
;   %27, field 0            -> %28
;   %28, field 0            -> [70 x %29]   (the array @foo iterates over)
;   %29 -> %30 -> %31       -> %32*         (pointer loaded each iteration)
;   %32, fields 2, 3, 5, 7  -> float, float, float, i8 (the fields stored to)
; The remaining types are only reachable as members of %0 / %27.

; Root aggregate; @foo receives a pointer to it as its first argument.
%0 = type { float, float, float, float, float, float, float, float, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i32, i32, %1,
%12, %12, %12, %16, %17, %18, %26 }
%1 = type { %2 }
%2 = type { %3 }
%3 = type { %4* }
%4 = type <{ %5, i32, [4 x i8] }>
%5 = type { %6 }
%6 = type { %7, %8 }
%7 = type { float* }
%8 = type { %9 }
%9 = type { %10, %11* }
%10 = type { [24 x i8] }
%11 = type { i32 (...)** }
%12 = type { %13 }
%13 = type { %14 }
%14 = type { %15* }
%15 = type { i32, i32, i8, float** }
%16 = type { float, float, float }
%17 = type { float, float, float, float }
%18 = type { %19 }
%19 = type { %20*, %20*, %24 }
%20 = type { %21 }
%21 = type { %22 }
%22 = type { %23* }
%23 = type { i32, i32, %4, %4 }
%24 = type { %25 }
%25 = type { %20* }
; Field 33 of %0: a one-element array of %27.
%26 = type { [1 x %27] }
%27 = type { %28, %34, %40, %52, %45, %52, %53, %12, i8, i32 }
; 70-element array of %29; @foo walks it from element 0 to one-past-the-end.
%28 = type { [70 x %29] }
; %29/%30/%31 are single-member wrappers around one %32* pointer.
%29 = type { %30 }
%30 = type { %31 }
%31 = type { %32* }
; Pointee written by @foo (fields 2, 3 and 5 are float, field 7 is i8).
%32 = type { float, float, float, float, float, float, float, i8, i8, i8, i32,
[4 x i8], %33 }
%33 = type { float, float, float, [4 x i8], %4 }
%34 = type { [70 x %35] }
%35 = type { %36 }
%36 = type { %37 }
%37 = type { %38* }
%38 = type <{ i32 (...)**, i32, %39, [4 x i8] }>
%39 = type { i32, i8, i32, float, i8, float }
%40 = type { [70 x %41] }
%41 = type { %42 }
%42 = type { %43 }
%43 = type { %44* }
%44 = type { float, float, float, float, i32, [16 x float] }
%45 = type { [70 x %46] }
%46 = type { %47 }
%47 = type { %48 }
%48 = type { %49* }
%49 = type { float, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, [5 x i8],
%4, %4, [2 x float*], %50*, %51* }
%50 = type { i32, i32, float, [4 x i8], %4 }
%51 = type { i32, i32, i32, float, %4 }
%52 = type { [70 x float] }
%53 = type { [70 x %54] }
%54 = type { %55 }
%55 = type { %56 }
%56 = type { %57* }
%57 = type { float, %16, %16, %58, %59, %59, i8, i8, float, float, float,
float, float, float, float }
%58 = type { float, float, float, float, float, %16, %16 }
%59 = type { float, float, float, float, float, float, float, float, float,
float }

; Module-level globals. None of them is referenced by the body of @foo below.

; Two-element float array, both elements initialised to 0.5.
@0 = internal global [2 x float] [float 5.000000e-01, float 5.000000e-01], align 4
@1 = private unnamed_addr constant [3 x i8] c"Er\00", align 1
; NOTE: keep this string constant on a single line. A line break inside the
; c"..." literal becomes a newline byte in the constant's contents; the
; declared length of 68 bytes covers the 67-character message plus the
; terminating \00.
@2 = private unnamed_addr constant [68 x i8] c"allocator<T>::allocate(size_t n) 'n' exceeds maximum supported size\00", align 1
; Zero-length global constructor list (no constructors registered).
@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer

; @foo(%0* readonly <unnamed %0>, [2 x i64] <unnamed %1>)
;
; Walks the 70-element array of %29 reached through field 33 of the %0
; argument. For every element it loads the %32* held inside and stores
; pieces of the [2 x i64] argument into fields 2, 3, 5 and 7 of that %32:
;   field 2 <- low 32 bits of element 0
;   field 3 <- high 32 bits of element 0
;   field 5 <- low 32 bits of element 1
;   field 7 <- bit 32 of element 1 (as an i8 holding 0 or 1)
; The body is a single bottom-tested loop and contains no call instructions.
; Because both arguments are unnamed, the entry block is implicitly %2.
define hidden void @foo(%0* readonly, [2 x i64]) {
  ; %3 = address of element 0 of the [70 x %29] array (loop start pointer).
  %3 = getelementptr inbounds %0, %0* %0, i64 0, i32 33, i32 0, i64 0, i32 0,
i32 0, i64 0
  ; %4 = address of element 70, i.e. one past the last element (loop end
  ; pointer; only compared against, never dereferenced here).
  %4 = getelementptr inbounds %0, %0* %0, i64 0, i32 33, i32 0, i64 0, i32 0,
i32 0, i64 70
  br label %6

; Exit block: reached once the element pointer equals the end pointer.
; <label>:5:                                      ; preds = %6
  ret void

; Loop block: one iteration per %29 element, 70 iterations in total.
; <label>:6:                                      ; preds = %6, %2
  ; %7 = current element pointer: %3 on entry, %26 (next element) on the
  ; back edge.
  %7 = phi %29* [ %3, %2 ], [ %26, %6 ]
  ; Load the %32* stored inside the current %29 element.
  %8 = getelementptr inbounds %29, %29* %7, i64 0, i32 0, i32 0, i32 0
  %9 = load %32*, %32** %8, align 8, !tbaa !5
  ; Split the [2 x i64] argument into 32-bit pieces. These values depend only
  ; on argument %1, so they are the same on every iteration.
  %10 = extractvalue [2 x i64] %1, 0
  %11 = trunc i64 %10 to i32
  %12 = lshr i64 %10, 32
  %13 = trunc i64 %12 to i32
  %14 = extractvalue [2 x i64] %1, 1
  %15 = trunc i64 %14 to i32
  ; Field 2 (float) is written through an i32* with the low half of
  ; element 0; the bits are stored as-is, without int-to-float conversion.
  %16 = getelementptr inbounds %32, %32* %9, i64 0, i32 2
  %17 = bitcast float* %16 to i32*
  store i32 %11, i32* %17, align 8, !tbaa !9
  ; Field 3 (float) receives the high half of element 0, same raw-bits store.
  %18 = getelementptr inbounds %32, %32* %9, i64 0, i32 3
  %19 = bitcast float* %18 to i32*
  store i32 %13, i32* %19, align 4, !tbaa !19
  ; Field 5 (float) receives the low half of element 1, same raw-bits store.
  %20 = getelementptr inbounds %32, %32* %9, i64 0, i32 5
  %21 = bitcast float* %20 to i32*
  store i32 %15, i32* %21, align 4, !tbaa !20
  ; Field 7 (i8) receives bit 32 of element 1, masked down to 0 or 1.
  %22 = getelementptr inbounds %32, %32* %9, i64 0, i32 7
  %23 = lshr i64 %14, 32
  %24 = and i64 %23, 1
  %25 = trunc i64 %24 to i8
  store i8 %25, i8* %22, align 4, !tbaa !21
  ; Advance to the next %29 element and leave the loop once the end pointer
  ; is reached.
  %26 = getelementptr inbounds %29, %29* %7, i64 1
  %27 = icmp eq %29* %26, %4
  br i1 %27, label %5, label %6
}

; Module flags (!0-!3). No !4 node is defined, and nothing visible here
; references one.
!llvm.module.flags = !{!0, !1, !2, !3}

!0 = !{i32 2, !"Dwarf Version", i32 2}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"PIC Level", i32 2}
; TBAA metadata. Access tags have the form {base type, access type, offset}.
; !5: "any pointer" access, attached to the load of the %32* in @foo.
!5 = !{!6, !6, i64 0}
!6 = !{!"any pointer", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
; TBAA root node.
!8 = !{!"Simple C++ TBAA"}
; !9: float member of "foo6" at offset 8, attached to the store to field 2.
!9 = !{!10, !11, i64 8}
; "foo6" struct descriptor: pairs of (member type, byte offset).
!10 = !{!"foo6", !11, i64 0, !11, i64 4, !11, i64 8, !11, i64 12, !11, i64 16,
!11, i64 20, !11, i64 24, !12, i64 28, !12, i64 29, !12, i64 30, !13, i64 32,
!14, i64 40}
!11 = !{!"float", !7, i64 0}
!12 = !{!"bool", !7, i64 0}
!13 = !{!"foo1", !7, i64 0}
!14 = !{!"foo2", !11, i64 0, !11, i64 4, !11, i64 8, !15, i64 16}
!15 = !{!"foo3", !16, i64 0, !18, i64 40}
!16 = !{!"foo4", !17, i64 0}
!17 = !{!"foo5"}
!18 = !{!"int", !7, i64 0}
; !19: float member of "foo6" at offset 12, attached to the store to field 3.
!19 = !{!10, !11, i64 12}
; !20: float member of "foo6" at offset 20, attached to the store to field 5.
!20 = !{!10, !11, i64 20}
; !21: bool member of "foo6" at offset 28, attached to the store to field 7.
!21 = !{!10, !12, i64 28}
```

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20201223/21c83ad7/attachment.html>