[PATCH] D97667: [loop-idiom] Hoist loop memcpys to loop preheader
Han Zhu via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 28 11:45:22 PDT 2021
zhuhan0 added a comment.
@tpopp I cannot reproduce your test failure with either opt -O2 or opt -O3. My patch only affects memcpy intrinsics inside a loop body, so running your test case shouldn't reach my code. Here is the output of opt -O3:
; ModuleID = 'reverse_4d_float_array.ll'
source_filename = "__compute_module"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
@0 = external dso_local unnamed_addr constant [96 x i8], align 16
; Function Attrs: nofree norecurse nosync nounwind uwtable
define void @Reverse4DFloatArrayOnDim01.3(i8* nocapture readnone %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
%0 = bitcast i8** %buffer_table to [4 x [3 x [2 x [1 x float]]]]**
%1 = load [4 x [3 x [2 x [1 x float]]]]*, [4 x [3 x [2 x [1 x float]]]]** %0, align 8, !invariant.load !0, !dereferenceable !1, !align !2
%2 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 88) to float*), align 8, !alias.scope !3, !noalias !6
%3 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 0, i64 0, i64 0
store float %2, float* %3, align 16, !alias.scope !6, !noalias !3
%4 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 92) to float*), align 4, !alias.scope !3, !noalias !6
%5 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 0, i64 1, i64 0
store float %4, float* %5, align 4, !alias.scope !6, !noalias !3
%6 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 80) to float*), align 16, !alias.scope !3, !noalias !6
%7 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 1, i64 0, i64 0
store float %6, float* %7, align 8, !alias.scope !6, !noalias !3
%8 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 84) to float*), align 4, !alias.scope !3, !noalias !6
%9 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 1, i64 1, i64 0
store float %8, float* %9, align 4, !alias.scope !6, !noalias !3
%10 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 72) to float*), align 8, !alias.scope !3, !noalias !6
%11 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 2, i64 0, i64 0
store float %10, float* %11, align 16, !alias.scope !6, !noalias !3
%12 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 76) to float*), align 4, !alias.scope !3, !noalias !6
%13 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 0, i64 2, i64 1, i64 0
store float %12, float* %13, align 4, !alias.scope !6, !noalias !3
%14 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 64) to float*), align 16, !alias.scope !3, !noalias !6
%15 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 0, i64 0, i64 0
store float %14, float* %15, align 8, !alias.scope !6, !noalias !3
%16 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 68) to float*), align 4, !alias.scope !3, !noalias !6
%17 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 0, i64 1, i64 0
store float %16, float* %17, align 4, !alias.scope !6, !noalias !3
%18 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 56) to float*), align 8, !alias.scope !3, !noalias !6
%19 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 1, i64 0, i64 0
store float %18, float* %19, align 16, !alias.scope !6, !noalias !3
%20 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 60) to float*), align 4, !alias.scope !3, !noalias !6
%21 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 1, i64 1, i64 0
store float %20, float* %21, align 4, !alias.scope !6, !noalias !3
%22 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 48) to float*), align 16, !alias.scope !3, !noalias !6
%23 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 2, i64 0, i64 0
store float %22, float* %23, align 8, !alias.scope !6, !noalias !3
%24 = load float, float* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 52) to float*), align 4, !alias.scope !3, !noalias !6
%25 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 1, i64 2, i64 1, i64 0
store float %24, float* %25, align 4, !alias.scope !6, !noalias !3
%26 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 2, i64 0, i64 0, i64 0
%27 = load <4 x float>, <4 x float>* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 32) to <4 x float>*), align 16, !alias.scope !3, !noalias !6
%shuffle = shufflevector <4 x float> %27, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
%28 = bitcast float* %26 to <4 x float>*
store <4 x float> %shuffle, <4 x float>* %28, align 16, !alias.scope !6, !noalias !3
%29 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 2, i64 2, i64 0, i64 0
%30 = load <4 x float>, <4 x float>* bitcast (i8* getelementptr inbounds ([96 x i8], [96 x i8]* @0, i64 0, i64 16) to <4 x float>*), align 16, !alias.scope !3, !noalias !6
%shuffle7 = shufflevector <4 x float> %30, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
%31 = bitcast float* %29 to <4 x float>*
store <4 x float> %shuffle7, <4 x float>* %31, align 16, !alias.scope !6, !noalias !3
%32 = getelementptr inbounds [4 x [3 x [2 x [1 x float]]]], [4 x [3 x [2 x [1 x float]]]]* %1, i64 0, i64 3, i64 1, i64 0, i64 0
%33 = load <4 x float>, <4 x float>* bitcast ([96 x i8]* @0 to <4 x float>*), align 16, !alias.scope !3, !noalias !6
%shuffle8 = shufflevector <4 x float> %33, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
%34 = bitcast float* %32 to <4 x float>*
store <4 x float> %shuffle8, <4 x float>* %34, align 16, !alias.scope !6, !noalias !3
ret void
}
attributes #0 = { nofree norecurse nosync nounwind uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
!0 = !{}
!1 = !{i64 96}
!2 = !{i64 16}
!3 = !{!4}
!4 = !{!"buffer: {index:1, offset:0, size:96}", !5}
!5 = !{!"XLA global AA domain"}
!6 = !{!7}
!7 = !{!"buffer: {index:0, offset:0, size:96}", !5}
Are you using different compiler arguments to trigger this test failure? Or is this not an LLVM test case at all?
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D97667/new/
https://reviews.llvm.org/D97667
More information about the llvm-commits mailing list