[PATCH] D118538: [SLP] Schedule only sub-graph of vectorizable instructions

Arthur Eubanks via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 1 15:25:47 PST 2022


aeubanks added a comment.

Based on some debug logs, it seems that the `llvm.stacksave()`/`llvm.stackrestore()` calls are what is being moved around, not the allocas.

Better reduced repro:

  $ cat /tmp/a.ll
  target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
  target triple = "i386-pc-windows-msvc19.16.0"
  
  declare i8* @llvm.stacksave()
  
  declare void @llvm.stackrestore(i8*)
  
  declare i8* @wibble(i8*)
  
  declare void @quux(i32* inalloca(i32))
  
  define void @ham() #1 {
    %tmp2 = alloca i8
    %tmp3 = alloca i8
    %tmp4 = alloca i8
    %tmp5 = alloca i8
    %tmp12 = alloca [12 x i8*]
    %tmp15 = call i8* @wibble(i8* %tmp2)
    %tmp16 = call i8* @wibble(i8* %tmp3)
    %tmp17 = call i8* @wibble(i8* %tmp4)
    %tmp23 = call i8* @llvm.stacksave()
    %tmp24 = alloca inalloca i32
    call void @quux(i32* inalloca(i32) %tmp24)
    call void @llvm.stackrestore(i8* %tmp23)
    %tmp32 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 0
    store i8* %tmp4, i8** %tmp32
    %tmp33 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 1
    store i8* %tmp4, i8** %tmp33
    %tmp34 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 2
    store i8* %tmp4, i8** %tmp34
    %tmp35 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 3
    store i8* %tmp4, i8** %tmp35
    %tmp36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 4
    store i8* %tmp4, i8** %tmp36
    %tmp37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 5
    store i8* %tmp5, i8** %tmp37
    %tmp38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 6
    store i8* %tmp5, i8** %tmp38
    %tmp39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 7
    store i8* %tmp5, i8** %tmp39
    ret void
  }
  
  attributes #0 = { nofree nosync nounwind willreturn }
  attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }
  
  $ bin/opt -passes=slp-vectorizer /tmp/a.ll -S
  target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
  target triple = "i386-pc-windows-msvc19.16.0"
  
  ; Function Attrs: nofree nosync nounwind willreturn
  declare i8* @llvm.stacksave() #0
  
  ; Function Attrs: nofree nosync nounwind willreturn
  declare void @llvm.stackrestore(i8*) #0
  
  declare i8* @wibble(i8*)
  
  declare void @quux(i32* inalloca(i32))
  
  define void @ham() #1 {
    %tmp2 = alloca i8, align 1
    %tmp3 = alloca i8, align 1
    %tmp12 = alloca [12 x i8*], align 4
    %tmp15 = call i8* @wibble(i8* %tmp2)
    %tmp16 = call i8* @wibble(i8* %tmp3)
    %tmp24 = alloca inalloca i32, align 4
    %tmp32 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 0
    %tmp33 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 1
    %tmp34 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 2
    %tmp35 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 3
    %1 = bitcast i8** %tmp32 to <4 x i8*>*
    %tmp36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 4
    %tmp37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 5
    %tmp38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 6
    %tmp39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 7
    %tmp4 = alloca i8, align 1
    %tmp5 = alloca i8, align 1
    %tmp17 = call i8* @wibble(i8* %tmp4)
    %tmp23 = call i8* @llvm.stacksave()
    call void @quux(i32* inalloca(i32) %tmp24)
    call void @llvm.stackrestore(i8* %tmp23)
    %2 = insertelement <4 x i8*> poison, i8* %tmp4, i32 0
    %shuffle = shufflevector <4 x i8*> %2, <4 x i8*> poison, <4 x i32> zeroinitializer
    store <4 x i8*> %shuffle, <4 x i8*>* %1, align 4
    %3 = insertelement <4 x i8*> %2, i8* %tmp5, i32 1
    %shuffle1 = shufflevector <4 x i8*> %3, <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
    %4 = bitcast i8** %tmp36 to <4 x i8*>*
    store <4 x i8*> %shuffle1, <4 x i8*>* %4, align 4
    ret void
  }
  
  attributes #0 = { nofree nosync nounwind willreturn }
  attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }

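In the output, `%tmp24` is no longer between the `stacksave`/`stackrestore` calls: the `alloca inalloca` now appears before the call to `llvm.stacksave()`, whereas in the input it sat between the two calls.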


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118538/new/

https://reviews.llvm.org/D118538



More information about the llvm-commits mailing list