[llvm] [DAGCombiner] Relax condition for extract_vector_elt combine (PR #157658)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 18:53:52 PDT 2025
zhaoqi5 wrote:
> this is causing hangs on the following IR:
>
> ```
> $ cat /tmp/a.ll
> target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-ios17.0.0-simulator"
>
> declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg)
>
> declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg)
>
> define ptr @_ZN5SkM449setRotateE4SkV3f(ptr noundef returned writeonly align 4 captures(ret: address, provenance) dereferenceable_or_null(64) initializes((0, 64)) %this, <2 x float> %axis.coerce0, float %axis.coerce1, float noundef %radians) {
> entry:
> %0 = fmul <2 x float> %axis.coerce0, %axis.coerce0
> %shift = shufflevector <2 x float> %0, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
> %foldExtExtBinop = fadd <2 x float> %0, %shift
> %add.i.i = extractelement <2 x float> %foldExtExtBinop, i64 0
> %mul5.i.i = fmul float %axis.coerce1, %axis.coerce1
> %add6.i.i = fadd float %mul5.i.i, %add.i.i
> %1 = tail call noundef float @llvm.sqrt.f32(float %add6.i.i)
> %cmp = fcmp ogt float %add6.i.i, 0.000000e+00
> %sub.i = fsub float %1, %1
> %cmp.i = fcmp ord float %sub.i, 0.000000e+00
> %or.cond = and i1 %cmp, %cmp.i
> %div = fdiv float 1.000000e+00, %1
> %mul5.i = fmul float %axis.coerce1, %div
> %2 = tail call noundef float @llvm.sin.f32(float %radians)
> %3 = tail call noundef float @llvm.cos.f32(float %radians)
> %sub.i.i = fsub float 1.000000e+00, %3
> %mul8.i.i = fmul float %2, %mul5.i
> %mul33.i.i = fmul float %sub.i.i, %mul5.i
> %mul34.i.i = fmul float %mul5.i, %mul33.i.i
> %add35.i.i = fadd float %3, %mul34.i.i
> %4 = insertelement <2 x float> poison, float %div, i64 0
> %5 = shufflevector <2 x float> %4, <2 x float> poison, <2 x i32> zeroinitializer
> %6 = fmul <2 x float> %axis.coerce0, %5
> %7 = extractelement <2 x float> %6, i64 0
> %mul.i.i8 = fmul float %sub.i.i, %7
> %8 = insertelement <2 x float> poison, float %mul.i.i8, i64 0
> %9 = shufflevector <2 x float> %8, <2 x float> poison, <2 x i32> zeroinitializer
> %10 = fmul <2 x float> %6, %9
> %11 = extractelement <2 x float> %10, i64 1
> %sub9.i.i = fsub float %11, %mul8.i.i
> %mul11.i.i = fmul float %mul5.i, %mul.i.i8
> %12 = extractelement <2 x float> %6, i64 1
> %mul12.i.i = fmul float %2, %12
> %add13.i.i = fadd float %mul12.i.i, %mul11.i.i
> %13 = insertelement <2 x float> poison, float %3, i64 0
> %14 = insertelement <2 x float> %13, float %mul8.i.i, i64 1
> %15 = fadd <2 x float> %14, %10
> %mul18.i.i = fmul float %sub.i.i, %12
> %mul22.i.i = fmul float %mul5.i, %mul18.i.i
> %sub28.i.i = fsub float %mul11.i.i, %mul12.i.i
> store <2 x float> %15, ptr %this, align 4
> %ref.tmp.sroa.5.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 8
> store float %sub28.i.i, ptr %ref.tmp.sroa.5.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.6.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 12
> store float 0.000000e+00, ptr %ref.tmp.sroa.6.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.7.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 16
> store float %sub9.i.i, ptr %ref.tmp.sroa.7.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.8.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 20
> %16 = insertelement <2 x float> poison, float %2, i64 0
> %17 = insertelement <2 x float> %16, float %mul18.i.i, i64 1
> %18 = fmul <2 x float> %6, %17
> %19 = extractelement <2 x float> %18, i64 0
> %sub24.i.i = fsub float %mul22.i.i, %19
> %20 = insertelement <2 x float> poison, float %mul22.i.i, i64 0
> %21 = insertelement <2 x float> %20, float %3, i64 1
> %22 = fadd <2 x float> %21, %18
> %23 = shufflevector <2 x float> %22, <2 x float> poison, <2 x i32> <i32 1, i32 0>
> store <2 x float> %23, ptr %ref.tmp.sroa.8.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.10.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 28
> store float 0.000000e+00, ptr %ref.tmp.sroa.10.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.11.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 32
> store float %add13.i.i, ptr %ref.tmp.sroa.11.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.12.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 36
> store float %sub24.i.i, ptr %ref.tmp.sroa.12.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.13.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 40
> store float %add35.i.i, ptr %ref.tmp.sroa.13.0.this.sroa_idx.i.i, align 4
> %ref.tmp.sroa.14.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 44
> %ref.tmp.sroa.18.0.this.sroa_idx.i.i = getelementptr inbounds nuw i8, ptr %this, i64 60
> tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) %ref.tmp.sroa.14.0.this.sroa_idx.i.i, i8 0, i64 16, i1 false)
> store float 1.000000e+00, ptr %ref.tmp.sroa.18.0.this.sroa_idx.i.i, align 4
> ret ptr null
> }
>
> declare float @llvm.sqrt.f32(float)
>
> declare float @llvm.sin.f32(float)
>
> declare float @llvm.cos.f32(float)
>
> $ llc -o /dev/null /tmp/a.ll
> hang ...
> ```
>
> I'll revert this in the meantime
Thank you for pointing out this potential issue and providing an example.
If possible, I think it would be better to address this hang in the target backends, since this commit enables broader optimization opportunities for all targets.
If anyone is willing to take a look, that would be great. I’ll also continue to study this issue further when time permits.
https://github.com/llvm/llvm-project/pull/157658
More information about the llvm-commits mailing list