[llvm] 3d76a13 - Revert "[InstCombine] Lower infinite combine loop detection thresholds"

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 07:57:08 PDT 2020


Thanks for the reproducer!

On Wed, Aug 19, 2020 at 5:50 PM Florian Hahn <florian_hahn at apple.com> wrote:
>
>
>
> > On Aug 19, 2020, at 14:53, Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> >
> >
> > Author: Roman Lebedev
> > Date: 2020-08-19T16:53:30+03:00
> > New Revision: 3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> >
> > URL: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> > DIFF: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74.diff
> >
> > LOG: Revert "[InstCombine] Lower infinite combine loop detection thresholds"
> >
> > As reported by Florian Hahn, there's a hit
> > in MultiSource/Benchmarks/mafft from the test-suite on X86 with -O3 -flto,
> > so reverting until addressed.
>
>
> It should be reproducible by running `opt -instcombine` on the IR below:
>
> @global = local_unnamed_addr global [16 x [16 x double]] [[16 x double] [double -2.490000e+00, double -8.240000e+00, double -7.040000e+00, double -4.320000e+00, double -6.860000e+00, double -8.390000e+00, double -5.030000e+00, double -5.840000e+00, double 0xC021AE147AE147AE, double -4.680000e+00, double -1.437000e+01, double -1.264000e+01, double -4.010000e+00, double -6.160000e+00, double -1.132000e+01, double -9.050000e+00], [16 x double] [double -8.240000e+00, double -8.000000e-01, double -8.890000e+00, double -5.130000e+00, double -8.610000e+00, double -5.380000e+00, double -5.770000e+00, double -6.600000e+00, double -1.041000e+01, double -4.570000e+00, double -1.453000e+01, double -1.014000e+01, double -5.430000e+00, double -5.940000e+00, double -8.870000e+00, double -1.107000e+01], [16 x double] [double -7.040000e+00, double -8.890000e+00, double -2.110000e+00, double -2.040000e+00, double -9.730000e+00, double -1.105000e+01, double -3.810000e+00, double -4.720000e+00, double 0xC022BD70A3D70A3D, double -5.860000e+00, double -9.080000e+00, double -1.045000e+01, double -5.330000e+00, double -6.930000e+00, double -8.670000e+00, double -7.830000e+00], [16 x double] [double -4.320000e+00, double -5.130000e+00, double -2.040000e+00, double 4.490000e+00, double -5.330000e+00, double -5.610000e+00, double 2.700000e+00, double 5.900000e-01, double -5.560000e+00, double 1.670000e+00, double -6.710000e+00, double -5.170000e+00, double 1.610000e+00, double -5.100000e-01, double -4.810000e+00, double -2.980000e+00], [16 x double] [double -6.860000e+00, double -8.610000e+00, double -9.730000e+00, double -5.330000e+00, double -1.050000e+00, double -8.670000e+00, double -4.880000e+00, double -6.100000e+00, double -7.980000e+00, double -6.000000e+00, double -1.243000e+01, double 0xC01ED70A3D70A3D7, double -5.850000e+00, double 0xC01E333333333333, double -6.630000e+00, double -1.154000e+01], [16 x double] [double -8.390000e+00, double -5.380000e+00, double -1.105000e+01, 
double -5.610000e+00, double -8.670000e+00, double -1.980000e+00, double -4.130000e+00, double -5.770000e+00, double -1.136000e+01, double -4.660000e+00, double -1.258000e+01, double -1.369000e+01, double -5.750000e+00, double -4.270000e+00, double -1.201000e+01, double -1.079000e+01], [16 x double] [double -5.030000e+00, double -5.770000e+00, double -3.810000e+00, double 2.700000e+00, double -4.880000e+00, double -4.130000e+00, double 5.620000e+00, double 1.210000e+00, double -5.950000e+00, double 2.110000e+00, double -3.700000e+00, double -5.840000e+00, double 1.600000e+00, double -8.000000e-02, double -4.490000e+00, double -3.900000e+00], [16 x double] [double -5.840000e+00, double -6.600000e+00, double -4.720000e+00, double 5.900000e-01, double -6.100000e+00, double -5.770000e+00, double 1.210000e+00, double 3.470000e+00, double 0xC01FB851EB851EB8, double -2.700000e-01, double 0xC01F851EB851EB85, double -5.610000e+00, double -5.700000e-01, double -2.090000e+00, double -5.300000e+00, double -4.450000e+00], [16 x double] [double 0xC021AE147AE147AE, double -1.041000e+01, double 0xC022BD70A3D70A3D, double -5.560000e+00, double -7.980000e+00, double -1.136000e+01, double -5.950000e+00, double 0xC01FB851EB851EB8, double -5.130000e+00, double -3.570000e+00, double -1.045000e+01, double -8.490000e+00, double -2.420000e+00, double -5.630000e+00, double -7.080000e+00, double -8.390000e+00], [16 x double] [double -4.680000e+00, double -4.570000e+00, double -5.860000e+00, double 1.670000e+00, double -6.000000e+00, double -4.660000e+00, double 2.110000e+00, double -2.700000e-01, double -3.570000e+00, double 5.360000e+00, double -5.710000e+00, double -4.960000e+00, double 2.750000e+00, double 1.320000e+00, double -4.910000e+00, double -3.670000e+00], [16 x double] [double -1.437000e+01, double -1.453000e+01, double -9.080000e+00, double -6.710000e+00, double -1.243000e+01, double -1.258000e+01, double -3.700000e+00, double 0xC01F851EB851EB85, double -1.045000e+01, double 
-5.710000e+00, double -3.590000e+00, double -5.770000e+00, double -6.880000e+00, double -8.410000e+00, double -7.400000e+00, double -5.410000e+00], [16 x double] [double -1.264000e+01, double -1.014000e+01, double -1.045000e+01, double -5.170000e+00, double 0xC01ED70A3D70A3D7, double -1.369000e+01, double -5.840000e+00, double -5.610000e+00, double -8.490000e+00, double -4.960000e+00, double -5.770000e+00, double -2.280000e+00, double -4.720000e+00, double -7.360000e+00, double -3.830000e+00, double -5.210000e+00], [16 x double] [double -4.010000e+00, double -5.430000e+00, double -5.330000e+00, double 1.610000e+00, double -5.850000e+00, double -5.750000e+00, double 1.600000e+00, double -5.700000e-01, double -2.420000e+00, double 2.750000e+00, double -6.880000e+00, double -4.720000e+00, double 4.970000e+00, double 1.140000e+00, double -2.980000e+00, double -3.390000e+00], [16 x double] [double -6.160000e+00, double -5.940000e+00, double -6.930000e+00, double -5.100000e-01, double 0xC01E333333333333, double -4.270000e+00, double -8.000000e-02, double -2.090000e+00, double -5.630000e+00, double 1.320000e+00, double -8.410000e+00, double -7.360000e+00, double 1.140000e+00, double 3.360000e+00, double -4.760000e+00, double -4.280000e+00], [16 x double] [double -1.132000e+01, double -8.870000e+00, double -8.670000e+00, double -4.810000e+00, double -6.630000e+00, double -1.201000e+01, double -4.490000e+00, double -5.300000e+00, double -7.080000e+00, double -4.910000e+00, double -7.400000e+00, double -3.830000e+00, double -2.980000e+00, double -4.760000e+00, double -3.210000e+00, double -5.970000e+00], [16 x double] [double -9.050000e+00, double -1.107000e+01, double -7.830000e+00, double -2.980000e+00, double -1.154000e+01, double -1.079000e+01, double -3.900000e+00, double -4.450000e+00, double -8.390000e+00, double -3.670000e+00, double -5.410000e+00, double -5.210000e+00, double -3.390000e+00, double -4.280000e+00, double -5.970000e+00, double -2.000000e-02]], align 16
>
> define void @blam() local_unnamed_addr {
> bb:
>   br label %bb1
>
> bb1:                                              ; preds = %bb1, %bb
>   br i1 undef, label %bb2, label %bb1
>
> bb2:                                              ; preds = %bb2, %bb1
>   %tmp = fcmp ogt <4 x double> undef, zeroinitializer
>   %tmp3 = fcmp olt <4 x double> undef, zeroinitializer
>   %tmp4 = xor <4 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true>
>   %tmp5 = xor <4 x i1> %tmp3, <i1 true, i1 true, i1 true, i1 true>
>   %tmp6 = and <4 x i1> %tmp5, %tmp4
>   %tmp7 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 1
>   %tmp8 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp7, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp9 = fcmp ogt <4 x double> %tmp8, zeroinitializer
>   %tmp10 = fcmp olt <4 x double> %tmp8, zeroinitializer
>   %tmp11 = or <4 x i1> %tmp, %tmp6
>   %tmp12 = or <4 x i1> %tmp11, undef
>   %tmp13 = and <4 x i1> %tmp9, %tmp12
>   %tmp14 = xor <4 x i1> %tmp9, <i1 true, i1 true, i1 true, i1 true>
>   %tmp15 = and <4 x i1> %tmp14, %tmp12
>   %tmp16 = and <4 x i1> %tmp10, %tmp15
>   %tmp17 = xor <4 x i1> %tmp10, <i1 true, i1 true, i1 true, i1 true>
>   %tmp18 = and <4 x i1> %tmp17, %tmp15
>   %tmp19 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 2
>   %tmp20 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp19, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp21 = fcmp ogt <4 x double> %tmp20, zeroinitializer
>   %tmp22 = fcmp olt <4 x double> %tmp20, zeroinitializer
>   %tmp23 = or <4 x i1> %tmp13, %tmp16
>   %tmp24 = or <4 x i1> %tmp23, %tmp18
>   %tmp25 = and <4 x i1> %tmp21, %tmp24
>   %tmp26 = xor <4 x i1> %tmp21, <i1 true, i1 true, i1 true, i1 true>
>   %tmp27 = and <4 x i1> %tmp26, %tmp24
>   %tmp28 = and <4 x i1> %tmp22, %tmp27
>   %tmp29 = xor <4 x i1> %tmp22, <i1 true, i1 true, i1 true, i1 true>
>   %tmp30 = and <4 x i1> %tmp29, %tmp27
>   %tmp31 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 3
>   %tmp32 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp31, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp33 = fcmp ogt <4 x double> %tmp32, zeroinitializer
>   %tmp34 = fcmp olt <4 x double> %tmp32, zeroinitializer
>   %tmp35 = or <4 x i1> %tmp25, %tmp28
>   %tmp36 = or <4 x i1> %tmp35, %tmp30
>   %tmp37 = and <4 x i1> %tmp33, %tmp36
>   %tmp38 = xor <4 x i1> %tmp33, <i1 true, i1 true, i1 true, i1 true>
>   %tmp39 = and <4 x i1> %tmp38, %tmp36
>   %tmp40 = and <4 x i1> %tmp34, %tmp39
>   %tmp41 = xor <4 x i1> %tmp34, <i1 true, i1 true, i1 true, i1 true>
>   %tmp42 = and <4 x i1> %tmp41, %tmp39
>   %tmp43 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 4
>   %tmp44 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp43, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp45 = fcmp ogt <4 x double> %tmp44, zeroinitializer
>   %tmp46 = fcmp olt <4 x double> %tmp44, zeroinitializer
>   %tmp47 = or <4 x i1> %tmp37, %tmp40
>   %tmp48 = or <4 x i1> %tmp47, %tmp42
>   %tmp49 = and <4 x i1> %tmp45, %tmp48
>   %tmp50 = xor <4 x i1> %tmp45, <i1 true, i1 true, i1 true, i1 true>
>   %tmp51 = and <4 x i1> %tmp50, %tmp48
>   %tmp52 = and <4 x i1> %tmp46, %tmp51
>   %tmp53 = xor <4 x i1> %tmp46, <i1 true, i1 true, i1 true, i1 true>
>   %tmp54 = and <4 x i1> %tmp53, %tmp51
>   %tmp55 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 5
>   %tmp56 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp55, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp57 = fcmp ogt <4 x double> %tmp56, zeroinitializer
>   %tmp58 = fcmp olt <4 x double> %tmp56, zeroinitializer
>   %tmp59 = or <4 x i1> %tmp49, %tmp52
>   %tmp60 = or <4 x i1> %tmp59, %tmp54
>   %tmp61 = and <4 x i1> %tmp57, %tmp60
>   %tmp62 = xor <4 x i1> %tmp57, <i1 true, i1 true, i1 true, i1 true>
>   %tmp63 = and <4 x i1> %tmp62, %tmp60
>   %tmp64 = and <4 x i1> %tmp58, %tmp63
>   %tmp65 = xor <4 x i1> %tmp58, <i1 true, i1 true, i1 true, i1 true>
>   %tmp66 = and <4 x i1> %tmp65, %tmp63
>   %tmp67 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 6
>   %tmp68 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp67, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp69 = fcmp ogt <4 x double> %tmp68, zeroinitializer
>   %tmp70 = fcmp olt <4 x double> %tmp68, zeroinitializer
>   %tmp71 = or <4 x i1> %tmp61, %tmp64
>   %tmp72 = or <4 x i1> %tmp71, %tmp66
>   %tmp73 = and <4 x i1> %tmp69, %tmp72
>   %tmp74 = xor <4 x i1> %tmp69, <i1 true, i1 true, i1 true, i1 true>
>   %tmp75 = and <4 x i1> %tmp74, %tmp72
>   %tmp76 = and <4 x i1> %tmp70, %tmp75
>   %tmp77 = xor <4 x i1> %tmp70, <i1 true, i1 true, i1 true, i1 true>
>   %tmp78 = and <4 x i1> %tmp77, %tmp75
>   %tmp79 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 7
>   %tmp80 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp79, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp81 = fcmp ogt <4 x double> %tmp80, zeroinitializer
>   %tmp82 = fcmp olt <4 x double> %tmp80, zeroinitializer
>   %tmp83 = or <4 x i1> %tmp73, %tmp76
>   %tmp84 = or <4 x i1> %tmp83, %tmp78
>   %tmp85 = and <4 x i1> %tmp81, %tmp84
>   %tmp86 = xor <4 x i1> %tmp81, <i1 true, i1 true, i1 true, i1 true>
>   %tmp87 = and <4 x i1> %tmp86, %tmp84
>   %tmp88 = and <4 x i1> %tmp82, %tmp87
>   %tmp89 = xor <4 x i1> %tmp82, <i1 true, i1 true, i1 true, i1 true>
>   %tmp90 = and <4 x i1> %tmp89, %tmp87
>   %tmp91 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 8
>   %tmp92 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp91, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp93 = fcmp ogt <4 x double> %tmp92, zeroinitializer
>   %tmp94 = fcmp olt <4 x double> %tmp92, zeroinitializer
>   %tmp95 = or <4 x i1> %tmp85, %tmp88
>   %tmp96 = or <4 x i1> %tmp95, %tmp90
>   %tmp97 = and <4 x i1> %tmp93, %tmp96
>   %tmp98 = xor <4 x i1> %tmp93, <i1 true, i1 true, i1 true, i1 true>
>   %tmp99 = and <4 x i1> %tmp98, %tmp96
>   %tmp100 = and <4 x i1> %tmp94, %tmp99
>   %tmp101 = xor <4 x i1> %tmp94, <i1 true, i1 true, i1 true, i1 true>
>   %tmp102 = and <4 x i1> %tmp101, %tmp99
>   %tmp103 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 9
>   %tmp104 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp103, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp105 = fcmp ogt <4 x double> %tmp104, zeroinitializer
>   %tmp106 = fcmp olt <4 x double> %tmp104, zeroinitializer
>   %tmp107 = or <4 x i1> %tmp97, %tmp100
>   %tmp108 = or <4 x i1> %tmp107, %tmp102
>   %tmp109 = and <4 x i1> %tmp105, %tmp108
>   %tmp110 = xor <4 x i1> %tmp105, <i1 true, i1 true, i1 true, i1 true>
>   %tmp111 = and <4 x i1> %tmp110, %tmp108
>   %tmp112 = and <4 x i1> %tmp106, %tmp111
>   %tmp113 = xor <4 x i1> %tmp106, <i1 true, i1 true, i1 true, i1 true>
>   %tmp114 = and <4 x i1> %tmp113, %tmp111
>   %tmp115 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 10
>   %tmp116 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
>   %tmp117 = fcmp ogt <4 x double> %tmp116, zeroinitializer
>   %tmp118 = fcmp olt <4 x double> %tmp116, zeroinitializer
>   %tmp119 = fadd <4 x double> %tmp116, <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
>   %tmp120 = fptosi <4 x double> %tmp119 to <4 x i32>
>   %tmp121 = fadd <4 x double> %tmp116, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
>   %tmp122 = fptosi <4 x double> %tmp121 to <4 x i32>
>   %tmp123 = or <4 x i1> %tmp109, %tmp112
>   %tmp124 = or <4 x i1> %tmp123, %tmp114
>   %tmp125 = xor <4 x i1> %tmp117, <i1 true, i1 true, i1 true, i1 true>
>   %tmp126 = and <4 x i1> %tmp125, %tmp124
>   %tmp127 = xor <4 x i1> %tmp118, <i1 true, i1 true, i1 true, i1 true>
>   %tmp128 = and <4 x i1> %tmp127, %tmp126
>   %tmp129 = select <4 x i1> undef, <4 x i32> %tmp120, <4 x i32> %tmp122
>   %tmp130 = select <4 x i1> %tmp128, <4 x i32> zeroinitializer, <4 x i32> %tmp129
>   %tmp131 = sitofp <4 x i32> %tmp130 to <4 x double>
>   call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %tmp131, <4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
>   br label %bb2
> }
>
> ; Function Attrs: nounwind readonly willreturn
> declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32 immarg, <4 x i1>, <4 x double>) #0
>
> ; Function Attrs: nounwind willreturn
> declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 immarg, <4 x i1>) #1
>
> attributes #0 = { nounwind readonly willreturn }
> attributes #1 = { nounwind willreturn }


More information about the llvm-commits mailing list