[llvm] 3d76a13 - Revert "[InstCombine] Lower infinite combine loop detection thresholds"
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 07:57:08 PDT 2020
Thanks for the reproducer!
On Wed, Aug 19, 2020 at 5:50 PM Florian Hahn <florian_hahn at apple.com> wrote:
>
>
>
> > On Aug 19, 2020, at 14:53, Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> >
> >
> > Author: Roman Lebedev
> > Date: 2020-08-19T16:53:30+03:00
> > New Revision: 3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> >
> > URL: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> > DIFF: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74.diff
> >
> > LOG: Revert "[InstCombine] Lower infinite combine loop detection thresholds"
> >
> > As reported by Florian Hahn, there's a hit
> > in MultiSource/Benchmarks/mafft from the test-suite on X86 with -O3 -flto,
> > so reverting until addressed.
>
>
> This should be reproducible by running opt -instcombine on the IR below:
>
> @global = local_unnamed_addr global [16 x [16 x double]] [[16 x double] [double -2.490000e+00, double -8.240000e+00, double -7.040000e+00, double -4.320000e+00, double -6.860000e+00, double -8.390000e+00, double -5.030000e+00, double -5.840000e+00, double 0xC021AE147AE147AE, double -4.680000e+00, double -1.437000e+01, double -1.264000e+01, double -4.010000e+00, double -6.160000e+00, double -1.132000e+01, double -9.050000e+00], [16 x double] [double -8.240000e+00, double -8.000000e-01, double -8.890000e+00, double -5.130000e+00, double -8.610000e+00, double -5.380000e+00, double -5.770000e+00, double -6.600000e+00, double -1.041000e+01, double -4.570000e+00, double -1.453000e+01, double -1.014000e+01, double -5.430000e+00, double -5.940000e+00, double -8.870000e+00, double -1.107000e+01], [16 x double] [double -7.040000e+00, double -8.890000e+00, double -2.110000e+00, double -2.040000e+00, double -9.730000e+00, double -1.105000e+01, double -3.810000e+00, double -4.720000e+00, double 0xC022BD70A3D70A3D, double -5.860000e+00, double -9.080000e+00, double -1.045000e+01, double -5.330000e+00, double -6.930000e+00, double -8.670000e+00, double -7.830000e+00], [16 x double] [double -4.320000e+00, double -5.130000e+00, double -2.040000e+00, double 4.490000e+00, double -5.330000e+00, double -5.610000e+00, double 2.700000e+00, double 5.900000e-01, double -5.560000e+00, double 1.670000e+00, double -6.710000e+00, double -5.170000e+00, double 1.610000e+00, double -5.100000e-01, double -4.810000e+00, double -2.980000e+00], [16 x double] [double -6.860000e+00, double -8.610000e+00, double -9.730000e+00, double -5.330000e+00, double -1.050000e+00, double -8.670000e+00, double -4.880000e+00, double -6.100000e+00, double -7.980000e+00, double -6.000000e+00, double -1.243000e+01, double 0xC01ED70A3D70A3D7, double -5.850000e+00, double 0xC01E333333333333, double -6.630000e+00, double -1.154000e+01], [16 x double] [double -8.390000e+00, double -5.380000e+00, double -1.105000e+01, 
double -5.610000e+00, double -8.670000e+00, double -1.980000e+00, double -4.130000e+00, double -5.770000e+00, double -1.136000e+01, double -4.660000e+00, double -1.258000e+01, double -1.369000e+01, double -5.750000e+00, double -4.270000e+00, double -1.201000e+01, double -1.079000e+01], [16 x double] [double -5.030000e+00, double -5.770000e+00, double -3.810000e+00, double 2.700000e+00, double -4.880000e+00, double -4.130000e+00, double 5.620000e+00, double 1.210000e+00, double -5.950000e+00, double 2.110000e+00, double -3.700000e+00, double -5.840000e+00, double 1.600000e+00, double -8.000000e-02, double -4.490000e+00, double -3.900000e+00], [16 x double] [double -5.840000e+00, double -6.600000e+00, double -4.720000e+00, double 5.900000e-01, double -6.100000e+00, double -5.770000e+00, double 1.210000e+00, double 3.470000e+00, double 0xC01FB851EB851EB8, double -2.700000e-01, double 0xC01F851EB851EB85, double -5.610000e+00, double -5.700000e-01, double -2.090000e+00, double -5.300000e+00, double -4.450000e+00], [16 x double] [double 0xC021AE147AE147AE, double -1.041000e+01, double 0xC022BD70A3D70A3D, double -5.560000e+00, double -7.980000e+00, double -1.136000e+01, double -5.950000e+00, double 0xC01FB851EB851EB8, double -5.130000e+00, double -3.570000e+00, double -1.045000e+01, double -8.490000e+00, double -2.420000e+00, double -5.630000e+00, double -7.080000e+00, double -8.390000e+00], [16 x double] [double -4.680000e+00, double -4.570000e+00, double -5.860000e+00, double 1.670000e+00, double -6.000000e+00, double -4.660000e+00, double 2.110000e+00, double -2.700000e-01, double -3.570000e+00, double 5.360000e+00, double -5.710000e+00, double -4.960000e+00, double 2.750000e+00, double 1.320000e+00, double -4.910000e+00, double -3.670000e+00], [16 x double] [double -1.437000e+01, double -1.453000e+01, double -9.080000e+00, double -6.710000e+00, double -1.243000e+01, double -1.258000e+01, double -3.700000e+00, double 0xC01F851EB851EB85, double -1.045000e+01, double 
-5.710000e+00, double -3.590000e+00, double -5.770000e+00, double -6.880000e+00, double -8.410000e+00, double -7.400000e+00, double -5.410000e+00], [16 x double] [double -1.264000e+01, double -1.014000e+01, double -1.045000e+01, double -5.170000e+00, double 0xC01ED70A3D70A3D7, double -1.369000e+01, double -5.840000e+00, double -5.610000e+00, double -8.490000e+00, double -4.960000e+00, double -5.770000e+00, double -2.280000e+00, double -4.720000e+00, double -7.360000e+00, double -3.830000e+00, double -5.210000e+00], [16 x double] [double -4.010000e+00, double -5.430000e+00, double -5.330000e+00, double 1.610000e+00, double -5.850000e+00, double -5.750000e+00, double 1.600000e+00, double -5.700000e-01, double -2.420000e+00, double 2.750000e+00, double -6.880000e+00, double -4.720000e+00, double 4.970000e+00, double 1.140000e+00, double -2.980000e+00, double -3.390000e+00], [16 x double] [double -6.160000e+00, double -5.940000e+00, double -6.930000e+00, double -5.100000e-01, double 0xC01E333333333333, double -4.270000e+00, double -8.000000e-02, double -2.090000e+00, double -5.630000e+00, double 1.320000e+00, double -8.410000e+00, double -7.360000e+00, double 1.140000e+00, double 3.360000e+00, double -4.760000e+00, double -4.280000e+00], [16 x double] [double -1.132000e+01, double -8.870000e+00, double -8.670000e+00, double -4.810000e+00, double -6.630000e+00, double -1.201000e+01, double -4.490000e+00, double -5.300000e+00, double -7.080000e+00, double -4.910000e+00, double -7.400000e+00, double -3.830000e+00, double -2.980000e+00, double -4.760000e+00, double -3.210000e+00, double -5.970000e+00], [16 x double] [double -9.050000e+00, double -1.107000e+01, double -7.830000e+00, double -2.980000e+00, double -1.154000e+01, double -1.079000e+01, double -3.900000e+00, double -4.450000e+00, double -8.390000e+00, double -3.670000e+00, double -5.410000e+00, double -5.210000e+00, double -3.390000e+00, double -4.280000e+00, double -5.970000e+00, double -2.000000e-02]], align 16
>
> define void @blam() local_unnamed_addr {
> bb:
> br label %bb1
>
> bb1: ; preds = %bb1, %bb
> br i1 undef, label %bb2, label %bb1
>
> bb2: ; preds = %bb2, %bb1
> %tmp = fcmp ogt <4 x double> undef, zeroinitializer
> %tmp3 = fcmp olt <4 x double> undef, zeroinitializer
> %tmp4 = xor <4 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true>
> %tmp5 = xor <4 x i1> %tmp3, <i1 true, i1 true, i1 true, i1 true>
> %tmp6 = and <4 x i1> %tmp5, %tmp4
> %tmp7 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 1
> %tmp8 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp7, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp9 = fcmp ogt <4 x double> %tmp8, zeroinitializer
> %tmp10 = fcmp olt <4 x double> %tmp8, zeroinitializer
> %tmp11 = or <4 x i1> %tmp, %tmp6
> %tmp12 = or <4 x i1> %tmp11, undef
> %tmp13 = and <4 x i1> %tmp9, %tmp12
> %tmp14 = xor <4 x i1> %tmp9, <i1 true, i1 true, i1 true, i1 true>
> %tmp15 = and <4 x i1> %tmp14, %tmp12
> %tmp16 = and <4 x i1> %tmp10, %tmp15
> %tmp17 = xor <4 x i1> %tmp10, <i1 true, i1 true, i1 true, i1 true>
> %tmp18 = and <4 x i1> %tmp17, %tmp15
> %tmp19 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 2
> %tmp20 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp19, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp21 = fcmp ogt <4 x double> %tmp20, zeroinitializer
> %tmp22 = fcmp olt <4 x double> %tmp20, zeroinitializer
> %tmp23 = or <4 x i1> %tmp13, %tmp16
> %tmp24 = or <4 x i1> %tmp23, %tmp18
> %tmp25 = and <4 x i1> %tmp21, %tmp24
> %tmp26 = xor <4 x i1> %tmp21, <i1 true, i1 true, i1 true, i1 true>
> %tmp27 = and <4 x i1> %tmp26, %tmp24
> %tmp28 = and <4 x i1> %tmp22, %tmp27
> %tmp29 = xor <4 x i1> %tmp22, <i1 true, i1 true, i1 true, i1 true>
> %tmp30 = and <4 x i1> %tmp29, %tmp27
> %tmp31 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 3
> %tmp32 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp31, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp33 = fcmp ogt <4 x double> %tmp32, zeroinitializer
> %tmp34 = fcmp olt <4 x double> %tmp32, zeroinitializer
> %tmp35 = or <4 x i1> %tmp25, %tmp28
> %tmp36 = or <4 x i1> %tmp35, %tmp30
> %tmp37 = and <4 x i1> %tmp33, %tmp36
> %tmp38 = xor <4 x i1> %tmp33, <i1 true, i1 true, i1 true, i1 true>
> %tmp39 = and <4 x i1> %tmp38, %tmp36
> %tmp40 = and <4 x i1> %tmp34, %tmp39
> %tmp41 = xor <4 x i1> %tmp34, <i1 true, i1 true, i1 true, i1 true>
> %tmp42 = and <4 x i1> %tmp41, %tmp39
> %tmp43 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 4
> %tmp44 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp43, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp45 = fcmp ogt <4 x double> %tmp44, zeroinitializer
> %tmp46 = fcmp olt <4 x double> %tmp44, zeroinitializer
> %tmp47 = or <4 x i1> %tmp37, %tmp40
> %tmp48 = or <4 x i1> %tmp47, %tmp42
> %tmp49 = and <4 x i1> %tmp45, %tmp48
> %tmp50 = xor <4 x i1> %tmp45, <i1 true, i1 true, i1 true, i1 true>
> %tmp51 = and <4 x i1> %tmp50, %tmp48
> %tmp52 = and <4 x i1> %tmp46, %tmp51
> %tmp53 = xor <4 x i1> %tmp46, <i1 true, i1 true, i1 true, i1 true>
> %tmp54 = and <4 x i1> %tmp53, %tmp51
> %tmp55 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 5
> %tmp56 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp55, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp57 = fcmp ogt <4 x double> %tmp56, zeroinitializer
> %tmp58 = fcmp olt <4 x double> %tmp56, zeroinitializer
> %tmp59 = or <4 x i1> %tmp49, %tmp52
> %tmp60 = or <4 x i1> %tmp59, %tmp54
> %tmp61 = and <4 x i1> %tmp57, %tmp60
> %tmp62 = xor <4 x i1> %tmp57, <i1 true, i1 true, i1 true, i1 true>
> %tmp63 = and <4 x i1> %tmp62, %tmp60
> %tmp64 = and <4 x i1> %tmp58, %tmp63
> %tmp65 = xor <4 x i1> %tmp58, <i1 true, i1 true, i1 true, i1 true>
> %tmp66 = and <4 x i1> %tmp65, %tmp63
> %tmp67 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 6
> %tmp68 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp67, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp69 = fcmp ogt <4 x double> %tmp68, zeroinitializer
> %tmp70 = fcmp olt <4 x double> %tmp68, zeroinitializer
> %tmp71 = or <4 x i1> %tmp61, %tmp64
> %tmp72 = or <4 x i1> %tmp71, %tmp66
> %tmp73 = and <4 x i1> %tmp69, %tmp72
> %tmp74 = xor <4 x i1> %tmp69, <i1 true, i1 true, i1 true, i1 true>
> %tmp75 = and <4 x i1> %tmp74, %tmp72
> %tmp76 = and <4 x i1> %tmp70, %tmp75
> %tmp77 = xor <4 x i1> %tmp70, <i1 true, i1 true, i1 true, i1 true>
> %tmp78 = and <4 x i1> %tmp77, %tmp75
> %tmp79 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 7
> %tmp80 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp79, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp81 = fcmp ogt <4 x double> %tmp80, zeroinitializer
> %tmp82 = fcmp olt <4 x double> %tmp80, zeroinitializer
> %tmp83 = or <4 x i1> %tmp73, %tmp76
> %tmp84 = or <4 x i1> %tmp83, %tmp78
> %tmp85 = and <4 x i1> %tmp81, %tmp84
> %tmp86 = xor <4 x i1> %tmp81, <i1 true, i1 true, i1 true, i1 true>
> %tmp87 = and <4 x i1> %tmp86, %tmp84
> %tmp88 = and <4 x i1> %tmp82, %tmp87
> %tmp89 = xor <4 x i1> %tmp82, <i1 true, i1 true, i1 true, i1 true>
> %tmp90 = and <4 x i1> %tmp89, %tmp87
> %tmp91 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 8
> %tmp92 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp91, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp93 = fcmp ogt <4 x double> %tmp92, zeroinitializer
> %tmp94 = fcmp olt <4 x double> %tmp92, zeroinitializer
> %tmp95 = or <4 x i1> %tmp85, %tmp88
> %tmp96 = or <4 x i1> %tmp95, %tmp90
> %tmp97 = and <4 x i1> %tmp93, %tmp96
> %tmp98 = xor <4 x i1> %tmp93, <i1 true, i1 true, i1 true, i1 true>
> %tmp99 = and <4 x i1> %tmp98, %tmp96
> %tmp100 = and <4 x i1> %tmp94, %tmp99
> %tmp101 = xor <4 x i1> %tmp94, <i1 true, i1 true, i1 true, i1 true>
> %tmp102 = and <4 x i1> %tmp101, %tmp99
> %tmp103 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 9
> %tmp104 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp103, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp105 = fcmp ogt <4 x double> %tmp104, zeroinitializer
> %tmp106 = fcmp olt <4 x double> %tmp104, zeroinitializer
> %tmp107 = or <4 x i1> %tmp97, %tmp100
> %tmp108 = or <4 x i1> %tmp107, %tmp102
> %tmp109 = and <4 x i1> %tmp105, %tmp108
> %tmp110 = xor <4 x i1> %tmp105, <i1 true, i1 true, i1 true, i1 true>
> %tmp111 = and <4 x i1> %tmp110, %tmp108
> %tmp112 = and <4 x i1> %tmp106, %tmp111
> %tmp113 = xor <4 x i1> %tmp106, <i1 true, i1 true, i1 true, i1 true>
> %tmp114 = and <4 x i1> %tmp113, %tmp111
> %tmp115 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 10
> %tmp116 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
> %tmp117 = fcmp ogt <4 x double> %tmp116, zeroinitializer
> %tmp118 = fcmp olt <4 x double> %tmp116, zeroinitializer
> %tmp119 = fadd <4 x double> %tmp116, <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
> %tmp120 = fptosi <4 x double> %tmp119 to <4 x i32>
> %tmp121 = fadd <4 x double> %tmp116, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
> %tmp122 = fptosi <4 x double> %tmp121 to <4 x i32>
> %tmp123 = or <4 x i1> %tmp109, %tmp112
> %tmp124 = or <4 x i1> %tmp123, %tmp114
> %tmp125 = xor <4 x i1> %tmp117, <i1 true, i1 true, i1 true, i1 true>
> %tmp126 = and <4 x i1> %tmp125, %tmp124
> %tmp127 = xor <4 x i1> %tmp118, <i1 true, i1 true, i1 true, i1 true>
> %tmp128 = and <4 x i1> %tmp127, %tmp126
> %tmp129 = select <4 x i1> undef, <4 x i32> %tmp120, <4 x i32> %tmp122
> %tmp130 = select <4 x i1> %tmp128, <4 x i32> zeroinitializer, <4 x i32> %tmp129
> %tmp131 = sitofp <4 x i32> %tmp130 to <4 x double>
> call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %tmp131, <4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
> br label %bb2
> }
>
> ; Function Attrs: nounwind readonly willreturn
> declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32 immarg, <4 x i1>, <4 x double>) #0
>
> ; Function Attrs: nounwind willreturn
> declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 immarg, <4 x i1>) #1
>
> attributes #0 = { nounwind readonly willreturn }
> attributes #1 = { nounwind willreturn }
More information about the llvm-commits mailing list