[llvm] 3d76a13 - Revert "[InstCombine] Lower infinite combine loop detection thresholds"
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 07:50:48 PDT 2020
> On Aug 19, 2020, at 14:53, Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Roman Lebedev
> Date: 2020-08-19T16:53:30+03:00
> New Revision: 3d76a133c7e0d4056c1a0657b0b186c381bf7b74
>
> URL: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> DIFF: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74.diff
>
> LOG: Revert "[InstCombine] Lower infinite combine loop detection thresholds"
>
> And as being reported by Florian Hahn, there's a hit
> in MultiSource/Benchmarks/mafft from the test-suite on X86 with -O3 -flto,
> so reverting until addressed.
This should be reproducible by running `opt -instcombine` on the IR below:
@global = local_unnamed_addr global [16 x [16 x double]] [[16 x double] [double -2.490000e+00, double -8.240000e+00, double -7.040000e+00, double -4.320000e+00, double -6.860000e+00, double -8.390000e+00, double -5.030000e+00, double -5.840000e+00, double 0xC021AE147AE147AE, double -4.680000e+00, double -1.437000e+01, double -1.264000e+01, double -4.010000e+00, double -6.160000e+00, double -1.132000e+01, double -9.050000e+00], [16 x double] [double -8.240000e+00, double -8.000000e-01, double -8.890000e+00, double -5.130000e+00, double -8.610000e+00, double -5.380000e+00, double -5.770000e+00, double -6.600000e+00, double -1.041000e+01, double -4.570000e+00, double -1.453000e+01, double -1.014000e+01, double -5.430000e+00, double -5.940000e+00, double -8.870000e+00, double -1.107000e+01], [16 x double] [double -7.040000e+00, double -8.890000e+00, double -2.110000e+00, double -2.040000e+00, double -9.730000e+00, double -1.105000e+01, double -3.810000e+00, double -4.720000e+00, double 0xC022BD70A3D70A3D, double -5.860000e+00, double -9.080000e+00, double -1.045000e+01, double -5.330000e+00, double -6.930000e+00, double -8.670000e+00, double -7.830000e+00], [16 x double] [double -4.320000e+00, double -5.130000e+00, double -2.040000e+00, double 4.490000e+00, double -5.330000e+00, double -5.610000e+00, double 2.700000e+00, double 5.900000e-01, double -5.560000e+00, double 1.670000e+00, double -6.710000e+00, double -5.170000e+00, double 1.610000e+00, double -5.100000e-01, double -4.810000e+00, double -2.980000e+00], [16 x double] [double -6.860000e+00, double -8.610000e+00, double -9.730000e+00, double -5.330000e+00, double -1.050000e+00, double -8.670000e+00, double -4.880000e+00, double -6.100000e+00, double -7.980000e+00, double -6.000000e+00, double -1.243000e+01, double 0xC01ED70A3D70A3D7, double -5.850000e+00, double 0xC01E333333333333, double -6.630000e+00, double -1.154000e+01], [16 x double] [double -8.390000e+00, double -5.380000e+00, double -1.105000e+01, 
double -5.610000e+00, double -8.670000e+00, double -1.980000e+00, double -4.130000e+00, double -5.770000e+00, double -1.136000e+01, double -4.660000e+00, double -1.258000e+01, double -1.369000e+01, double -5.750000e+00, double -4.270000e+00, double -1.201000e+01, double -1.079000e+01], [16 x double] [double -5.030000e+00, double -5.770000e+00, double -3.810000e+00, double 2.700000e+00, double -4.880000e+00, double -4.130000e+00, double 5.620000e+00, double 1.210000e+00, double -5.950000e+00, double 2.110000e+00, double -3.700000e+00, double -5.840000e+00, double 1.600000e+00, double -8.000000e-02, double -4.490000e+00, double -3.900000e+00], [16 x double] [double -5.840000e+00, double -6.600000e+00, double -4.720000e+00, double 5.900000e-01, double -6.100000e+00, double -5.770000e+00, double 1.210000e+00, double 3.470000e+00, double 0xC01FB851EB851EB8, double -2.700000e-01, double 0xC01F851EB851EB85, double -5.610000e+00, double -5.700000e-01, double -2.090000e+00, double -5.300000e+00, double -4.450000e+00], [16 x double] [double 0xC021AE147AE147AE, double -1.041000e+01, double 0xC022BD70A3D70A3D, double -5.560000e+00, double -7.980000e+00, double -1.136000e+01, double -5.950000e+00, double 0xC01FB851EB851EB8, double -5.130000e+00, double -3.570000e+00, double -1.045000e+01, double -8.490000e+00, double -2.420000e+00, double -5.630000e+00, double -7.080000e+00, double -8.390000e+00], [16 x double] [double -4.680000e+00, double -4.570000e+00, double -5.860000e+00, double 1.670000e+00, double -6.000000e+00, double -4.660000e+00, double 2.110000e+00, double -2.700000e-01, double -3.570000e+00, double 5.360000e+00, double -5.710000e+00, double -4.960000e+00, double 2.750000e+00, double 1.320000e+00, double -4.910000e+00, double -3.670000e+00], [16 x double] [double -1.437000e+01, double -1.453000e+01, double -9.080000e+00, double -6.710000e+00, double -1.243000e+01, double -1.258000e+01, double -3.700000e+00, double 0xC01F851EB851EB85, double -1.045000e+01, double 
-5.710000e+00, double -3.590000e+00, double -5.770000e+00, double -6.880000e+00, double -8.410000e+00, double -7.400000e+00, double -5.410000e+00], [16 x double] [double -1.264000e+01, double -1.014000e+01, double -1.045000e+01, double -5.170000e+00, double 0xC01ED70A3D70A3D7, double -1.369000e+01, double -5.840000e+00, double -5.610000e+00, double -8.490000e+00, double -4.960000e+00, double -5.770000e+00, double -2.280000e+00, double -4.720000e+00, double -7.360000e+00, double -3.830000e+00, double -5.210000e+00], [16 x double] [double -4.010000e+00, double -5.430000e+00, double -5.330000e+00, double 1.610000e+00, double -5.850000e+00, double -5.750000e+00, double 1.600000e+00, double -5.700000e-01, double -2.420000e+00, double 2.750000e+00, double -6.880000e+00, double -4.720000e+00, double 4.970000e+00, double 1.140000e+00, double -2.980000e+00, double -3.390000e+00], [16 x double] [double -6.160000e+00, double -5.940000e+00, double -6.930000e+00, double -5.100000e-01, double 0xC01E333333333333, double -4.270000e+00, double -8.000000e-02, double -2.090000e+00, double -5.630000e+00, double 1.320000e+00, double -8.410000e+00, double -7.360000e+00, double 1.140000e+00, double 3.360000e+00, double -4.760000e+00, double -4.280000e+00], [16 x double] [double -1.132000e+01, double -8.870000e+00, double -8.670000e+00, double -4.810000e+00, double -6.630000e+00, double -1.201000e+01, double -4.490000e+00, double -5.300000e+00, double -7.080000e+00, double -4.910000e+00, double -7.400000e+00, double -3.830000e+00, double -2.980000e+00, double -4.760000e+00, double -3.210000e+00, double -5.970000e+00], [16 x double] [double -9.050000e+00, double -1.107000e+01, double -7.830000e+00, double -2.980000e+00, double -1.154000e+01, double -1.079000e+01, double -3.900000e+00, double -4.450000e+00, double -8.390000e+00, double -3.670000e+00, double -5.410000e+00, double -5.210000e+00, double -3.390000e+00, double -4.280000e+00, double -5.970000e+00, double -2.000000e-02]], align 16
; @blam: reproducer function from this message (to be run through
; `opt -instcombine`). It takes no arguments and contains no `ret`: control
; enters at bb, may spin in bb1, and once in bb2 loops back to bb2
; unconditionally.
;
; bb2 builds a long chain of <4 x i1> masks. Stages 1-9 all have the same shape:
;   ptrs = getelementptr into @global with undef row indices and column k
;   v    = masked.gather(ptrs) with an all-true mask
;   gt   = fcmp ogt v, 0        lt = fcmp olt v, 0
;   a    = gt & prev
;   b    = ~gt & prev
;   c    = lt & b
;   d    = ~lt & b
;   next = (a | c) | d
; NOTE(review): in plain Boolean algebra (a | c | d) reduces to prev, so every
; stage is a redundant re-derivation of the previous mask; presumably this is
; the pattern that keeps InstCombine busy -- confirm against the pass debug log.
define void @blam() local_unnamed_addr {
bb:
br label %bb1
; bb1 branches on an undef condition: either on to bb2 or back to itself.
bb1: ; preds = %bb1, %bb
br i1 undef, label %bb2, label %bb1
bb2: ; preds = %bb2, %bb1
; Stage 0: seed mask computed purely from comparisons of undef against zero.
%tmp = fcmp ogt <4 x double> undef, zeroinitializer
%tmp3 = fcmp olt <4 x double> undef, zeroinitializer
%tmp4 = xor <4 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true>
%tmp5 = xor <4 x i1> %tmp3, <i1 true, i1 true, i1 true, i1 true>
%tmp6 = and <4 x i1> %tmp5, %tmp4
; Stage 1: column 1 of @global; the i32 operand of the gather is 8 here and
; alternates 8 / 16 with the column parity in the stages below.
%tmp7 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 1
%tmp8 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp7, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp9 = fcmp ogt <4 x double> %tmp8, zeroinitializer
%tmp10 = fcmp olt <4 x double> %tmp8, zeroinitializer
; %tmp12 is the "prev" mask for stage 1: (%tmp | %tmp6) or'ed with undef.
%tmp11 = or <4 x i1> %tmp, %tmp6
%tmp12 = or <4 x i1> %tmp11, undef
%tmp13 = and <4 x i1> %tmp9, %tmp12
%tmp14 = xor <4 x i1> %tmp9, <i1 true, i1 true, i1 true, i1 true>
%tmp15 = and <4 x i1> %tmp14, %tmp12
%tmp16 = and <4 x i1> %tmp10, %tmp15
%tmp17 = xor <4 x i1> %tmp10, <i1 true, i1 true, i1 true, i1 true>
%tmp18 = and <4 x i1> %tmp17, %tmp15
; Stage 2: column 2; prev mask is %tmp24.
%tmp19 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 2
%tmp20 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp19, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp21 = fcmp ogt <4 x double> %tmp20, zeroinitializer
%tmp22 = fcmp olt <4 x double> %tmp20, zeroinitializer
%tmp23 = or <4 x i1> %tmp13, %tmp16
%tmp24 = or <4 x i1> %tmp23, %tmp18
%tmp25 = and <4 x i1> %tmp21, %tmp24
%tmp26 = xor <4 x i1> %tmp21, <i1 true, i1 true, i1 true, i1 true>
%tmp27 = and <4 x i1> %tmp26, %tmp24
%tmp28 = and <4 x i1> %tmp22, %tmp27
%tmp29 = xor <4 x i1> %tmp22, <i1 true, i1 true, i1 true, i1 true>
%tmp30 = and <4 x i1> %tmp29, %tmp27
; Stage 3: column 3; prev mask is %tmp36.
%tmp31 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 3
%tmp32 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp31, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp33 = fcmp ogt <4 x double> %tmp32, zeroinitializer
%tmp34 = fcmp olt <4 x double> %tmp32, zeroinitializer
%tmp35 = or <4 x i1> %tmp25, %tmp28
%tmp36 = or <4 x i1> %tmp35, %tmp30
%tmp37 = and <4 x i1> %tmp33, %tmp36
%tmp38 = xor <4 x i1> %tmp33, <i1 true, i1 true, i1 true, i1 true>
%tmp39 = and <4 x i1> %tmp38, %tmp36
%tmp40 = and <4 x i1> %tmp34, %tmp39
%tmp41 = xor <4 x i1> %tmp34, <i1 true, i1 true, i1 true, i1 true>
%tmp42 = and <4 x i1> %tmp41, %tmp39
; Stage 4: column 4; prev mask is %tmp48.
%tmp43 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 4
%tmp44 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp43, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp45 = fcmp ogt <4 x double> %tmp44, zeroinitializer
%tmp46 = fcmp olt <4 x double> %tmp44, zeroinitializer
%tmp47 = or <4 x i1> %tmp37, %tmp40
%tmp48 = or <4 x i1> %tmp47, %tmp42
%tmp49 = and <4 x i1> %tmp45, %tmp48
%tmp50 = xor <4 x i1> %tmp45, <i1 true, i1 true, i1 true, i1 true>
%tmp51 = and <4 x i1> %tmp50, %tmp48
%tmp52 = and <4 x i1> %tmp46, %tmp51
%tmp53 = xor <4 x i1> %tmp46, <i1 true, i1 true, i1 true, i1 true>
%tmp54 = and <4 x i1> %tmp53, %tmp51
; Stage 5: column 5; prev mask is %tmp60.
%tmp55 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 5
%tmp56 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp55, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp57 = fcmp ogt <4 x double> %tmp56, zeroinitializer
%tmp58 = fcmp olt <4 x double> %tmp56, zeroinitializer
%tmp59 = or <4 x i1> %tmp49, %tmp52
%tmp60 = or <4 x i1> %tmp59, %tmp54
%tmp61 = and <4 x i1> %tmp57, %tmp60
%tmp62 = xor <4 x i1> %tmp57, <i1 true, i1 true, i1 true, i1 true>
%tmp63 = and <4 x i1> %tmp62, %tmp60
%tmp64 = and <4 x i1> %tmp58, %tmp63
%tmp65 = xor <4 x i1> %tmp58, <i1 true, i1 true, i1 true, i1 true>
%tmp66 = and <4 x i1> %tmp65, %tmp63
; Stage 6: column 6; prev mask is %tmp72.
%tmp67 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 6
%tmp68 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp67, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp69 = fcmp ogt <4 x double> %tmp68, zeroinitializer
%tmp70 = fcmp olt <4 x double> %tmp68, zeroinitializer
%tmp71 = or <4 x i1> %tmp61, %tmp64
%tmp72 = or <4 x i1> %tmp71, %tmp66
%tmp73 = and <4 x i1> %tmp69, %tmp72
%tmp74 = xor <4 x i1> %tmp69, <i1 true, i1 true, i1 true, i1 true>
%tmp75 = and <4 x i1> %tmp74, %tmp72
%tmp76 = and <4 x i1> %tmp70, %tmp75
%tmp77 = xor <4 x i1> %tmp70, <i1 true, i1 true, i1 true, i1 true>
%tmp78 = and <4 x i1> %tmp77, %tmp75
; Stage 7: column 7; prev mask is %tmp84.
%tmp79 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 7
%tmp80 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp79, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp81 = fcmp ogt <4 x double> %tmp80, zeroinitializer
%tmp82 = fcmp olt <4 x double> %tmp80, zeroinitializer
%tmp83 = or <4 x i1> %tmp73, %tmp76
%tmp84 = or <4 x i1> %tmp83, %tmp78
%tmp85 = and <4 x i1> %tmp81, %tmp84
%tmp86 = xor <4 x i1> %tmp81, <i1 true, i1 true, i1 true, i1 true>
%tmp87 = and <4 x i1> %tmp86, %tmp84
%tmp88 = and <4 x i1> %tmp82, %tmp87
%tmp89 = xor <4 x i1> %tmp82, <i1 true, i1 true, i1 true, i1 true>
%tmp90 = and <4 x i1> %tmp89, %tmp87
; Stage 8: column 8; prev mask is %tmp96.
%tmp91 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 8
%tmp92 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp91, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp93 = fcmp ogt <4 x double> %tmp92, zeroinitializer
%tmp94 = fcmp olt <4 x double> %tmp92, zeroinitializer
%tmp95 = or <4 x i1> %tmp85, %tmp88
%tmp96 = or <4 x i1> %tmp95, %tmp90
%tmp97 = and <4 x i1> %tmp93, %tmp96
%tmp98 = xor <4 x i1> %tmp93, <i1 true, i1 true, i1 true, i1 true>
%tmp99 = and <4 x i1> %tmp98, %tmp96
%tmp100 = and <4 x i1> %tmp94, %tmp99
%tmp101 = xor <4 x i1> %tmp94, <i1 true, i1 true, i1 true, i1 true>
%tmp102 = and <4 x i1> %tmp101, %tmp99
; Stage 9: column 9; prev mask is %tmp108.
%tmp103 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 9
%tmp104 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp103, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp105 = fcmp ogt <4 x double> %tmp104, zeroinitializer
%tmp106 = fcmp olt <4 x double> %tmp104, zeroinitializer
%tmp107 = or <4 x i1> %tmp97, %tmp100
%tmp108 = or <4 x i1> %tmp107, %tmp102
%tmp109 = and <4 x i1> %tmp105, %tmp108
%tmp110 = xor <4 x i1> %tmp105, <i1 true, i1 true, i1 true, i1 true>
%tmp111 = and <4 x i1> %tmp110, %tmp108
%tmp112 = and <4 x i1> %tmp106, %tmp111
%tmp113 = xor <4 x i1> %tmp106, <i1 true, i1 true, i1 true, i1 true>
%tmp114 = and <4 x i1> %tmp113, %tmp111
; Final stage: column 10. Unlike stages 1-9 the gathered value itself is used:
; it is offset by -0.5 and +0.5, each sum is converted to i32, and the result
; chosen by the selects below is converted back to double and scattered to the
; same pointers (%tmp115) it was gathered from.
%tmp115 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 10
%tmp116 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
%tmp117 = fcmp ogt <4 x double> %tmp116, zeroinitializer
%tmp118 = fcmp olt <4 x double> %tmp116, zeroinitializer
%tmp119 = fadd <4 x double> %tmp116, <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
%tmp120 = fptosi <4 x double> %tmp119 to <4 x i32>
%tmp121 = fadd <4 x double> %tmp116, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
%tmp122 = fptosi <4 x double> %tmp121 to <4 x i32>
; %tmp124 is the mask carried out of stage 9; %tmp128 keeps the lanes of it
; where the column-10 value is neither ogt nor olt zero.
%tmp123 = or <4 x i1> %tmp109, %tmp112
%tmp124 = or <4 x i1> %tmp123, %tmp114
%tmp125 = xor <4 x i1> %tmp117, <i1 true, i1 true, i1 true, i1 true>
%tmp126 = and <4 x i1> %tmp125, %tmp124
%tmp127 = xor <4 x i1> %tmp118, <i1 true, i1 true, i1 true, i1 true>
%tmp128 = and <4 x i1> %tmp127, %tmp126
; The inner select has an undef condition; the outer one substitutes zero in
; the lanes where %tmp128 is true.
%tmp129 = select <4 x i1> undef, <4 x i32> %tmp120, <4 x i32> %tmp122
%tmp130 = select <4 x i1> %tmp128, <4 x i32> zeroinitializer, <4 x i32> %tmp129
%tmp131 = sitofp <4 x i32> %tmp130 to <4 x double>
call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %tmp131, <4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; Unconditional back-edge: bb2 never exits.
br label %bb2
}
; Declarations of the two vector memory intrinsics called from @blam, followed
; by the attribute groups (#0, #1) they reference.
;
; Gather: takes a vector of four double pointers, an immediate i32 (8 or 16 at
; the call sites in @blam), a <4 x i1> mask and a <4 x double> operand, and
; returns <4 x double>.
; Function Attrs: nounwind readonly willreturn
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32 immarg, <4 x i1>, <4 x double>) #0
; Scatter: takes the <4 x double> value to store, a vector of four double
; pointers, an immediate i32 and a <4 x i1> mask; returns nothing.
; Function Attrs: nounwind willreturn
declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 immarg, <4 x i1>) #1
; #0 is attached to the gather declaration, #1 to the scatter declaration.
attributes #0 = { nounwind readonly willreturn }
attributes #1 = { nounwind willreturn }
More information about the llvm-commits
mailing list