[llvm] 3d76a13 - Revert "[InstCombine] Lower infinite combine loop detection thresholds"

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 07:50:48 PDT 2020



> On Aug 19, 2020, at 14:53, Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> 
> Author: Roman Lebedev
> Date: 2020-08-19T16:53:30+03:00
> New Revision: 3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> 
> URL: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74
> DIFF: https://github.com/llvm/llvm-project/commit/3d76a133c7e0d4056c1a0657b0b186c381bf7b74.diff
> 
> LOG: Revert "[InstCombine] Lower infinite combine loop detection thresholds"
> 
> And as being reported by Florian Hahn, there's a hit
> in MultiSource/Benchmarks/mafft from the test-suite on X86 with -O3 -flto,
> so reverting until addressed.


This should be reproducible by running `opt -instcombine` on the IR below:

@global = local_unnamed_addr global [16 x [16 x double]] [[16 x double] [double -2.490000e+00, double -8.240000e+00, double -7.040000e+00, double -4.320000e+00, double -6.860000e+00, double -8.390000e+00, double -5.030000e+00, double -5.840000e+00, double 0xC021AE147AE147AE, double -4.680000e+00, double -1.437000e+01, double -1.264000e+01, double -4.010000e+00, double -6.160000e+00, double -1.132000e+01, double -9.050000e+00], [16 x double] [double -8.240000e+00, double -8.000000e-01, double -8.890000e+00, double -5.130000e+00, double -8.610000e+00, double -5.380000e+00, double -5.770000e+00, double -6.600000e+00, double -1.041000e+01, double -4.570000e+00, double -1.453000e+01, double -1.014000e+01, double -5.430000e+00, double -5.940000e+00, double -8.870000e+00, double -1.107000e+01], [16 x double] [double -7.040000e+00, double -8.890000e+00, double -2.110000e+00, double -2.040000e+00, double -9.730000e+00, double -1.105000e+01, double -3.810000e+00, double -4.720000e+00, double 0xC022BD70A3D70A3D, double -5.860000e+00, double -9.080000e+00, double -1.045000e+01, double -5.330000e+00, double -6.930000e+00, double -8.670000e+00, double -7.830000e+00], [16 x double] [double -4.320000e+00, double -5.130000e+00, double -2.040000e+00, double 4.490000e+00, double -5.330000e+00, double -5.610000e+00, double 2.700000e+00, double 5.900000e-01, double -5.560000e+00, double 1.670000e+00, double -6.710000e+00, double -5.170000e+00, double 1.610000e+00, double -5.100000e-01, double -4.810000e+00, double -2.980000e+00], [16 x double] [double -6.860000e+00, double -8.610000e+00, double -9.730000e+00, double -5.330000e+00, double -1.050000e+00, double -8.670000e+00, double -4.880000e+00, double -6.100000e+00, double -7.980000e+00, double -6.000000e+00, double -1.243000e+01, double 0xC01ED70A3D70A3D7, double -5.850000e+00, double 0xC01E333333333333, double -6.630000e+00, double -1.154000e+01], [16 x double] [double -8.390000e+00, double -5.380000e+00, double -1.105000e+01, 
double -5.610000e+00, double -8.670000e+00, double -1.980000e+00, double -4.130000e+00, double -5.770000e+00, double -1.136000e+01, double -4.660000e+00, double -1.258000e+01, double -1.369000e+01, double -5.750000e+00, double -4.270000e+00, double -1.201000e+01, double -1.079000e+01], [16 x double] [double -5.030000e+00, double -5.770000e+00, double -3.810000e+00, double 2.700000e+00, double -4.880000e+00, double -4.130000e+00, double 5.620000e+00, double 1.210000e+00, double -5.950000e+00, double 2.110000e+00, double -3.700000e+00, double -5.840000e+00, double 1.600000e+00, double -8.000000e-02, double -4.490000e+00, double -3.900000e+00], [16 x double] [double -5.840000e+00, double -6.600000e+00, double -4.720000e+00, double 5.900000e-01, double -6.100000e+00, double -5.770000e+00, double 1.210000e+00, double 3.470000e+00, double 0xC01FB851EB851EB8, double -2.700000e-01, double 0xC01F851EB851EB85, double -5.610000e+00, double -5.700000e-01, double -2.090000e+00, double -5.300000e+00, double -4.450000e+00], [16 x double] [double 0xC021AE147AE147AE, double -1.041000e+01, double 0xC022BD70A3D70A3D, double -5.560000e+00, double -7.980000e+00, double -1.136000e+01, double -5.950000e+00, double 0xC01FB851EB851EB8, double -5.130000e+00, double -3.570000e+00, double -1.045000e+01, double -8.490000e+00, double -2.420000e+00, double -5.630000e+00, double -7.080000e+00, double -8.390000e+00], [16 x double] [double -4.680000e+00, double -4.570000e+00, double -5.860000e+00, double 1.670000e+00, double -6.000000e+00, double -4.660000e+00, double 2.110000e+00, double -2.700000e-01, double -3.570000e+00, double 5.360000e+00, double -5.710000e+00, double -4.960000e+00, double 2.750000e+00, double 1.320000e+00, double -4.910000e+00, double -3.670000e+00], [16 x double] [double -1.437000e+01, double -1.453000e+01, double -9.080000e+00, double -6.710000e+00, double -1.243000e+01, double -1.258000e+01, double -3.700000e+00, double 0xC01F851EB851EB85, double -1.045000e+01, double 
-5.710000e+00, double -3.590000e+00, double -5.770000e+00, double -6.880000e+00, double -8.410000e+00, double -7.400000e+00, double -5.410000e+00], [16 x double] [double -1.264000e+01, double -1.014000e+01, double -1.045000e+01, double -5.170000e+00, double 0xC01ED70A3D70A3D7, double -1.369000e+01, double -5.840000e+00, double -5.610000e+00, double -8.490000e+00, double -4.960000e+00, double -5.770000e+00, double -2.280000e+00, double -4.720000e+00, double -7.360000e+00, double -3.830000e+00, double -5.210000e+00], [16 x double] [double -4.010000e+00, double -5.430000e+00, double -5.330000e+00, double 1.610000e+00, double -5.850000e+00, double -5.750000e+00, double 1.600000e+00, double -5.700000e-01, double -2.420000e+00, double 2.750000e+00, double -6.880000e+00, double -4.720000e+00, double 4.970000e+00, double 1.140000e+00, double -2.980000e+00, double -3.390000e+00], [16 x double] [double -6.160000e+00, double -5.940000e+00, double -6.930000e+00, double -5.100000e-01, double 0xC01E333333333333, double -4.270000e+00, double -8.000000e-02, double -2.090000e+00, double -5.630000e+00, double 1.320000e+00, double -8.410000e+00, double -7.360000e+00, double 1.140000e+00, double 3.360000e+00, double -4.760000e+00, double -4.280000e+00], [16 x double] [double -1.132000e+01, double -8.870000e+00, double -8.670000e+00, double -4.810000e+00, double -6.630000e+00, double -1.201000e+01, double -4.490000e+00, double -5.300000e+00, double -7.080000e+00, double -4.910000e+00, double -7.400000e+00, double -3.830000e+00, double -2.980000e+00, double -4.760000e+00, double -3.210000e+00, double -5.970000e+00], [16 x double] [double -9.050000e+00, double -1.107000e+01, double -7.830000e+00, double -2.980000e+00, double -1.154000e+01, double -1.079000e+01, double -3.900000e+00, double -4.450000e+00, double -8.390000e+00, double -3.670000e+00, double -5.410000e+00, double -5.210000e+00, double -3.390000e+00, double -4.280000e+00, double -5.970000e+00, double -2.000000e-02]], align 16

; Reduced reproducer accompanying the revert: running `opt -instcombine` on
; this function is reported to hit InstCombine's infinite-combine-loop
; detection threshold (found via MultiSource/Benchmarks/mafft, X86, -O3 -flto).
;
; Shape of the function:
;   bb  -> bb1, bb1 loops on itself under an undef condition, then falls into
;   bb2, which branches unconditionally back to itself. There is no `ret`.
;
; bb2 is a long chain of <4 x i1> mask computations. A seed mask is built from
; comparisons of undef, then for each constant column index 1..10 of @global
; the same stage is repeated:
;   1. gather four doubles from @global[undef row][column]
;   2. compare them `ogt 0` and `olt 0`
;   3. split the incoming mask into three terms
;        gt & mask,   lt & (!gt & mask),   !lt & (!gt & mask)
;      (negation is spelled `xor ..., true`)
;   4. `or` the three terms back together to form the next stage's mask
; The last stage (column 10) additionally rounds the gathered values via
; fadd +/-0.5 + fptosi, selects between them, and scatters the result back.
define void @blam() local_unnamed_addr {
bb:
  br label %bb1

bb1:                                              ; preds = %bb1, %bb
  br i1 undef, label %bb2, label %bb1

bb2:                                              ; preds = %bb2, %bb1
  ; Seed mask: built from comparisons of undef vectors, then or'ed with undef.
  %tmp = fcmp ogt <4 x double> undef, zeroinitializer
  %tmp3 = fcmp olt <4 x double> undef, zeroinitializer
  %tmp4 = xor <4 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true>
  %tmp5 = xor <4 x i1> %tmp3, <i1 true, i1 true, i1 true, i1 true>
  %tmp6 = and <4 x i1> %tmp5, %tmp4
  ; Stage for column 1. %tmp11/%tmp12 finish the seed mask consumed below.
  %tmp7 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 1
  %tmp8 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp7, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp9 = fcmp ogt <4 x double> %tmp8, zeroinitializer
  %tmp10 = fcmp olt <4 x double> %tmp8, zeroinitializer
  %tmp11 = or <4 x i1> %tmp, %tmp6
  %tmp12 = or <4 x i1> %tmp11, undef
  %tmp13 = and <4 x i1> %tmp9, %tmp12
  %tmp14 = xor <4 x i1> %tmp9, <i1 true, i1 true, i1 true, i1 true>
  %tmp15 = and <4 x i1> %tmp14, %tmp12
  %tmp16 = and <4 x i1> %tmp10, %tmp15
  %tmp17 = xor <4 x i1> %tmp10, <i1 true, i1 true, i1 true, i1 true>
  %tmp18 = and <4 x i1> %tmp17, %tmp15
  ; Stage for column 2 (mask in: %tmp24).
  %tmp19 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 2
  %tmp20 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp19, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp21 = fcmp ogt <4 x double> %tmp20, zeroinitializer
  %tmp22 = fcmp olt <4 x double> %tmp20, zeroinitializer
  %tmp23 = or <4 x i1> %tmp13, %tmp16
  %tmp24 = or <4 x i1> %tmp23, %tmp18
  %tmp25 = and <4 x i1> %tmp21, %tmp24
  %tmp26 = xor <4 x i1> %tmp21, <i1 true, i1 true, i1 true, i1 true>
  %tmp27 = and <4 x i1> %tmp26, %tmp24
  %tmp28 = and <4 x i1> %tmp22, %tmp27
  %tmp29 = xor <4 x i1> %tmp22, <i1 true, i1 true, i1 true, i1 true>
  %tmp30 = and <4 x i1> %tmp29, %tmp27
  ; Stage for column 3 (mask in: %tmp36).
  %tmp31 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 3
  %tmp32 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp31, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp33 = fcmp ogt <4 x double> %tmp32, zeroinitializer
  %tmp34 = fcmp olt <4 x double> %tmp32, zeroinitializer
  %tmp35 = or <4 x i1> %tmp25, %tmp28
  %tmp36 = or <4 x i1> %tmp35, %tmp30
  %tmp37 = and <4 x i1> %tmp33, %tmp36
  %tmp38 = xor <4 x i1> %tmp33, <i1 true, i1 true, i1 true, i1 true>
  %tmp39 = and <4 x i1> %tmp38, %tmp36
  %tmp40 = and <4 x i1> %tmp34, %tmp39
  %tmp41 = xor <4 x i1> %tmp34, <i1 true, i1 true, i1 true, i1 true>
  %tmp42 = and <4 x i1> %tmp41, %tmp39
  ; Stage for column 4 (mask in: %tmp48).
  %tmp43 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 4
  %tmp44 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp43, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp45 = fcmp ogt <4 x double> %tmp44, zeroinitializer
  %tmp46 = fcmp olt <4 x double> %tmp44, zeroinitializer
  %tmp47 = or <4 x i1> %tmp37, %tmp40
  %tmp48 = or <4 x i1> %tmp47, %tmp42
  %tmp49 = and <4 x i1> %tmp45, %tmp48
  %tmp50 = xor <4 x i1> %tmp45, <i1 true, i1 true, i1 true, i1 true>
  %tmp51 = and <4 x i1> %tmp50, %tmp48
  %tmp52 = and <4 x i1> %tmp46, %tmp51
  %tmp53 = xor <4 x i1> %tmp46, <i1 true, i1 true, i1 true, i1 true>
  %tmp54 = and <4 x i1> %tmp53, %tmp51
  ; Stage for column 5 (mask in: %tmp60).
  %tmp55 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 5
  %tmp56 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp55, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp57 = fcmp ogt <4 x double> %tmp56, zeroinitializer
  %tmp58 = fcmp olt <4 x double> %tmp56, zeroinitializer
  %tmp59 = or <4 x i1> %tmp49, %tmp52
  %tmp60 = or <4 x i1> %tmp59, %tmp54
  %tmp61 = and <4 x i1> %tmp57, %tmp60
  %tmp62 = xor <4 x i1> %tmp57, <i1 true, i1 true, i1 true, i1 true>
  %tmp63 = and <4 x i1> %tmp62, %tmp60
  %tmp64 = and <4 x i1> %tmp58, %tmp63
  %tmp65 = xor <4 x i1> %tmp58, <i1 true, i1 true, i1 true, i1 true>
  %tmp66 = and <4 x i1> %tmp65, %tmp63
  ; Stage for column 6 (mask in: %tmp72).
  %tmp67 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 6
  %tmp68 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp67, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp69 = fcmp ogt <4 x double> %tmp68, zeroinitializer
  %tmp70 = fcmp olt <4 x double> %tmp68, zeroinitializer
  %tmp71 = or <4 x i1> %tmp61, %tmp64
  %tmp72 = or <4 x i1> %tmp71, %tmp66
  %tmp73 = and <4 x i1> %tmp69, %tmp72
  %tmp74 = xor <4 x i1> %tmp69, <i1 true, i1 true, i1 true, i1 true>
  %tmp75 = and <4 x i1> %tmp74, %tmp72
  %tmp76 = and <4 x i1> %tmp70, %tmp75
  %tmp77 = xor <4 x i1> %tmp70, <i1 true, i1 true, i1 true, i1 true>
  %tmp78 = and <4 x i1> %tmp77, %tmp75
  ; Stage for column 7 (mask in: %tmp84).
  %tmp79 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 7
  %tmp80 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp79, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp81 = fcmp ogt <4 x double> %tmp80, zeroinitializer
  %tmp82 = fcmp olt <4 x double> %tmp80, zeroinitializer
  %tmp83 = or <4 x i1> %tmp73, %tmp76
  %tmp84 = or <4 x i1> %tmp83, %tmp78
  %tmp85 = and <4 x i1> %tmp81, %tmp84
  %tmp86 = xor <4 x i1> %tmp81, <i1 true, i1 true, i1 true, i1 true>
  %tmp87 = and <4 x i1> %tmp86, %tmp84
  %tmp88 = and <4 x i1> %tmp82, %tmp87
  %tmp89 = xor <4 x i1> %tmp82, <i1 true, i1 true, i1 true, i1 true>
  %tmp90 = and <4 x i1> %tmp89, %tmp87
  ; Stage for column 8 (mask in: %tmp96).
  %tmp91 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 8
  %tmp92 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp91, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp93 = fcmp ogt <4 x double> %tmp92, zeroinitializer
  %tmp94 = fcmp olt <4 x double> %tmp92, zeroinitializer
  %tmp95 = or <4 x i1> %tmp85, %tmp88
  %tmp96 = or <4 x i1> %tmp95, %tmp90
  %tmp97 = and <4 x i1> %tmp93, %tmp96
  %tmp98 = xor <4 x i1> %tmp93, <i1 true, i1 true, i1 true, i1 true>
  %tmp99 = and <4 x i1> %tmp98, %tmp96
  %tmp100 = and <4 x i1> %tmp94, %tmp99
  %tmp101 = xor <4 x i1> %tmp94, <i1 true, i1 true, i1 true, i1 true>
  %tmp102 = and <4 x i1> %tmp101, %tmp99
  ; Stage for column 9 (mask in: %tmp108).
  %tmp103 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 9
  %tmp104 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp103, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp105 = fcmp ogt <4 x double> %tmp104, zeroinitializer
  %tmp106 = fcmp olt <4 x double> %tmp104, zeroinitializer
  %tmp107 = or <4 x i1> %tmp97, %tmp100
  %tmp108 = or <4 x i1> %tmp107, %tmp102
  %tmp109 = and <4 x i1> %tmp105, %tmp108
  %tmp110 = xor <4 x i1> %tmp105, <i1 true, i1 true, i1 true, i1 true>
  %tmp111 = and <4 x i1> %tmp110, %tmp108
  %tmp112 = and <4 x i1> %tmp106, %tmp111
  %tmp113 = xor <4 x i1> %tmp106, <i1 true, i1 true, i1 true, i1 true>
  %tmp114 = and <4 x i1> %tmp113, %tmp111
  ; Final stage, column 10 (mask in: %tmp124). Besides the comparisons, the
  ; gathered values are shifted by -0.5 / +0.5 and truncated to i32.
  %tmp115 = getelementptr inbounds [16 x [16 x double]], [16 x [16 x double]]* @global, i64 0, <4 x i64> undef, i64 10
  %tmp116 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
  %tmp117 = fcmp ogt <4 x double> %tmp116, zeroinitializer
  %tmp118 = fcmp olt <4 x double> %tmp116, zeroinitializer
  %tmp119 = fadd <4 x double> %tmp116, <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
  %tmp120 = fptosi <4 x double> %tmp119 to <4 x i32>
  %tmp121 = fadd <4 x double> %tmp116, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
  %tmp122 = fptosi <4 x double> %tmp121 to <4 x i32>
  %tmp123 = or <4 x i1> %tmp109, %tmp112
  %tmp124 = or <4 x i1> %tmp123, %tmp114
  ; Only the "neither gt nor lt" term of the mask split is formed here.
  %tmp125 = xor <4 x i1> %tmp117, <i1 true, i1 true, i1 true, i1 true>
  %tmp126 = and <4 x i1> %tmp125, %tmp124
  %tmp127 = xor <4 x i1> %tmp118, <i1 true, i1 true, i1 true, i1 true>
  %tmp128 = and <4 x i1> %tmp127, %tmp126
  ; Pick between the two truncated values under an undef condition, then force
  ; lanes selected by %tmp128 to zero.
  %tmp129 = select <4 x i1> undef, <4 x i32> %tmp120, <4 x i32> %tmp122
  %tmp130 = select <4 x i1> %tmp128, <4 x i32> zeroinitializer, <4 x i32> %tmp129
  %tmp131 = sitofp <4 x i32> %tmp130 to <4 x double>
  ; Store back through the same pointer vector that was gathered from.
  call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %tmp131, <4 x double*> %tmp115, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  br label %bb2
}

; Masked gather of four doubles. Operand roles per the LLVM LangRef: vector of
; pointers, alignment (immediate), per-lane mask, pass-through value.
; Function Attrs: nounwind readonly willreturn
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32 immarg, <4 x i1>, <4 x double>) #0

; Masked scatter of four doubles. Operand roles per the LLVM LangRef: value to
; store, vector of pointers, alignment (immediate), per-lane mask.
; Function Attrs: nounwind willreturn
declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 immarg, <4 x i1>) #1

; Attribute groups referenced by the two declarations (#0: gather, #1: scatter).
attributes #0 = { nounwind readonly willreturn }
attributes #1 = { nounwind willreturn }


More information about the llvm-commits mailing list