[llvm] [InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL (PR #87474)

Joshua Cranmer via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 29 13:46:43 PDT 2024


================
@@ -0,0 +1,459 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
+
+ at x = global double 0.000000e+00
+ at r1 = global double 0.000000e+00
+ at r2 = global double 0.000000e+00
+ at r3 = global double 0.000000e+00
+
+; div/mul/div1 in the same block.
+define void @bb_constraint_case1(double %a) {
+; CHECK-LABEL: define void @bb_constraint_case1(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc nnan nsz double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc arcp double [[TMP0]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT:    store double [[SQRT]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+; div/mul in one block and div1 in another block with a conditional guard.
+define void @bb_constraint_case2(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case2(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc nnan nsz double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc arcp double [[TMP0]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store double [[SQRT]], ptr @r2, align 8
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %d.not = icmp eq i32 %d, 0
+  br i1 %d.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; div in one block. mul/div1 in another block and conditionally guarded. Don't optimize.
+define void @bb_constraint_case3(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case3(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %d.not = icmp eq i32 %d, 0
+  br i1 %d.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; div in one block. mul/div1 each in a different block and conditionally guarded. Don't optimize.
+define void @bb_constraint_case4(double %a, i32 %c, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case4(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    br label [[IF_END1]]
+; CHECK:       if.end1:
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %c.not = icmp eq i32 %c, 0
+  br i1 %c.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %d.not = icmp eq i32 %d, 0
+  br i1 %d.not, label %if.end1, label %if.then1
+
+if.then1:                                         ; preds = %if.end
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  br label %if.end1
+
+if.end1:                                          ; preds = %if.then1, %if.end
+  ret void
+}
+
+; sqrt value comes from different blocks. Don't optimize.
+define void @bb_constraint_case5(double %a, i32 %c) {
+; CHECK-LABEL: define void @bb_constraint_case5(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[A]], 1.000000e+01
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[ADD]])
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[SQRT:%.*]] = phi double [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %c.not = icmp eq i32 %c, 0
+  br i1 %c.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = call double @llvm.sqrt.f64(double %a)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %add = fadd double %a, 1.000000e+01
+  %1 = call double @llvm.sqrt.f64(double %add)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %sqrt = phi double [ %0, %if.then ], [ %1, %if.else ]
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; div in one block and conditionally guarded. mul/div1 in another block. Don't optimize.
+define void @bb_constraint_case6(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case6(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @x, align 8
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[TMP1]], ptr @x, align 8
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[DIV:%.*]] = phi double [ [[TMP0]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %d.not = icmp eq i32 %d, 0
+  br i1 %d.not, label %if.else, label %if.then
+
+if.else:                                          ; preds = %entry
+  %1 = load double, ptr @x
+  br label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %2, ptr @x
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %div = phi double [ %1, %if.else ], [ %2, %if.then ]
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; value for mul comes from different blocks. Don't optimize.
+define void @bb_constraint_case7(double %a, i32 %c, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case7(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv double 3.000000e+00, [[A]]
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[D_NOT]], label [[IF_ELSE1:%.*]], label [[IF_THEN1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double 2.000000e+00, [[A]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else1:
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[MUL:%.*]] = phi double [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP2]], [[IF_ELSE1]] ], [ [[TMP0]], [[IF_THEN]] ]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %c.not = icmp eq i32 %c, 0
+  br i1 %c.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = fdiv double 3.000000e+00, %a
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %d.not = icmp eq i32 %d, 0
+  br i1 %d.not, label %if.else1, label %if.then1
+
+if.then1:                                         ; preds = %if.else
+  %2 = fdiv double 2.000000e+00, %a
+  br label %if.end
+
+if.else1:                                         ; preds = %if.else
+  %3 = fmul reassoc nnan nsz double %div, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.then1, %if.else1, %if.then
+  %mul = phi double [ %2, %if.then1 ], [ %3, %if.else1 ], [ %1, %if.then ]
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; value for mul comes from two different blocks (as shown by the select instruction).
+define void @bb_constraint_case8(double %a, i32 %c) {
+; CHECK-LABEL: define void @bb_constraint_case8(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc nnan nsz double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc arcp double [[TMP0]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT:    [[MUL:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP0]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    store double [[SQRT]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %c.not = icmp eq i32 %c, 0
+  %1 = fmul double %a, %a
+  %2 = fmul reassoc nnan nsz double %div, %div
+  %mul = select i1 %c.not, double %1, double %2
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; multiple instances of multiply ops to optimize. Optimize all.
+define void @multiple_multiply_instances(double %a, i32 %c) {
+; CHECK-LABEL: define void @multiple_multiply_instances(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc nnan nsz double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc arcp double [[TMP0]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT:    [[MUL1:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP0]]
+; CHECK-NEXT:    [[MUL2:%.*]] = select i1 [[C_NOT]], double [[TMP0]], double [[TMP2]]
+; CHECK-NEXT:    store double [[MUL1]], ptr @r1, align 8
+; CHECK-NEXT:    store double [[MUL2]], ptr @r3, align 8
+; CHECK-NEXT:    store double [[SQRT]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %c.not = icmp eq i32 %c, 0
+  %1 = fmul double %a, %a
+  %2 = fmul double %a, %a
+  %3 = fmul reassoc nnan nsz double %div, %div
+  %4 = fmul reassoc nnan nsz double %div, %div
+  %mul1 = select i1 %c.not, double %1, double %3
+  %mul2 = select i1 %c.not, double %4, double %2
+  store double %mul1, ptr @r1
+  store double %mul2, ptr @r3
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; missing fast-math flags on div. Don't optimize.
+define void @missing_flags_on_div(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_div(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; missing fast-math flags on mul. Don't optimize.
+define void @missing_flags_on_mul(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_mul(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv reassoc nsz double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; missing fast-math flags on div1. Don't optimize.
+define void @missing_flags_on_div1(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_div1(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT:    store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv double [[A]], [[SQRT]]
+; CHECK-NEXT:    store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double 1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
+}
+
+; div = -1/sqrt(a)
+define void @negative_fdiv_val(double %a) {
+; CHECK-LABEL: define void @negative_fdiv_val(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv reassoc nnan nsz double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc arcp double [[TMP0]]
+; CHECK-NEXT:    [[DIV:%.*]] = fmul reassoc arcp double [[SQRT]], [[TMP1]]
+; CHECK-NEXT:    store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT:    store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT:    store double [[SQRT]], ptr @r2, align 8
+; CHECK-NEXT:    ret void
+entry:
+  %sqrt = tail call double @llvm.sqrt.f64(double %a)
+  %div = fdiv reassoc arcp double -1.000000e+00, %sqrt
+  store double %div, ptr @x
+  %mul = fmul reassoc nnan nsz double %div, %div
+  store double %mul, ptr @r1
+  %div1 = fdiv reassoc nsz double %a, %sqrt
+  store double %div1, ptr @r2
+  ret void
----------------
jcranmer-intel wrote:

Staring hard at this case, the transformation does not seem correct to me, though without more work I don't have a precise rationale for all the necessary preconditions. If you evaluate this code with `%a = -0.0`, then the results we get are:

```
  %sqrt = -0.0
  *@x = %div = -1.0 / -0.0 = +infinity
  *@r1 = %mul =  +inf * +inf = +inf
  *@r2 = %div1 = -0.0 / -0.0 [nsz] = nan
```

The transformed code is:
```
  %sqrt = -0.0
  *@r1 = 1.0 / -0.0 [nsz] = +/- infinity
  *@r2 = %sqrt = -0.0
  *@x = -0.0 * %tmp1 = -0.0 * -( +/- infinity) = nan
```
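
For reference, here is the same arithmetic at the C level (a minimal sketch, not part of the patch; it assumes strict IEEE semantics, so build without `-ffast-math`, e.g. `cc -O0 demo.c -lm`):

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  double a = -0.0;
  double s = sqrt(a);       /* sqrt(-0.0) == -0.0 */

  /* Original expressions from the test */
  double div  = -1.0 / s;   /* -1.0 / -0.0 == +inf   -> @x  */
  double mul  = div * div;  /* +inf * +inf == +inf   -> @r1 */
  double div1 = a / s;      /* -0.0 / -0.0 == NaN    -> @r2 */

  /* Expressions after the transform */
  double t0 = 1.0 / a;      /* 1.0 / -0.0 == -inf    -> @r1 */
  double t1 = -t0;          /* fneg: +inf */
  double x  = s * t1;       /* -0.0 * +inf == NaN    -> @x  */

  printf("original : x=%g r1=%g r2=%g\n", div, mul, div1);
  printf("transform: x=%g r1=%g r2=%g\n", x, t0, s);
  return 0;
}
```

Running it prints `inf`/`nan` for the original `@x`/`@r2` but `nan`/`-0` after the transform, which is the mismatch described above and goes beyond the sign freedom that `nsz` grants.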

https://github.com/llvm/llvm-project/pull/87474

