[llvm] [InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL (PR #87474)
Sushant Gokhale via llvm-commits
llvm-commits@lists.llvm.org
Mon Jun 3 00:04:24 PDT 2024
================
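In IR terms, the transform under test rewrites the following pattern (a sketch distilled from bb_constraint_case1 below; the "output" comments paraphrase the autogenerated CHECK lines):

; Input: two divisions that depend on the high-latency sqrt.
;   %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
;   %div  = fdiv reassoc arcp ninf double 1.0, %sqrt  ; 1/sqrt(a)
;   %mul  = fmul reassoc double %div, %div            ; (1/sqrt(a))^2 == 1/a
;   %div1 = fdiv reassoc double %a, %sqrt             ; a/sqrt(a) == sqrt(a)
; Output: the divisions by %sqrt are gone.
;   %sqrt1 = call reassoc double @llvm.sqrt.f64(double %a)
;   %mul   = fdiv reassoc double 1.0, %a              ; 1/a, independent of the sqrt
;   %div   = fdiv reassoc ninf arcp double %sqrt1, %a ; sqrt(a)/a == 1/sqrt(a)
;   %div1  is replaced by %sqrt1 itself.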
@@ -0,0 +1,489 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
+
+@x = global double 0.000000e+00
+@r1 = global double 0.000000e+00
+@r2 = global double 0.000000e+00
+@r3 = global double 0.000000e+00
+
+; div/mul/div1 in the same block.
+define void @bb_constraint_case1(double %a) {
+; CHECK-LABEL: define void @bb_constraint_case1(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[SQRT1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+; div/mul in one block and div1 in another block, behind a conditional guard.
+define void @bb_constraint_case2(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case2(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[SQRT1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %d.not = icmp eq i32 %d, 0
+ br i1 %d.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; div in one block; mul/div1 in another block, conditionally guarded. Don't optimize.
+define void @bb_constraint_case3(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case3(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %d.not = icmp eq i32 %d, 0
+ br i1 %d.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; div in one block; mul and div1 each in a different block, conditionally guarded. Don't optimize.
+define void @bb_constraint_case4(double %a, i32 %c, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case4(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]]
+; CHECK: if.then1:
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: br label [[IF_END1]]
+; CHECK: if.end1:
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %c.not = icmp eq i32 %c, 0
+ br i1 %c.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %d.not = icmp eq i32 %d, 0
+ br i1 %d.not, label %if.end1, label %if.then1
+
+if.then1: ; preds = %if.end
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ br label %if.end1
+
+if.end1: ; preds = %if.then1, %if.end
+ ret void
+}
+
+; sqrt value comes from different blocks. Don't optimize.
+define void @bb_constraint_case5(double %a, i32 %c) {
+; CHECK-LABEL: define void @bb_constraint_case5(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[A]], 1.000000e+01
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[ADD]])
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[SQRT:%.*]] = phi double [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %c.not = icmp eq i32 %c, 0
+ br i1 %c.not, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %0 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ br label %if.end
+
+if.else: ; preds = %entry
+ %add = fadd double %a, 1.000000e+01
+ %1 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %add)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sqrt = phi double [ %0, %if.then ], [ %1, %if.else ]
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; div in one block, conditionally guarded; mul/div1 in another block. Don't optimize.
+define void @bb_constraint_case6(double %a, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case6(
+; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @x, align 8
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[TMP1]], ptr @x, align 8
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[DIV:%.*]] = phi double [ [[TMP0]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ]
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %d.not = icmp eq i32 %d, 0
+ br i1 %d.not, label %if.else, label %if.then
+
+if.else: ; preds = %entry
+ %1 = load double, ptr @x
+ br label %if.end
+
+if.then: ; preds = %entry
+ %2 = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %2, ptr @x
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %div = phi double [ %1, %if.else ], [ %2, %if.then ]
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; value for mul comes from different blocks. Don't optimize.
+define void @bb_constraint_case7(double %a, i32 %c, i32 %d) {
+; CHECK-LABEL: define void @bb_constraint_case7(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv double 3.000000e+00, [[A]]
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE1:%.*]], label [[IF_THEN1:%.*]]
+; CHECK: if.then1:
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv double 2.000000e+00, [[A]]
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.else1:
+; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[MUL:%.*]] = phi double [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP2]], [[IF_ELSE1]] ], [ [[TMP0]], [[IF_THEN]] ]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %c.not = icmp eq i32 %c, 0
+ br i1 %c.not, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %1 = fdiv double 3.000000e+00, %a
+ br label %if.end
+
+if.else: ; preds = %entry
+ %d.not = icmp eq i32 %d, 0
+ br i1 %d.not, label %if.else1, label %if.then1
+
+if.then1: ; preds = %if.else
+ %2 = fdiv double 2.000000e+00, %a
+ br label %if.end
+
+if.else1: ; preds = %if.else
+ %3 = fmul reassoc double %div, %div
+ br label %if.end
+
+if.end: ; preds = %if.then1, %if.else1, %if.then
+ %mul = phi double [ %2, %if.then1 ], [ %3, %if.else1 ], [ %1, %if.then ]
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; value of mul is selected between two candidates (as shown by the select instruction).
+define void @bb_constraint_case8(double %a, i32 %c) {
+; CHECK-LABEL: define void @bb_constraint_case8(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[SQRT1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP0]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %c.not = icmp eq i32 %c, 0
+ %1 = fmul double %a, %a
+ %2 = fmul reassoc double %div, %div
+ %mul = select i1 %c.not, double %1, double %2
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; multiple instances of multiply ops to optimize. Optimize all.
+define void @multiple_multiply_instances(double %a, i32 %c) {
+; CHECK-LABEL: define void @multiple_multiply_instances(
+; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[SQRT1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[A]], [[A]]
+; CHECK-NEXT: [[MUL1:%.*]] = select i1 [[C_NOT]], double [[TMP2]], double [[TMP1]]
+; CHECK-NEXT: [[MUL2:%.*]] = select i1 [[C_NOT]], double [[TMP0]], double [[TMP3]]
+; CHECK-NEXT: store double [[MUL1]], ptr @r1, align 8
+; CHECK-NEXT: store double [[MUL2]], ptr @r3, align 8
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %c.not = icmp eq i32 %c, 0
+ %1 = fmul double %a, %a
+ %2 = fmul double %a, %a
+ %3 = fmul reassoc double %div, %div
+ %4 = fmul reassoc double %div, %div
+ %mul1 = select i1 %c.not, double %1, double %3
+ %mul2 = select i1 %c.not, double %4, double %2
+ store double %mul1, ptr @r1
+ store double %mul2, ptr @r3
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; missing flags for optimization.
+define void @missing_flags_on_div(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_div(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; missing flags for optimization.
+define void @missing_flags_on_mul(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_mul(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; missing flags for optimization.
+define void @missing_flags_on_div1(double %a) {
+; CHECK-LABEL: define void @missing_flags_on_div1(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]]
+; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv double [[A]], [[SQRT]]
+; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+; div = -1/sqrt(a)
+define void @negative_fdiv_val(double %a) {
+; CHECK-LABEL: define void @negative_fdiv_val(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]])
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc ninf arcp double [[SQRT1]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[TMP1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a)
+ %div = fdiv reassoc arcp ninf double -1.000000e+00, %sqrt
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt
+ store double %div1, ptr @r2
+ ret void
+}
+
+define void @preserve_fpmath_metadata(double %a) {
+; CHECK-LABEL: define void @preserve_fpmath_metadata(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]), !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]], !fpmath [[META1:![0-9]+]]
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double [[SQRT1]], [[A]]
+; CHECK-NEXT: store double [[DIV]], ptr @x, align 8
+; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8
+; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8
+; CHECK-NEXT: ret void
+; CHECK: [[META0]] = !{float 5.500000e+00}
+; CHECK: [[META1]] = !{float 4.500000e+00}
+entry:
+ %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a), !fpmath !0
+ %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt, !fpmath !1
+ store double %div, ptr @x
+ %mul = fmul reassoc double %div, %div, !fpmath !2
+ store double %mul, ptr @r1
+ %div1 = fdiv reassoc double %a, %sqrt, !fpmath !3
+ store double %div1, ptr @r2
+ ret void
+}
+declare double @llvm.sqrt.f64(double)
+
----------------
sushgokh wrote:
> Can you add some cases with fdiv -1, x?

That case is already covered by the test named `negative_fdiv_val`.

> Also test a vector case

Also, why would a vector case be needed? I know that InstCombine runs even after vectorization, but this transform would kick in well before vectorization, right?
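If a vector test is still wanted, a minimal variant of bb_constraint_case1 could look something like the sketch below (untested; the @vector_variant name and the @vx/@vr1/@vr2 vector globals are hypothetical, mirroring the scalar tests):

; Hypothetical vector counterpart of bb_constraint_case1 (sketch, not part of this patch).
@vx = global <2 x double> zeroinitializer
@vr1 = global <2 x double> zeroinitializer
@vr2 = global <2 x double> zeroinitializer

define void @vector_variant(<2 x double> %a) {
entry:
  %sqrt = call reassoc nnan nsz ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ; Same div/mul/div1 pattern as the scalar case, on <2 x double>.
  %div = fdiv reassoc arcp ninf <2 x double> <double 1.000000e+00, double 1.000000e+00>, %sqrt
  store <2 x double> %div, ptr @vx
  %mul = fmul reassoc <2 x double> %div, %div
  store <2 x double> %mul, ptr @vr1
  %div1 = fdiv reassoc <2 x double> %a, %sqrt
  store <2 x double> %div1, ptr @vr2
  ret void
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)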
https://github.com/llvm/llvm-project/pull/87474