[llvm] [AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd (PR #121465)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 03:07:21 PST 2025


================
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -fp-contract=fast < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-FP-CONTRACT %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=ieee < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-IEEE %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-PRESERVE %s
+
+; double case: the checks shared by all RUN lines expect the load of %a to be
+; hoisted into the entry block, while each branch keeps its own fmul.
+define double @_branch(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define double @_branch(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX-NEXT:  [[ENTRY:.*:]]
+; GFX-NEXT:    [[TMP0:%.*]] = load double, ptr [[Y]], align 8
+; GFX-NEXT:    [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
+; GFX-NEXT:    [[TMP1:%.*]] = load double, ptr [[X]], align 8
+; GFX-NEXT:    [[TMP2:%.*]] = load double, ptr [[A]], align 8
+; GFX-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX:       [[COMMON_RET:.*]]:
+; GFX-NEXT:    [[COMMON_RET_OP:%.*]] = phi double [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT:    ret double [[COMMON_RET_OP]]
+; GFX:       [[IF_THEN]]:
+; GFX-NEXT:    [[MUL]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[ADD:%.*]] = fadd fast double 1.000000e+00, [[MUL]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+; GFX:       [[IF_ELSE]]:
+; GFX-NEXT:    [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[SUB]] = fsub fast double [[MUL1]], [[TMP0]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load double, ptr %y, align 8
+  %cmp = fcmp oeq double %0, 0.000000e+00
+  %1 = load double, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %2 = load double, ptr %a, align 8
+  %mul = fmul fast double %1, %2
+  ; NOTE(review): %add is computed but never used; this block returns %mul.
+  ; Confirm whether returning %add (an fmul+fadd chain) was intended.
+  %add = fadd fast double 1.000000e+00, %mul
+  ret double %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load double, ptr %a, align 8
+  %mul1 = fmul fast double %1, %3
+  %sub = fsub fast double %mul1, %0
+  ret double %sub
+}
+
+; float case: the checks shared by all RUN lines expect the load of %a to be
+; hoisted into the entry block, while each branch keeps its own fmul.
+define float @_branch2(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define float @_branch2(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-NEXT:  [[ENTRY:.*:]]
+; GFX-NEXT:    [[TMP0:%.*]] = load float, ptr [[Y]], align 8
+; GFX-NEXT:    [[CMP:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
+; GFX-NEXT:    [[TMP1:%.*]] = load float, ptr [[X]], align 8
+; GFX-NEXT:    [[TMP2:%.*]] = load float, ptr [[A]], align 8
+; GFX-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX:       [[COMMON_RET:.*]]:
+; GFX-NEXT:    [[COMMON_RET_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT:    ret float [[COMMON_RET_OP]]
+; GFX:       [[IF_THEN]]:
+; GFX-NEXT:    [[MUL]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[ADD:%.*]] = fadd fast float 1.000000e+00, [[MUL]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+; GFX:       [[IF_ELSE]]:
+; GFX-NEXT:    [[MUL1:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT:    [[SUB]] = fsub fast float [[MUL1]], [[TMP0]]
+; GFX-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load float, ptr %y, align 8
+  %cmp = fcmp oeq float %0, 0.000000e+00
+  %1 = load float, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+
+if.then:                                          ; preds = %entry
+  %2 = load float, ptr %a, align 8
+  %mul = fmul fast float %1, %2
+  ; NOTE(review): %add is computed but never used; this block returns %mul.
+  ; Confirm whether returning %add (an fmul+fadd chain) was intended.
+  %add = fadd fast float 1.000000e+00, %mul
+  ret float %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load float, ptr %a, align 8
+  %mul1 = fmul fast float %1, %3
+  %sub = fsub fast float %mul1, %0
+  ret float %sub
+}
+
+; half case: only the run using -enable-unsafe-fp-math together with
+; --denormal-fp-math=preserve-sign expects the common fmul to be hoisted into
+; the entry block; the other two runs keep one fmul per branch.
+define half @_branch3(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-FP-CONTRACT-LABEL: define half @_branch3(
+; GFX-FP-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-FP-CONTRACT-NEXT:  [[ENTRY:.*:]]
+; GFX-FP-CONTRACT-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-FP-CONTRACT-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-FP-CONTRACT-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-FP-CONTRACT-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-FP-CONTRACT-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-FP-CONTRACT:       [[COMMON_RET:.*]]:
+; GFX-FP-CONTRACT-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-FP-CONTRACT-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-FP-CONTRACT:       [[IF_THEN]]:
+; GFX-FP-CONTRACT-NEXT:    [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-FP-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+; GFX-FP-CONTRACT:       [[IF_ELSE]]:
+; GFX-FP-CONTRACT-NEXT:    [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT:    [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-FP-CONTRACT-NEXT:    br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-IEEE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-IEEE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-IEEE-NEXT:  [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-IEEE:       [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-IEEE-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-IEEE:       [[IF_THEN]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-IEEE:       [[IF_ELSE]]:
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-UNSAFE-FP-IEEE-NEXT:    br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-PRESERVE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-PRESERVE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-PRESERVE-NEXT:  [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[MUL:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-PRESERVE:       [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[COMMON_RET_OP:%.*]] = phi half [ [[MUL]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-PRESERVE:       [[IF_THEN]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-PRESERVE:       [[IF_ELSE]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    [[SUB]] = fsub fast half [[MUL]], [[TMP0]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT:    br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load half, ptr %y, align 8
+  %cmp = fcmp oeq half %0, 0.000000e+00
+  %1 = load half, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %2 = load half, ptr %a, align 8
+  %mul = fmul fast half %1, %2
+  ; NOTE(review): %add is computed but never used; this block returns %mul.
+  ; Confirm whether returning %add (an fmul+fadd chain) was intended.
+  %add = fadd fast half 1.000000e+00, %mul
+  ret half %mul
+
+if.else:                                          ; preds = %entry
+  %3 = load half, ptr %a, align 8
+  %mul1 = fmul fast half %1, %3
+  %sub = fsub fast half %mul1, %0
+  ret half %sub
+}
----------------
arsenm wrote:

Check some cases with additional fneg/fabs modifiers, and maybe a constant-operand case too. Also add vector and bfloat cases.

https://github.com/llvm/llvm-project/pull/121465


More information about the llvm-commits mailing list