[llvm] LoopIdiomRecognize: detect and convert powi idiom (PR #72650)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 17 08:05:18 PST 2023
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/72650
>From ee30fb7fa86a08eb8c972113adcb16932b6e8b33 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Fri, 17 Nov 2023 12:35:28 +0000
Subject: [PATCH 1/3] LoopIdiomRecognize: add negative tests for powi idiom
The following code, when compiled under -ffast-math, produces bad
codegen due to LoopVectorize:
  float powi(float base, int exp) {
    float result = 1.0;
    for (int i = 0; i < exp; ++i)
      result *= base;
    return result;
  }
It can easily be replaced with the llvm.powi intrinsic, when the
exponent is, at most, a C int type. This is the job of
LoopIdiomRecognize, and has been marked as a TODO item for years. In
preparation to fulfill this wish, add negative tests corresponding to
variations of this program.
---
llvm/test/Transforms/LoopIdiom/powi.ll | 386 +++++++++++++++++++++++++
1 file changed, 386 insertions(+)
create mode 100644 llvm/test/Transforms/LoopIdiom/powi.ll
diff --git a/llvm/test/Transforms/LoopIdiom/powi.ll b/llvm/test/Transforms/LoopIdiom/powi.ll
new file mode 100644
index 000000000000000..63e9fbce5a931eb
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/powi.ll
@@ -0,0 +1,386 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes='loop(loop-idiom,loop-deletion,indvars),function(gvn,simplifycfg)' < %s -S | FileCheck %s
+
+define float @powi_f32(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_f32(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast float %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret float %result.lcssa
+}
+
+define double @powi_f64(double %base, i32 %exp) {
+; CHECK-LABEL: define double @powi_f64(
+; CHECK-SAME: double [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+define double @powi_i16_iv(double %base, i16 %exp) {
+; CHECK-LABEL: define double @powi_i16_iv(
+; CHECK-SAME: double [[BASE:%.*]], i16 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i16 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i16 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i16 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i16 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i16 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i16 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i16 %merge.dec, -1
+ %cmp.eq = icmp eq i16 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+define float @powi_canonical_iv_signed(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_signed(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.sgt = icmp sgt i32 %exp, 0
+ br i1 %cmp.sgt, label %for.body, label %exit
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %exit, label %for.body
+
+exit: ; preds = %for.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ ret float %result.lcssa
+}
+
+define float @powi_canonical_iv_unsigned(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_unsigned(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[EXIT:%.*]], label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.eq = icmp eq i32 %exp, 0
+ br i1 %cmp.eq, label %exit, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %exit, label %for.body
+
+exit: ; preds = %for.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ ret float %result.lcssa
+}
+
+define float @powi_const_i32_exp(float %base) {
+; CHECK-LABEL: define float @powi_const_i32_exp(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 2147483647
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[MUL]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %result = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp eq i32 %inc, 2147483647
+ br i1 %cmp, label %exit, label %for.body
+
+exit: ; preds = %for.body
+ ret float %mul
+}
+
+define float @powi_unrelated_computation(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_unrelated_computation(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[UNRELATED:%.*]] = phi i32 [ [[UNRELATED_XOR:%.*]], [[FOR_BODY]] ], [ 5, [[ENTRY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[UNRELATED_XOR]] = xor i32 [[IV]], [[UNRELATED]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cleanup:
+; CHECK-NEXT: [[TMP0:%.*]] = sitofp i32 [[UNRELATED_XOR]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[MUL]], [[TMP0]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = phi float [ 6.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[FOR_CLEANUP]] ]
+; CHECK-NEXT: ret float [[ADD]]
+;
+entry:
+ %cmp = icmp sgt i32 %exp, 0
+ br i1 %cmp, label %for.body, label %exit
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %unrelated = phi i32 [ %unrelated.xor, %for.body ], [ 5, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %unrelated.xor = xor i32 %iv, %unrelated
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %for.cleanup, label %for.body
+
+for.cleanup: ; preds = %for.body
+ %0 = sitofp i32 %unrelated.xor to float
+ %1 = fadd fast float %mul, %0
+ br label %exit
+
+exit: ; preds = %for.cleanup, %entry
+ %add = phi float [ 6.000000e+00, %entry ], [ %1, %for.cleanup ]
+ ret float %add
+}
+
+; Negative tests
+
+; The powi idiom is only legal for a base of floating-point type
+define i32 @powi_i32_base(i32 %base, i32 %exp) {
+; CHECK-LABEL: define i32 @powi_i32_base(
+; CHECK-SAME: i32 [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = mul nsw i32 [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret i32 [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi i32 [ %mul, %while.body ], [ 1, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = mul nsw i32 %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi i32 [ 1, %entry ], [ %mul, %while.body ]
+ ret i32 %result.lcssa
+}
+
+; The powi idiom is only legal in -ffast-math mode
+define float @powi_nofast(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_nofast(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul float %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret float %result.lcssa
+}
+
+; llvm.powi can only be generated for an exponent that is, at most, a C int type.
+define double @powi_i64_iv(double %base, i64 %exp) {
+; CHECK-LABEL: define double @powi_i64_iv(
+; CHECK-SAME: double [[BASE:%.*]], i64 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i64 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i64 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i64 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i64 %merge.dec, -1
+ %cmp.eq = icmp eq i64 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+define float @powi_const_i64_iv(float %base) {
+; CHECK-LABEL: define float @powi_const_i64_iv(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[EXP:%.*]] = phi i64 [ 2147483648, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i64 [[EXP]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret float [[MUL]]
+;
+entry:
+ br label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %exp = phi i64 [ 2147483648, %entry ], [ %dec, %while.body ]
+ %result = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ %mul = fmul fast float %result, %base
+ %dec = add nsw i64 %exp, -1
+ %cmp = icmp eq i64 %dec, 0
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ ret float %mul
+}
>From 430c0bc0fd80d885743203efa4e46bf8898afc4c Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Fri, 17 Nov 2023 12:35:28 +0000
Subject: [PATCH 2/3] LoopIdiomRecognize: detect and convert powi idiom
The following code, when compiled under -ffast-math, produces bad
codegen due to LoopVectorize:
  float powi(float base, int exp) {
    float result = 1.0;
    for (int i = 0; i < exp; ++i)
      result *= base;
    return result;
  }
It can easily be replaced with the llvm.powi intrinsic, when the
exponent is, at most, a C int type. This is the job of
LoopIdiomRecognize, and has been marked as a TODO item for years.
Fulfill this wish, and replace computations of this form with the
llvm.powi intrinsic.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 166 +++++++++++++++---
llvm/test/Transforms/LoopIdiom/powi.ll | 99 +++--------
2 files changed, 163 insertions(+), 102 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 4093a5a51a4d79e..3c9e99ebe6f3cb9 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -22,8 +22,6 @@
//
// Future loop memory idioms to recognize:
// memcmp, strlen, etc.
-// Future floating point idioms to recognize in -ffast-math mode:
-// fpowi
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
@@ -94,6 +92,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-idiom"
@@ -206,6 +205,7 @@ class LoopIdiomRecognize {
const SCEV *BECount);
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+ bool processLoopPowi(BasicBlock *BB, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
MaybeAlign StoreAlignment, Value *StoredVal,
@@ -298,13 +298,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
- HasMemset = TLI->has(LibFunc_memset);
- HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
- HasMemcpy = TLI->has(LibFunc_memcpy);
-
- if (HasMemset || HasMemsetPattern || HasMemcpy)
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
return runOnNoncountableLoop();
}
@@ -549,33 +544,43 @@ bool LoopIdiomRecognize::runOnLoopBlock(
BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
- // executed in the loop. For a block to be unconditionally executed, it has
- // to dominate all the exit blocks of the loop. Verify this now.
+ // executed in the loop. The powi idiom also requires the block to be
+ // unconditionally executed. For a block to be unconditionally executed, it
+ // has to dominate all the exit blocks of the loop.
for (BasicBlock *ExitBlock : ExitBlocks)
if (!DT->dominates(BB, ExitBlock))
return false;
bool MadeChange = false;
- // Look for store instructions, which may be optimized to memset/memcpy.
- collectStores(BB);
- // Look for a single store or sets of stores with a common base, which can be
- // optimized into a memset (memset_pattern). The latter most commonly happens
- // with structs and handunrolled loops.
- for (auto &SL : StoreRefsForMemset)
- MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
+ HasMemset = TLI->has(LibFunc_memset);
+ HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc_memcpy);
+
+ if (HasMemset || HasMemsetPattern || HasMemcpy) {
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ collectStores(BB);
+
+ // Look for a single store or sets of stores with a common base, which can
+ // be optimized into a memset (memset_pattern). The latter most commonly
+ // happens with structs and handunrolled loops.
+ for (auto &SL : StoreRefsForMemset)
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
- for (auto &SL : StoreRefsForMemsetPattern)
- MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
+ for (auto &SL : StoreRefsForMemsetPattern)
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
- // Optimize the store into a memcpy, if it feeds an similarly strided load.
- for (auto &SI : StoreRefsForMemcpy)
- MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+    // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ for (auto &SI : StoreRefsForMemcpy)
+ MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
- MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
- BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
- MadeChange |= processLoopMemIntrinsic<MemSetInst>(
- BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
+ MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
+ BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
+ MadeChange |= processLoopMemIntrinsic<MemSetInst>(
+ BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
+ }
+
+ MadeChange |= processLoopPowi(BB, BECount);
return MadeChange;
}
@@ -925,6 +930,111 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
BECount, IsNegStride, /*IsLoopMemset=*/true);
}
+/// Emit a call to the llvm.powi intrinsic at the insertion point of
+/// \p IRBuilder, computing \p Base raised to the integer power \p Exp. The
+/// intrinsic declaration is mangled on the types of both operands, and the
+/// resulting call is tagged with the debug location \p DL.
+static CallInst *createPowiIntrinsic(IRBuilder<> &IRBuilder, Value *Base,
+                                     Value *Exp, const DebugLoc &DL) {
+  // The declaration lives in the module that owns the insertion block.
+  Module *ParentModule = IRBuilder.GetInsertBlock()->getParent()->getParent();
+  Function *PowiDecl = Intrinsic::getDeclaration(
+      ParentModule, Intrinsic::powi, {Base->getType(), Exp->getType()});
+
+  CallInst *PowiCall = IRBuilder.CreateCall(PowiDecl, {Base, Exp});
+  PowiCall->setDebugLoc(DL);
+  return PowiCall;
+}
+
+
+/// Check whether \p Phi is the accumulator of a powi-style recurrence:
+///
+///   Phi  = phi [ 1.0, PH ], [ FMul, Latch ]
+///   FMul = fmul fast Phi, Base        ; Base is loop-invariant
+///
+/// \param Phi     Candidate accumulator phi.
+/// \param PH      Preheader of \p CurLoop.
+/// \param Latch   Latch of \p CurLoop.
+/// \param CurLoop Loop used for the loop-invariance check on the base.
+/// \returns the fmul instruction on success, or nullptr if any check fails.
+static Instruction *detectPowiIdiom(PHINode *Phi, BasicBlock *PH,
+                                    BasicBlock *Latch, Loop *CurLoop) {
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Performing powi idiom detection\n");
+
+  // The phi must have two incoming values (one from the preheader, and another
+  // from the latch), it must have one use (which we will subsequently check is
+  // an fmul fast instruction), and it must be a floating-point type.
+  if (Phi->getNumIncomingValues() != 2 || !Phi->hasOneUse() ||
+      Phi->getBasicBlockIndex(PH) < 0 || Phi->getBasicBlockIndex(Latch) < 0 ||
+      !Phi->getType()->isFloatingPointTy()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " Unable to operate on this PHI node\n");
+    return nullptr;
+  }
+
+  // Further, check that the incoming value from the preheader is 1.0.
+  auto *ConstFP = dyn_cast<ConstantFP>(Phi->getIncomingValueForBlock(PH));
+  if (!ConstFP || !ConstFP->isExactlyValue(1.0)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " Initial value comparison failed\n");
+    return nullptr;
+  }
+
+  // The single user of the phi must be an fmul carrying all fast-math flags.
+  auto *I = cast<Instruction>(Phi->use_begin()->getUser());
+  Value *Op1, *Op2;
+  if (!match(I, m_FMul(m_Value(Op1), m_Value(Op2))) || !I->isFast()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " fmul-fast test failed\n");
+    return nullptr;
+  }
+
+  // The fmul must be the value carried around the backedge into the phi.
+  // Without this, the phi does not accumulate a product, and rewriting the
+  // fmul as powi(Base, TripCount) would change the computed value.
+  if (Phi->getIncomingValueForBlock(Latch) != I) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " fmul does not feed back into the PHI\n");
+    return nullptr;
+  }
+
+  // The operand that is not the phi is the base; it must not vary in the loop.
+  Value *Base = Op1 == Phi ? Op2 : Op1;
+  if (!CurLoop->isLoopInvariant(Base)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " Base is not loop-invariant\n");
+    return nullptr;
+  }
+  return I;
+}
+
+
+/// Detect the powi idiom in \p BB, and convert it to an intrinsic.
+///
+/// Every phi in \p BB other than the loop's induction variable is tested with
+/// detectPowiIdiom(). For the first match, the trip count derived from
+/// \p BECount is expanded in the preheader and used as the exponent of an
+/// llvm.powi call, which replaces the uses of the fmul outside \p BB.
+///
+/// \returns true if a powi call was inserted, false otherwise.
+bool LoopIdiomRecognize::processLoopPowi(BasicBlock *BB, const SCEV *BECount) {
+  // We only process loops where the IV is, at most, a C int type, since this is
+  // used for the exponent of the powi.
+  PHINode *IV = CurLoop->getInductionVariable(*SE);
+  if (!IV || IV->getType()->getScalarSizeInBits() > 32)
+    return false;
+
+  // If the loop doesn't have a valid preheader and latch, give up now.
+  BasicBlock *PH = CurLoop->getLoopPreheader();
+  BasicBlock *Latch = CurLoop->getLoopLatch();
+  if (!PH || !Latch)
+    return false;
+
+  // Find the phi corresponding to the powi idiom, amongst all phis except the
+  // induction phi. Iterating the block directly is safe: we return as soon as
+  // a rewrite happens, and the rewrite only inserts into the preheader.
+  for (PHINode &Candidate : BB->phis()) {
+    PHINode *Phi = &Candidate;
+    if (Phi == IV)
+      continue;
+
+    Instruction *FMul = detectPowiIdiom(Phi, PH, Latch, CurLoop);
+    if (!FMul)
+      continue;
+
+    // Find the trip count, and expand the SCEV to find the exponent of the
+    // powi.
+    IRBuilder<> Builder(PH->getTerminator());
+    SCEVExpander Expander(*SE, *DL, "loop-idiom");
+    SCEVExpanderCleaner ExpCleaner(Expander);
+    Type *ExpTy = IV->getType();
+    const SCEV *TripCount =
+        SE->getTripCountFromExitCount(BECount, ExpTy, CurLoop);
+    if (!Expander.isSafeToExpand(TripCount)) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " Trip count not safe to expand\n");
+      return false;
+    }
+    Value *Exp = Expander.expandCodeFor(TripCount, ExpTy, PH->getTerminator());
+
+    // The base is whichever fmul operand is not the recurrence phi. An fmul
+    // has exactly two operands (indices 0 and 1). We compare against Phi
+    // rather than testing isa<PHINode>, because a loop-invariant base may
+    // itself be a phi defined outside the loop.
+    Value *Base = FMul->getOperand(0) == Phi ? FMul->getOperand(1)
+                                             : FMul->getOperand(0);
+
+    // Insert the powi intrinsic, and replace its uses outside the block.
+    const DebugLoc &Loc = FMul->getDebugLoc();
+    CallInst *Powi = createPowiIntrinsic(Builder, Base, Exp, Loc);
+    FMul->replaceUsesOutsideBlock(Powi, BB);
+    ExpCleaner.markResultUsed();
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " powi idiom optimized!\n");
+    return true;
+  }
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " powi idiom detection failed\n");
+  return false;
+}
+
+
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
@@ -2216,8 +2326,6 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
- using namespace PatternMatch;
-
// Step 1: Check if the loop backedge is in desirable form.
ICmpInst::Predicate Pred;
diff --git a/llvm/test/Transforms/LoopIdiom/powi.ll b/llvm/test/Transforms/LoopIdiom/powi.ll
index 63e9fbce5a931eb..5136d5376414c51 100644
--- a/llvm/test/Transforms/LoopIdiom/powi.ll
+++ b/llvm/test/Transforms/LoopIdiom/powi.ll
@@ -6,17 +6,9 @@ define float @powi_f32(float %base, i32 %exp) {
; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
-; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
-; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.powi.f32.i32(float [[BASE]], i32 [[EXP]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], float 1.000000e+00, float [[TMP0]]
+; CHECK-NEXT: ret float [[SPEC_SELECT]]
;
entry:
%cmp.not = icmp eq i32 %exp, 0
@@ -40,17 +32,9 @@ define double @powi_f64(double %base, i32 %exp) {
; CHECK-SAME: double [[BASE:%.*]], i32 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
-; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
-; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.powi.f64.i32(double [[BASE]], i32 [[EXP]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], double 1.000000e+00, double [[TMP0]]
+; CHECK-NEXT: ret double [[SPEC_SELECT]]
;
entry:
%cmp.not = icmp eq i32 %exp, 0
@@ -74,17 +58,9 @@ define double @powi_i16_iv(double %base, i16 %exp) {
; CHECK-SAME: double [[BASE:%.*]], i16 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i16 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i16 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[DEC]] = add nsw i16 [[MERGE_DEC]], -1
-; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i16 [[DEC]], 0
-; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
-; CHECK: while.end:
-; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
-; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.powi.f64.i16(double [[BASE]], i16 [[EXP]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], double 1.000000e+00, double [[TMP0]]
+; CHECK-NEXT: ret double [[SPEC_SELECT]]
;
entry:
%cmp.not = icmp eq i16 %exp, 0
@@ -108,17 +84,9 @@ define float @powi_canonical_iv_signed(float %base, i32 %exp) {
; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
-; CHECK: exit:
-; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
-; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.powi.f32.i32(float [[BASE]], i32 [[EXP]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_SGT]], float [[TMP0]], float 1.000000e+00
+; CHECK-NEXT: ret float [[SPEC_SELECT]]
;
entry:
%cmp.sgt = icmp sgt i32 %exp, 0
@@ -142,17 +110,9 @@ define float @powi_canonical_iv_unsigned(float %base, i32 %exp) {
; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP_EQ]], label [[EXIT:%.*]], label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
-; CHECK: exit:
-; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
-; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.powi.f32.i32(float [[BASE]], i32 [[EXP]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_EQ]], float 1.000000e+00, float [[TMP0]]
+; CHECK-NEXT: ret float [[SPEC_SELECT]]
;
entry:
%cmp.eq = icmp eq i32 %exp, 0
@@ -175,16 +135,8 @@ define float @powi_const_i32_exp(float %base) {
; CHECK-LABEL: define float @powi_const_i32_exp(
; CHECK-SAME: float [[BASE:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 2147483647
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK: exit:
-; CHECK-NEXT: ret float [[MUL]]
+; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.powi.f32.i32(float [[BASE]], i32 2147483647)
+; CHECK-NEXT: ret float [[TMP0]]
;
entry:
br label %for.body
@@ -206,22 +158,23 @@ define float @powi_unrelated_computation(float %base, i32 %exp) {
; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[EXP]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.powi.f32.i32(float [[BASE]], i32 [[EXP]])
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[UNRELATED:%.*]] = phi i32 [ [[UNRELATED_XOR:%.*]], [[FOR_BODY]] ], [ 5, [[ENTRY]] ]
-; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
-; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[UNRELATED:%.*]] = phi i32 [ [[UNRELATED_XOR:%.*]], [[FOR_BODY]] ], [ 5, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[UNRELATED_XOR]] = xor i32 [[IV]], [[UNRELATED]]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK: for.cleanup:
-; CHECK-NEXT: [[TMP0:%.*]] = sitofp i32 [[UNRELATED_XOR]] to float
-; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[MUL]], [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[UNRELATED_XOR]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP0]], [[TMP1]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: [[ADD:%.*]] = phi float [ 6.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[FOR_CLEANUP]] ]
+; CHECK-NEXT: [[ADD:%.*]] = phi float [ 6.000000e+00, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_CLEANUP]] ]
; CHECK-NEXT: ret float [[ADD]]
;
entry:
>From f98b7420c3485802daedd357f65bffe2d04df32b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Fri, 17 Nov 2023 16:02:39 +0000
Subject: [PATCH 3/3] LIR: address @joe-img's review comments; fix FMul operand indices
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3c9e99ebe6f3cb9..d25eeef68f5b060 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -993,18 +993,12 @@ bool LoopIdiomRecognize::processLoopPowi(BasicBlock *BB, const SCEV *BECount) {
if (!PH || !Latch)
return false;
- // Collect all phis that are not the induction phi.
- SmallVector<PHINode *, 1> Phis;
- for (Instruction &I : *BB) {
- if (auto *Phi = dyn_cast<PHINode>(&I))
- if (Phi != IV)
- Phis.push_back(Phi);
- }
-
// Find the Phi corresponding to the powi idiom, amongst all phis except the
// induction phi.
- for (PHINode *Phi : Phis) {
- if (Instruction *FMul = detectPowiIdiom(Phi, PH, Latch, CurLoop)) {
+ for (PHINode &Phi : BB->phis()) {
+ if (&Phi == IV)
+ continue;
+ if (Instruction *FMul = detectPowiIdiom(&Phi, PH, Latch, CurLoop)) {
// Find the trip count, and expand the SCEV to find the exponent of the
// powi.
IRBuilder<> Builder(PH->getTerminator());
@@ -1022,8 +1016,8 @@ bool LoopIdiomRecognize::processLoopPowi(BasicBlock *BB, const SCEV *BECount) {
// Insert the powi intrinsic, and replace its uses outside the block.
const DebugLoc &Loc = FMul->getDebugLoc();
- Value *Base = isa<PHINode>(FMul->getOperand(1)) ? FMul->getOperand(2)
- : FMul->getOperand(1);
+ Value *Base = isa<PHINode>(FMul->getOperand(0)) ? FMul->getOperand(1)
+ : FMul->getOperand(0);
CallInst *Powi = createPowiIntrinsic(Builder, Base, Exp, Loc);
FMul->replaceUsesOutsideBlock(Powi, BB);
ExpCleaner.markResultUsed();
More information about the llvm-commits
mailing list