[llvm] [FuncSpec] Improve handling of Comparison Instructions (PR #114073)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 16:22:22 PDT 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/114073

>From 317bcff4e13f38dc1a46ee08835fd7178af7b29c Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 29 Oct 2024 12:11:52 +0000
Subject: [PATCH 1/4] [FuncSpec] Improve handling of Comparison Instructions

When visiting comparison instructions during computation of a
specializations's bonus, make use of information from the lattice value
of the other operand in the case where we have not found this to have a
specific constant value.
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 21 ++++--
 .../FunctionSpecialization/cmp-with-range.ll  | 74 +++++++++++++++++++
 2 files changed, 89 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 919d3143a13f7e..7478e089f41713 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -468,16 +468,25 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) {
 Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
 
+  Constant *Const = LastVisited->second;
   bool Swap = I.getOperand(1) == LastVisited->first;
   Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
   Constant *Other = findConstantFor(V, KnownConstants);
-  if (!Other)
-    return nullptr;
 
-  Constant *Const = LastVisited->second;
-  return Swap ?
-        ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
-      : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
+  if (Other)
+    return Swap ? ConstantFoldCompareInstOperands(I.getPredicate(), Other,
+                                                  Const, DL)
+                : ConstantFoldCompareInstOperands(I.getPredicate(), Const,
+                                                  Other, DL);
+
+  // If we haven't found Other to be a specific constant value, we may still be
+  // able constant fold the comparison using information from the lattice value.
+  ValueLatticeElement ConstLV = ValueLatticeElement();
+  ConstLV.markConstant(Const);
+  const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
+  auto &V1State = Swap ? OtherLV : ConstLV;
+  auto &V2State = Swap ? ConstLV : OtherLV;
+  return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
 }
 
 Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
new file mode 100644
index 00000000000000..bea044a9904ff5
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=0     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings arising from a branch
+; based on a comparison with a value found to have a constant range by IPSCCP.
+define i32 @main() {
+  %notspec = call i32 @test(i32 8)
+  %spec = call i32 @test(i32 0)
+  %sum = add i32 %notspec, %spec
+  ret i32 %sum
+}
+
+define i32 @test(i32 %x) {
+entry:
+  %range = call i32 @foo(), !range !{ i32 1, i32 0 }
+  %bound = shl nsw nuw i32 %range, 3
+  %cmp = icmp uge i32 %x, %bound
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %if.end
+
+if.end:
+  %res = phi i32 [ 0, %entry ], [ 1, %if.then]
+  ret i32 %res
+}
+
+declare i32 @foo()
+declare void @do_something()
+; CHECK-LABEL: define range(i32 0, 2) i32 @main() {
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i32 8)
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i32 0)
+; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+;
+; CHECK-LABEL: define range(i32 0, 2) i32 @test(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[X]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret i32 poison
+;
+;.
+; CHECK: [[RNG0]] = !{i32 1, i32 0}
+;.

>From 444c48f49c0d726278067438cc3bcd8cad2792cb Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 30 Oct 2024 16:44:14 +0000
Subject: [PATCH 2/4] Address review comments

- Improve readability of existing constantfold case
- Rename Swap -> ConstOnRHS
- Add a test case where the Use we are querying is on the RHS, and is
  not a direct Use of the Arg
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 19 ++---
 .../FunctionSpecialization/cmp-with-range.ll  | 73 ++++++++++++++++---
 2 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7478e089f41713..2d4249f7847062 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -469,23 +469,24 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
 
   Constant *Const = LastVisited->second;
-  bool Swap = I.getOperand(1) == LastVisited->first;
-  Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
+  bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
+  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
   Constant *Other = findConstantFor(V, KnownConstants);
 
-  if (Other)
-    return Swap ? ConstantFoldCompareInstOperands(I.getPredicate(), Other,
-                                                  Const, DL)
-                : ConstantFoldCompareInstOperands(I.getPredicate(), Const,
-                                                  Other, DL);
+  if (Other) {
+    if (ConstOnRHS) {
+      std::swap(Const, Other);
+    }
+    return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
+  }
 
   // If we haven't found Other to be a specific constant value, we may still be
   // able constant fold the comparison using information from the lattice value.
   ValueLatticeElement ConstLV = ValueLatticeElement();
   ConstLV.markConstant(Const);
   const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
-  auto &V1State = Swap ? OtherLV : ConstLV;
-  auto &V2State = Swap ? ConstLV : OtherLV;
+  auto &V1State = ConstOnRHS ? OtherLV : ConstLV;
+  auto &V2State = ConstOnRHS ? ConstLV : OtherLV;
   return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
 }
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
index bea044a9904ff5..d6922947363561 100644
--- a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
@@ -8,13 +8,15 @@
 ; Verify that we are able to estimate the codesize savings arising from a branch
 ; based on a comparison with a value found to have a constant range by IPSCCP.
 define i32 @main() {
-  %notspec = call i32 @test(i32 8)
-  %spec = call i32 @test(i32 0)
-  %sum = add i32 %notspec, %spec
-  ret i32 %sum
+  %notspec = call i32 @test_use_on_lhs(i32 8)
+  %spec1 = call i32 @test_use_on_lhs(i32 0)
+  %spec2 = call i32 @test_use_on_rhs(i32 1)
+  %sum1 = add i32 %notspec, %spec1
+  %sum2 = add i32 %sum1, %spec2
+  ret i32 %sum2
 }
 
-define i32 @test(i32 %x) {
+define i32 @test_use_on_lhs(i32 %x) {
 entry:
   %range = call i32 @foo(), !range !{ i32 1, i32 0 }
   %bound = shl nsw nuw i32 %range, 3
@@ -33,16 +35,38 @@ if.end:
   ret i32 %res
 }
 
+define i32 @test_use_on_rhs(i32 %x) {
+entry:
+  %range = call i32 @foo(), !range !{ i32 1, i32 0 }
+  %bound = shl nsw nuw i32 %range, 3
+  %x.sub = sub nsw nuw i32 %x, 1
+  %cmp = icmp ult i32 %bound, %x.sub
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %if.end
+
+if.end:
+  %res = phi i32 [ 0, %entry ], [ 1, %if.then]
+  ret i32 %res
+}
+
 declare i32 @foo()
 declare void @do_something()
 ; CHECK-LABEL: define range(i32 0, 2) i32 @main() {
-; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i32 8)
-; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i32 0)
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test_use_on_lhs(i32 8)
+; CHECK-NEXT:    [[SPEC1:%.*]] = call i32 @test_use_on_lhs.specialized.1(i32 0)
+; CHECK-NEXT:    [[SPEC2:%.*]] = call i32 @test_use_on_rhs.specialized.2(i32 1)
 ; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
-; CHECK-NEXT:    ret i32 [[SUM]]
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw i32 [[SUM]], 0
+; CHECK-NEXT:    ret i32 [[RES]]
 ;
 ;
-; CHECK-LABEL: define range(i32 0, 2) i32 @test(
+; CHECK-LABEL: define range(i32 0, 2) i32 @test_use_on_lhs(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0:![0-9]+]]
@@ -60,7 +84,36 @@ declare void @do_something()
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 ;
-; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-LABEL: define range(i32 0, 2) i32 @test_use_on_rhs(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    [[X_SUB:%.*]] = sub nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[BOUND]], [[X_SUB]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test_use_on_lhs.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @test_use_on_rhs.specialized.2(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]

>From dae9b603601db5de520645c1b03dc8e6f9e38ea6 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 30 Oct 2024 23:10:33 +0000
Subject: [PATCH 3/4] Address review

- Remove unnecessary braces from single statement body of if-statement
---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 2d4249f7847062..d481e935c2f0d9 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -474,9 +474,8 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   Constant *Other = findConstantFor(V, KnownConstants);
 
   if (Other) {
-    if (ConstOnRHS) {
+    if (ConstOnRHS)
       std::swap(Const, Other);
-    }
     return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
   }
 

>From 7a0138d073dfcd8c7b167d1a5dc20c4fd115dc1a Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 30 Oct 2024 23:06:12 +0000
Subject: [PATCH 4/4] [FuncSpec] Add const qualifier to Solver in
 InstCostVisitor

Add the const qualifier to the InstCostVisitor's reference to the
IPSCCPSolver, to document and enforce that we should never alter the
state of the Solver when evaluating a candidate specialization's bonus.
---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index e82155a6c72974..ff5af5988656a1 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -155,7 +155,7 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   Function *F;
   const DataLayout &DL;
   TargetTransformInfo &TTI;
-  SCCPSolver &Solver;
+  const SCCPSolver &Solver;
 
   ConstMap KnownConstants;
   // Basic blocks known to be unreachable after constant propagation.



More information about the llvm-commits mailing list