[llvm] [FuncSpec] Improve handling of Comparison Instructions (PR #114073)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 09:57:06 PDT 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/114073

>From 317bcff4e13f38dc1a46ee08835fd7178af7b29c Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 29 Oct 2024 12:11:52 +0000
Subject: [PATCH 1/2] [FuncSpec] Improve handling of Comparison Instructions

When visiting comparison instructions during computation of a
specialization's bonus, make use of information from the lattice value
of the other operand in the case where we have not found this to have a
specific constant value.
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 21 ++++--
 .../FunctionSpecialization/cmp-with-range.ll  | 74 +++++++++++++++++++
 2 files changed, 89 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 919d3143a13f7e..7478e089f41713 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -468,16 +468,25 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) {
 Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
 
+  Constant *Const = LastVisited->second;
   bool Swap = I.getOperand(1) == LastVisited->first;
   Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
   Constant *Other = findConstantFor(V, KnownConstants);
-  if (!Other)
-    return nullptr;
 
-  Constant *Const = LastVisited->second;
-  return Swap ?
-        ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
-      : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
+  if (Other)
+    return Swap ? ConstantFoldCompareInstOperands(I.getPredicate(), Other,
+                                                  Const, DL)
+                : ConstantFoldCompareInstOperands(I.getPredicate(), Const,
+                                                  Other, DL);
+
+  // If we haven't found Other to be a specific constant value, we may still be
+  // able constant fold the comparison using information from the lattice value.
+  ValueLatticeElement ConstLV = ValueLatticeElement();
+  ConstLV.markConstant(Const);
+  const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
+  auto &V1State = Swap ? OtherLV : ConstLV;
+  auto &V2State = Swap ? ConstLV : OtherLV;
+  return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
 }
 
 Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
new file mode 100644
index 00000000000000..bea044a9904ff5
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=0     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings arising from a branch
+; based on a comparison with a value found to have a constant range by IPSCCP.
+define i32 @main() {
+  %notspec = call i32 @test(i32 8)
+  %spec = call i32 @test(i32 0)
+  %sum = add i32 %notspec, %spec
+  ret i32 %sum
+}
+
+define i32 @test(i32 %x) {
+entry:
+  %range = call i32 @foo(), !range !{ i32 1, i32 0 }
+  %bound = shl nsw nuw i32 %range, 3
+  %cmp = icmp uge i32 %x, %bound
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %if.end
+
+if.end:
+  %res = phi i32 [ 0, %entry ], [ 1, %if.then]
+  ret i32 %res
+}
+
+declare i32 @foo()
+declare void @do_something()
+; CHECK-LABEL: define range(i32 0, 2) i32 @main() {
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i32 8)
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i32 0)
+; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+;
+; CHECK-LABEL: define range(i32 0, 2) i32 @test(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[X]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret i32 poison
+;
+;.
+; CHECK: [[RNG0]] = !{i32 1, i32 0}
+;.

>From 444c48f49c0d726278067438cc3bcd8cad2792cb Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 30 Oct 2024 16:44:14 +0000
Subject: [PATCH 2/2] Address review comments

- Improve readability of the existing constant-folding case
- Rename Swap -> ConstOnRHS
- Add a test case where the Use we are querying is on the RHS, and is
  not a direct Use of the Arg
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 19 ++---
 .../FunctionSpecialization/cmp-with-range.ll  | 73 ++++++++++++++++---
 2 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 7478e089f41713..2d4249f7847062 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -469,23 +469,24 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
 
   Constant *Const = LastVisited->second;
-  bool Swap = I.getOperand(1) == LastVisited->first;
-  Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
+  bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
+  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
   Constant *Other = findConstantFor(V, KnownConstants);
 
-  if (Other)
-    return Swap ? ConstantFoldCompareInstOperands(I.getPredicate(), Other,
-                                                  Const, DL)
-                : ConstantFoldCompareInstOperands(I.getPredicate(), Const,
-                                                  Other, DL);
+  if (Other) {
+    if (ConstOnRHS) {
+      std::swap(Const, Other);
+    }
+    return ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
+  }
 
   // If we haven't found Other to be a specific constant value, we may still be
   // able constant fold the comparison using information from the lattice value.
   ValueLatticeElement ConstLV = ValueLatticeElement();
   ConstLV.markConstant(Const);
   const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
-  auto &V1State = Swap ? OtherLV : ConstLV;
-  auto &V2State = Swap ? ConstLV : OtherLV;
+  auto &V1State = ConstOnRHS ? OtherLV : ConstLV;
+  auto &V2State = ConstOnRHS ? ConstLV : OtherLV;
   return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
 }
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
index bea044a9904ff5..d6922947363561 100644
--- a/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/cmp-with-range.ll
@@ -8,13 +8,15 @@
 ; Verify that we are able to estimate the codesize savings arising from a branch
 ; based on a comparison with a value found to have a constant range by IPSCCP.
 define i32 @main() {
-  %notspec = call i32 @test(i32 8)
-  %spec = call i32 @test(i32 0)
-  %sum = add i32 %notspec, %spec
-  ret i32 %sum
+  %notspec = call i32 @test_use_on_lhs(i32 8)
+  %spec1 = call i32 @test_use_on_lhs(i32 0)
+  %spec2 = call i32 @test_use_on_rhs(i32 1)
+  %sum1 = add i32 %notspec, %spec1
+  %sum2 = add i32 %sum1, %spec2
+  ret i32 %sum2
 }
 
-define i32 @test(i32 %x) {
+define i32 @test_use_on_lhs(i32 %x) {
 entry:
   %range = call i32 @foo(), !range !{ i32 1, i32 0 }
   %bound = shl nsw nuw i32 %range, 3
@@ -33,16 +35,38 @@ if.end:
   ret i32 %res
 }
 
+define i32 @test_use_on_rhs(i32 %x) {
+entry:
+  %range = call i32 @foo(), !range !{ i32 1, i32 0 }
+  %bound = shl nsw nuw i32 %range, 3
+  %x.sub = sub nsw nuw i32 %x, 1
+  %cmp = icmp ult i32 %bound, %x.sub
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %if.end
+
+if.end:
+  %res = phi i32 [ 0, %entry ], [ 1, %if.then]
+  ret i32 %res
+}
+
 declare i32 @foo()
 declare void @do_something()
 ; CHECK-LABEL: define range(i32 0, 2) i32 @main() {
-; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i32 8)
-; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i32 0)
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test_use_on_lhs(i32 8)
+; CHECK-NEXT:    [[SPEC1:%.*]] = call i32 @test_use_on_lhs.specialized.1(i32 0)
+; CHECK-NEXT:    [[SPEC2:%.*]] = call i32 @test_use_on_rhs.specialized.2(i32 1)
 ; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
-; CHECK-NEXT:    ret i32 [[SUM]]
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw i32 [[SUM]], 0
+; CHECK-NEXT:    ret i32 [[RES]]
 ;
 ;
-; CHECK-LABEL: define range(i32 0, 2) i32 @test(
+; CHECK-LABEL: define range(i32 0, 2) i32 @test_use_on_lhs(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0:![0-9]+]]
@@ -60,7 +84,36 @@ declare void @do_something()
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 ;
-; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-LABEL: define range(i32 0, 2) i32 @test_use_on_rhs(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    [[X_SUB:%.*]] = sub nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[BOUND]], [[X_SUB]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test_use_on_lhs.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]
+; CHECK-NEXT:    [[BOUND:%.*]] = shl nuw nsw i32 [[RANGE]], 3
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret i32 poison
+;
+;
+; CHECK-LABEL: define internal i32 @test_use_on_rhs.specialized.2(
 ; CHECK-SAME: i32 [[X:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[RANGE:%.*]] = call i32 @foo(), !range [[RNG0]]



More information about the llvm-commits mailing list