[llvm] 6b825e5 - [ArgPromotion] Change the condition to check the promotion limit

Thu Apr 28 09:46:57 PDT 2022

Author: Pavel Samolysov
Date: 2022-04-28T09:42:58-07:00
New Revision: 6b825e50f7f41f40cb473587bfe65bcb0069da47

URL: https://github.com/llvm/llvm-project/commit/6b825e50f7f41f40cb473587bfe65bcb0069da47
DIFF: https://github.com/llvm/llvm-project/commit/6b825e50f7f41f40cb473587bfe65bcb0069da47.diff

LOG: [ArgPromotion] Change the condition to check the promotion limit

The condition should be 'ArgParts.size() > MaxElements', so that if
'MaxElements' is 3 and we have exactly 3 elements in the 'ArgParts'
vector, the promotion is allowed because the threshold is not exceeded yet.

The default value for 'MaxElements' has been decreased from 3 to 2 in
order to avoid an actual change in argument promotion behavior. However,
this does change the byval argument transformation behavior: it now
allows adding no more than 2 arguments to the function, instead of the
3 allowed before.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D124178

Added: 
    llvm/test/Transforms/ArgumentPromotion/max-elements-limit.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
    llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 3ea5f8e61c766..35481843c0e3e 100644

--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -25,7 +25,7 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
   unsigned MaxElements;
 
 public:
-  ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
+  ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}
 
   /// Checks if a type could have padding bytes.
   static bool isDenselyPacked(Type *Ty, const DataLayout &DL);

diff  --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f8d94af572b10..81faf2a1b04a7 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -509,7 +509,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
 
     // We limit promotion to only promoting up to a fixed number of elements of
     // the aggregate.
-    if (MaxElements > 0 && ArgParts.size() >= MaxElements) {
+    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                         << "more than " << MaxElements << " parts\n");
       return false;

diff  --git a/llvm/test/Transforms/ArgumentPromotion/max-elements-limit.ll b/llvm/test/Transforms/ArgumentPromotion/max-elements-limit.ll
new file mode 100644
index 0000000000000..7e59c6d0755d4
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/max-elements-limit.ll
@@ -0,0 +1,90 @@
+; RUN: opt -passes=argpromotion -S %s | FileCheck %s
+
+define internal i32 @callee2(i32* noundef %0) {
+; CHECK-LABEL: define {{[^@]+}}@callee2
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
+; CHECK-NEXT:    [[SUM:%.*]] = add nsw i32 [[P_0]], [[P_1]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 0
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %0, i64 1
+  %5 = load i32, i32* %4, align 4
+  %6 = add nsw i32 %3, %5
+  ret i32 %6
+}
+
+define i32 @caller2(i32 %0, i32 %1) {
+; CHECK-LABEL: define {{[^@]+}}@caller2
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT:    [[PL_0:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[PL_0]], align 4
+; CHECK-NEXT:    [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[PL_0]], i64 1
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[PL_1]], align 4
+; CHECK-NEXT:    [[PL_2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT:    [[VAL_0:%.*]] = load i32, i32* [[PL_2]], align 4
+; CHECK-NEXT:    [[PL_3:%.*]] = getelementptr i32, i32* [[PL_2]], i64 1
+; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, i32* [[PL_3]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @callee2(i32 [[VAL_0]], i32 [[VAL_1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %3 = alloca [2 x i32], align 4
+  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %3, i64 0, i64 0
+  store i32 %0, i32* %4, align 4
+  %5 = getelementptr inbounds i32, i32* %4, i64 1
+  store i32 %1, i32* %5, align 4
+  %6 = getelementptr inbounds [2 x i32], [2 x i32]* %3, i64 0, i64 0
+  %7 = call i32 @callee2(i32* noundef %6)
+  ret i32 %7
+}
+
+define internal i32 @callee3(i32* noundef %0) {
+; CHECK-LABEL: define {{[^@]+}}@callee3
+; CHECK-SAME: (i32* noundef [[P_0:%.*]]) {
+; CHECK-NEXT:    [[PL_0:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 0
+; CHECK-NEXT:    [[VAL_0:%.*]] = load i32, i32* [[PL_0]], align 4
+; CHECK-NEXT:    [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 1
+; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, i32* [[PL_1]], align 4
+; CHECK-NEXT:    [[SUM_0:%.*]] = add nsw i32 [[VAL_0]], [[VAL_1]]
+; CHECK-NEXT:    [[PL_2:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 2
+; CHECK-NEXT:    [[VAL_2:%.*]] = load i32, i32* [[PL_2]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[SUM_0]], [[VAL_2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 0
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %0, i64 1
+  %5 = load i32, i32* %4, align 4
+  %6 = add nsw i32 %3, %5
+  %7 = getelementptr inbounds i32, i32* %0, i64 2
+  %8 = load i32, i32* %7, align 4
+  %9 = add nsw i32 %6, %8
+  ret i32 %9
+}
+
+define i32 @caller3(i32 %0, i32 %1, i32 %2) {
+; CHECK-LABEL: define {{[^@]+}}@caller3
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]], i32 [[P_2:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[PL_0:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[PL_0]], align 4
+; CHECK-NEXT:    [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[PL_0]], i64 1
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[PL_1]], align 4
+; CHECK-NEXT:    [[PL_2:%.*]] = getelementptr inbounds i32, i32* [[PL_1]], i64 1
+; CHECK-NEXT:    store i32 [[P_2]], i32* [[PL_2]], align 4
+; CHECK-NEXT:    [[PL_3:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @callee3(i32* noundef [[PL_3]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %4 = alloca [3 x i32], align 4
+  %5 = getelementptr inbounds [3 x i32], [3 x i32]* %4, i64 0, i64 0
+  store i32 %0, i32* %5, align 4
+  %6 = getelementptr inbounds i32, i32* %5, i64 1
+  store i32 %1, i32* %6, align 4
+  %7 = getelementptr inbounds i32, i32* %6, i64 1
+  store i32 %2, i32* %7, align 4
+  %8 = getelementptr inbounds [3 x i32], [3 x i32]* %4, i64 0, i64 0
+  %9 = call i32 @callee3(i32* noundef %8)
+  ret i32 %9
+}