[llvm] da4a5a4 - [InstCombine] Promote expression tree with @llvm.vscale when zero-extending result.

Thu Feb 2 03:19:38 PST 2023

Author: Sander de Smalen
Date: 2023-02-02T11:18:16Z
New Revision: da4a5a46b3c86df436d37f81c0b7849f93d7fbde

URL: https://github.com/llvm/llvm-project/commit/da4a5a46b3c86df436d37f81c0b7849f93d7fbde
DIFF: https://github.com/llvm/llvm-project/commit/da4a5a46b3c86df436d37f81c0b7849f93d7fbde.diff

LOG: [InstCombine] Promote expression tree with @llvm.vscale when zero-extending result.

The LoopVectorizer emits the (scaled) element count as i32, which for
scalable VFs results in calls to @llvm.vscale.i32(). This value is scaled
and further zero-extended to i64.

The zero-extend can be folded away by executing the whole expression in i64
type using @llvm.vscale.i64(). Any logical `and` that would be needed to mask
the result can be further folded away by KnownBits analysis when
vscale_range is set.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D143016

Added: 
    llvm/test/Transforms/InstCombine/vscale.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 70ec15fca808f..118f7174f1156 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -252,6 +252,20 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Res = CastInst::Create(
       static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
     break;
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default:
+        llvm_unreachable("Unsupported call!");
+      case Intrinsic::vscale: {
+        Function *Fn =
+            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
+        Res = CallInst::Create(Fn->getFunctionType(), Fn);
+        break;
+      }
+      }
+    }
+    break;
   default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
@@ -1213,6 +1227,13 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
         return false;
     return true;
   }
+  case Instruction::Call:
+    // llvm.vscale() can always be executed in larger type, because the
+    // value is automatically zero-extended.
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::vscale)
+        return true;
+    return false;
   default:
     // TODO: Can handle more cases here.
     return false;

diff  --git a/llvm/test/Transforms/InstCombine/vscale.ll b/llvm/test/Transforms/InstCombine/vscale.ll
new file mode 100644
index 0000000000000..dbb5ca4bae9be
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='instcombine' -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define i64 @promote_vscale_i32_to_i64() {
+; CHECK-LABEL: @promote_vscale_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[VSCALE]], 4294967295
+; CHECK-NEXT:    ret i64 [[EXT]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %ext = zext i32 %vscale to i64
+  ret i64 %ext
+}
+
+define i64 @pomote_zext_shl_vscale_i32_to_i64() {
+; CHECK-LABEL: @pomote_zext_shl_vscale_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[VSCALE]], 3
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 4294967288
+; CHECK-NEXT:    ret i64 [[EXT]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %shl = shl i32 %vscale, 3
+  %ext = zext i32 %shl to i64
+  ret i64 %ext
+}
+
+; Same test as @pomote_zext_shl_vscale_i32_to_i64, but with the
+; vscale_range attribute so that the 'and' is folded away.
+define i64 @free_zext_vscale_shl_i32_to_i64() #0 {
+; CHECK-LABEL: @free_zext_vscale_shl_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i64 [[VSCALE]], 3
+; CHECK-NEXT:    ret i64 [[SHL]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %shl = shl i32 %vscale, 3
+  %ext = zext i32 %shl to i64
+  ret i64 %ext
+}
+
+declare i32 @llvm.vscale.i32()
+
+attributes #0 = { vscale_range(1,16) }