[llvm] [InstCombine] Canonicalize scalable GEPs to use llvm.vscale intrinsic (PR #90569)

Mon Apr 29 23:52:09 PDT 2024

https://github.com/nikic created https://github.com/llvm/llvm-project/pull/90569

Canonicalize getelementptr instructions for scalable vector types into ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale, but not plain scalable GEPs.

@davemgreen I'm curious what you think about this change, as you did the work for llvm.vscale support in BasicAA. Can we do this, or would it regress something else?

(There is also a less aggressive variant where we just move the `<vscale x` part out of the GEP type, but don't do the full offset expansion. Doing it this way is easier...)

>From 36a484a9c6ae1abda95c78790e842b116153b629 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 30 Apr 2024 15:43:17 +0900
Subject: [PATCH] [InstCombine] Canonicalize scalable GEPs to use llvm.vscale
 intrinsic

Canonicalize getelementptr instructions for scalable vector types
into ptradd representation with an explicit llvm.vscale call. This
representation has better support in BasicAA, which can reason
about llvm.vscale, but not plain scalable GEPs.
---
 .../InstCombine/InstructionCombining.cpp        |  8 ++++++++
 .../Analysis/ValueTracking/phi-known-bits.ll    | 16 ++++++++++------
 llvm/test/Transforms/InstCombine/gep-vector.ll  |  5 ++++-
 .../Transforms/InstCombine/gepofconstgepi8.ll   |  7 +++++--
 llvm/test/Transforms/InstCombine/icmp-gep.ll    | 17 ++++++++++++-----
 llvm/test/Transforms/InstCombine/opaque-ptr.ll  |  8 ++++++--
 .../InstCombine/scalable-vector-array.ll        |  8 ++++++--
 llvm/test/Transforms/InstCombine/vscale_gep.ll  | 13 ++++++++++---
 8 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7356941be64593..8fe1d01a4515d6 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2787,6 +2787,14 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                     GEP.isInBounds()));
   }
 
+  // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+  // intrinsic. This has better support in BasicAA.
+  if (IsGEPSrcEleScalable) {
+    Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
+    return replaceInstUsesWith(
+        GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
+  }
+
   // Check to see if the inputs to the PHI node are getelementptr instructions.
   if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
     auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
diff --git a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
index e5b8ba151e04c2..7b5e143fecfd9c 100644
--- a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
@@ -932,9 +932,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP(ptr %val1) {
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
 ; CHECK:       while.cond.i:
 ; CHECK-NEXT:    [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TEST_0_I]] = getelementptr <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
-; CHECK-NEXT:    [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TEST_0_I]] = getelementptr i8, ptr [[A_PN_I]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
+; CHECK-NEXT:    [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
 ; CHECK:       while.end.i:
 ; CHECK-NEXT:    [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
@@ -964,9 +966,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP_inbounds(ptr %val1) {
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
 ; CHECK:       while.cond.i:
 ; CHECK-NEXT:    [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TEST_0_I]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
-; CHECK-NEXT:    [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
+; CHECK-NEXT:    [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
 ; CHECK:       while.end.i:
 ; CHECK-NEXT:    [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll
index 3465c620b70fe5..f058338fbf7cdc 100644
--- a/llvm/test/Transforms/InstCombine/gep-vector.ll
+++ b/llvm/test/Transforms/InstCombine/gep-vector.ll
@@ -127,7 +127,10 @@ define ptr addrspace(3) @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_
 
 define ptr @test_accumulate_constant_offset_vscale_nonzero(<vscale x 16 x i1> %pg, ptr %base) {
 ; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 1, i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[GEP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP_OFFS]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %gep = getelementptr <vscale x 16 x i8>, ptr %base, i64 1, i64 4
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index 7b7c6fba699c21..4c8c56a9262e34 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -280,8 +280,11 @@ define ptr @test_scalable(ptr %base, i64 %a) {
 ; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]]
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1
+; CHECK-NEXT:    [[INDEX:%.*]] = add i64 [[A]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT:    [[P2_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[P2_IDX]]
 ; CHECK-NEXT:    ret ptr [[P2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index a0e03a5428e6c2..29d0c941ac5c8b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -472,7 +472,14 @@ define void @test60_extra_use_fold(ptr %foo, i64 %start.idx, i64 %end.offset) {
 
 define i1 @test_scalable_same(ptr %x) {
 ; CHECK-LABEL: @test_scalable_same(
-; CHECK-NEXT:    ret i1 false
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[A_IDX:%.*]] = shl i64 [[TMP1]], 5
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[A_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl i64 [[TMP2]], 5
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[B_IDX]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt ptr [[A]], [[B]]
+; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
   %b = getelementptr inbounds <vscale x 4 x i8>, ptr %x, i64 8
@@ -507,11 +514,11 @@ define i1 @test_scalable_xc(ptr %x) {
 define i1 @test_scalable_xy(ptr %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test_scalable_xy(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 2
-; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP2]], [[J:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP2]], [[I:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[TMP3]], 4
-; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP4]], [[I:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[TMP3]], 2
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP4]], [[J:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[GEP2_IDX]], [[GEP1_IDX]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4d38e2cd37c959..5cc444042e1796 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -289,7 +289,9 @@ define ptr @geps_combinable_different_elem_type_extra_use2(ptr %a, i64 %idx) {
 
 define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
 ; CHECK-LABEL: @geps_combinable_scalable(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds <vscale x 2 x i32>, ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 3
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
@@ -300,7 +302,9 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
 
 define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
 ; CHECK-LABEL: @geps_combinable_scalable_vector_array(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 5
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
index d03184766b4a68..20e9f2d99dd9a2 100644
--- a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
+++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
@@ -4,7 +4,9 @@
 define <vscale x 4 x i32> @load(ptr %x) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @load
 ; CHECK-SAME: (ptr [[X:%.*]]) {
-; CHECK-NEXT:    [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[A_ELT1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[A_UNPACK2]]
 ;
@@ -17,7 +19,9 @@ define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
 ; CHECK-LABEL: define void @store
 ; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
-; CHECK-NEXT:    [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[X_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
index 534888dc1a1c49..f424650d21e840 100644
--- a/llvm/test/Transforms/InstCombine/vscale_gep.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -14,7 +14,9 @@ define <vscale x 2 x ptr> @gep_index_type_is_scalable(ptr %p) {
 ; This test serves to verify code changes for "GEP.getNumIndices() == 1".
 define ptr @gep_num_of_indices_1(ptr %p) {
 ; CHECK-LABEL: @gep_num_of_indices_1(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
@@ -25,7 +27,9 @@ define ptr @gep_num_of_indices_1(ptr %p) {
 define void @gep_bitcast(ptr %p) {
 ; CHECK-LABEL: @gep_bitcast(
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP2]]
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[GEP2]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -54,7 +58,10 @@ define i32 @gep_alloca_inbounds_vscale_zero() {
 define i32 @gep_alloca_inbounds_vscale_nonzero() {
 ; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
 ; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 1, i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT:    [[TMP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 8
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP_OFFS]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
 ; CHECK-NEXT:    ret i32 [[LOAD]]
 ;