[llvm] [VectorCombine] Prevent extract/ins rewrite to GEP (PR #150216)
Nathan Gauër via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 06:57:20 PDT 2025
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/150216
>From c6ee280e207a84b758d563dbdfeb3719004d45e3 Mon Sep 17 00:00:00 2001
From: Nathan Gauër <brioche at google.com>
Date: Wed, 23 Jul 2025 14:43:41 +0200
Subject: [PATCH 1/2] [VectorCombine] Prevent extract/ins rewrite to GEP
Using a GEP to index into a vector is not disallowed, but it is not
recommended.
The SPIR-V backend needs to generate structured accesses into types,
which is impossible with an untyped GEP instruction unless we attach
more information to the IR. Finding a proper solution is a work in
progress; in the meantime, we'd like to reduce the number of failures.
Preventing these optimizations from rewriting extract/insert
instructions into GEPs helps us lower more code to SPIR-V.
This change should be OK: it is only active when targeting SPIR-V,
and it only disables a non-recommended transformation.
Related to #145002
---
.../Transforms/Vectorize/VectorCombine.cpp | 11 +-
.../VectorCombine/load-insert-store.ll | 382 ++++++++++++++++++
2 files changed, 389 insertions(+), 4 deletions(-)
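For context, here is a minimal sketch of the rewrite being suppressed
(value names are illustrative; the exact before/after pairs appear in
the test updates below). On other targets, VectorCombine turns a
load/insertelement/store sequence into a GEP to the vector element
plus a scalar store:

  ; before (now kept as-is when targeting SPIR-V)
  %0 = load <16 x i8>, ptr %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, ptr %q

  ; after (the GEP-based form other targets still produce)
  %gep = getelementptr inbounds <16 x i8>, ptr %q, i32 0, i32 3
  store i8 %s, ptr %gep, align 1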
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 82adc34fdbd84..20b8165ff280a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3750,8 +3750,9 @@ bool VectorCombine::run() {
LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");
+ const bool isSPIRV = F.getParent()->getTargetTriple().isSPIRV();
bool MadeChange = false;
- auto FoldInst = [this, &MadeChange](Instruction &I) {
+ auto FoldInst = [this, &MadeChange, isSPIRV](Instruction &I) {
Builder.SetInsertPoint(&I);
bool IsVectorType = isa<VectorType>(I.getType());
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
@@ -3780,13 +3781,15 @@ bool VectorCombine::run() {
// TODO: Identify and allow other scalable transforms
if (IsVectorType) {
MadeChange |= scalarizeOpOrCmp(I);
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= scalarizeExtExtract(I);
+ if (!isSPIRV) {
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= scalarizeExtExtract(I);
+ }
MadeChange |= scalarizeVPIntrinsic(I);
MadeChange |= foldInterleaveIntrinsics(I);
}
- if (Opcode == Instruction::Store)
+ if (Opcode == Instruction::Store && !isSPIRV)
MadeChange |= foldSingleElementStore(I);
// If this is an early pipeline invocation of this pass, we are done.
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 93565c1a708eb..0181ec76088bd 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s
+; RUN: opt -S -passes=vector-combine -data-layout=E -mtriple=spirv-unknown-vulkan1.3-library %s | FileCheck %s --check-prefix=SPIRV
define void @insert_store(ptr %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
@@ -9,6 +10,13 @@ define void @insert_store(ptr %q, i8 zeroext %s) {
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%vecins = insertelement <16 x i8> %0, i8 %s, i32 3
@@ -23,6 +31,13 @@ define void @insert_store_i16_align1(ptr %q, i16 zeroext %s) {
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_i16_align1(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 1
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <8 x i16>, ptr %q
%vecins = insertelement <8 x i16> %0, i16 %s, i32 3
@@ -39,6 +54,13 @@ define void @insert_store_outofbounds(ptr %q, i16 zeroext %s) {
; CHECK-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_outofbounds(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
+; SPIRV-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <8 x i16>, ptr %q
%vecins = insertelement <8 x i16> %0, i16 %s, i32 9
@@ -53,6 +75,13 @@ define void @insert_store_vscale(ptr %q, i16 zeroext %s) {
; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 8 x i16>, ptr %q
%vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
@@ -70,6 +99,13 @@ define void @insert_store_vscale_exceeds(ptr %q, i16 zeroext %s) {
; CHECK-NEXT: store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_exceeds(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
+; SPIRV-NEXT: store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 8 x i16>, ptr %q
%vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 9
@@ -85,6 +121,13 @@ define void @insert_store_v9i4(ptr %q, i4 zeroext %s) {
; CHECK-NEXT: store <9 x i4> [[VECINS]], ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_v9i4(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <9 x i4>, ptr [[Q:%.*]], align 8
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <9 x i4> [[VECINS]], ptr [[Q]], align 1
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <9 x i4>, ptr %q
%vecins = insertelement <9 x i4> %0, i4 %s, i32 3
@@ -100,6 +143,13 @@ define void @insert_store_v4i27(ptr %q, i27 zeroext %s) {
; CHECK-NEXT: store <4 x i27> [[VECINS]], ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_v4i27(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <4 x i27>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <4 x i27> [[VECINS]], ptr [[Q]], align 1
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <4 x i27>, ptr %q
%vecins = insertelement <4 x i27> %0, i27 %s, i32 3
@@ -113,6 +163,12 @@ define void @insert_store_v32i1(ptr %p) {
; CHECK-NEXT: [[INS:%.*]] = insertelement <32 x i1> [[VEC]], i1 true, i64 0
; CHECK-NEXT: store <32 x i1> [[INS]], ptr [[P]], align 4
; CHECK-NEXT: ret void
+;
+; SPIRV-LABEL: @insert_store_v32i1(
+; SPIRV-NEXT: [[VEC:%.*]] = load <32 x i1>, ptr [[P:%.*]], align 4
+; SPIRV-NEXT: [[INS:%.*]] = insertelement <32 x i1> [[VEC]], i1 true, i64 0
+; SPIRV-NEXT: store <32 x i1> [[INS]], ptr [[P]], align 4
+; SPIRV-NEXT: ret void
;
%vec = load <32 x i1>, ptr %p
%ins = insertelement <32 x i1> %vec, i1 true, i64 0
@@ -130,6 +186,15 @@ define void @insert_store_blk_differ(ptr %q, i16 zeroext %s) {
; CHECK-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_blk_differ(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: br label [[CONT:%.*]]
+; SPIRV: cont:
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <8 x i16> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <8 x i16>, ptr %q
br label %cont
@@ -147,6 +212,13 @@ define void @insert_store_nonconst(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
@@ -164,6 +236,13 @@ define void @insert_store_vscale_nonconst(ptr %q, i8 zeroext %s, i32 %idx) {
; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 16 x i8>, ptr %q
%vecins = insertelement <vscale x 16 x i8> %0, i8 %s, i32 %idx
@@ -181,6 +260,15 @@ define void @insert_store_nonconst_large_alignment(ptr %q, i32 zeroext %s, i32 %
; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 4
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_large_alignment(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[I:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 128
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <4 x i32> [[I]], i32 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <4 x i32> [[VECINS]], ptr [[Q]], align 128
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 4
call void @llvm.assume(i1 %cmp)
@@ -197,6 +285,14 @@ define void @insert_store_nonconst_align_maximum_8(ptr %q, i64 %s, i32 %idx) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 8
; CHECK-NEXT: ret void
+;
+; SPIRV-LABEL: @insert_store_nonconst_align_maximum_8(
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 8
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <8 x i64> [[VECINS]], ptr [[Q]], align 8
+; SPIRV-NEXT: ret void
;
%cmp = icmp ult i32 %idx, 2
call void @llvm.assume(i1 %cmp)
@@ -213,6 +309,14 @@ define void @insert_store_nonconst_align_maximum_4(ptr %q, i64 %s, i32 %idx) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
; CHECK-NEXT: ret void
+;
+; SPIRV-LABEL: @insert_store_nonconst_align_maximum_4(
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <8 x i64> [[VECINS]], ptr [[Q]], align 4
+; SPIRV-NEXT: ret void
;
%cmp = icmp ult i32 %idx, 2
call void @llvm.assume(i1 %cmp)
@@ -229,6 +333,14 @@ define void @insert_store_nonconst_align_larger(ptr %q, i64 %s, i32 %idx) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
; CHECK-NEXT: ret void
+;
+; SPIRV-LABEL: @insert_store_nonconst_align_larger(
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <8 x i64> [[VECINS]], ptr [[Q]], align 2
+; SPIRV-NEXT: ret void
;
%cmp = icmp ult i32 %idx, 2
call void @llvm.assume(i1 %cmp)
@@ -247,6 +359,15 @@ define void @insert_store_nonconst_index_known_valid_by_assume(ptr %q, i8 zeroex
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 4
call void @llvm.assume(i1 %cmp)
@@ -267,6 +388,15 @@ define void @insert_store_vscale_nonconst_index_known_valid_by_assume(ptr %q, i8
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_known_valid_by_assume(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 4
call void @llvm.assume(i1 %cmp)
@@ -289,6 +419,16 @@ define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(pt
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: call void @maythrow()
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 4
%0 = load <16 x i8>, ptr %q
@@ -309,6 +449,15 @@ define void @insert_store_nonconst_index_not_known_valid_by_assume(ptr %q, i8 ze
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 17
call void @llvm.assume(i1 %cmp)
@@ -330,6 +479,15 @@ define void @insert_store_vscale_nonconst_index_not_known_valid_by_assume(ptr %q
; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_not_known_valid_by_assume(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
+; SPIRV-NEXT: call void @llvm.assume(i1 [[CMP]])
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%cmp = icmp ult i32 %idx, 17
call void @llvm.assume(i1 %cmp)
@@ -349,6 +507,14 @@ define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(ptr %q,
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 7
@@ -367,6 +533,14 @@ define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(p
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 7
@@ -384,6 +558,15 @@ define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(ptr %q, i8
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.frozen = freeze i32 %idx
@@ -403,6 +586,15 @@ define void @insert_store_nonconst_index_frozen_and_valid_by_and(ptr %q, i8 zero
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; SPIRV-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 7
@@ -421,6 +613,14 @@ define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(pt
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 7
@@ -438,6 +638,14 @@ define void @insert_store_nonconst_index_not_known_valid_by_and(ptr %q, i8 zeroe
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 16
@@ -455,6 +663,14 @@ define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(pt
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 16
@@ -474,6 +690,14 @@ define void @insert_store_vscale_nonconst_index_not_known_valid_by_and(ptr %q, i
; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_not_known_valid_by_and(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 31
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 16 x i8>, ptr %q
%idx.clamped = and i32 %idx, 31
@@ -490,6 +714,14 @@ define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(ptr %q,
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 16
@@ -508,6 +740,14 @@ define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 16
@@ -525,6 +765,15 @@ define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(ptr %q, i
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.frozen = freeze i32 %idx
@@ -544,6 +793,15 @@ define void @insert_store_nonconst_index_frozen_and_valid_by_urem(ptr %q, i8 zer
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 16
@@ -562,6 +820,14 @@ define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(p
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 16
@@ -579,6 +845,14 @@ define void @insert_store_nonconst_index_not_known_valid_by_urem(ptr %q, i8 zero
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 17
@@ -598,6 +872,14 @@ define void @insert_store_vscale_nonconst_index_not_known_valid_by_urem(ptr %q,
; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_vscale_nonconst_index_not_known_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <vscale x 16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 17
@@ -615,6 +897,14 @@ define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(p
; CHECK-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%idx.clamped = urem i32 %idx, 17
@@ -630,6 +920,13 @@ define void @insert_store_ptr_strip(ptr %q, i8 zeroext %s) {
; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_ptr_strip(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[VECINS]], ptr [[Q]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%vecins = insertelement <16 x i8> %0, i8 %s, i32 3
@@ -648,6 +945,16 @@ define void @volatile_update(ptr %q, ptr %p, i8 zeroext %s) {
; CHECK-NEXT: store <16 x i8> [[VECINS1]], ptr [[P]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @volatile_update(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store volatile <16 x i8> [[VECINS0]], ptr [[Q]], align 16
+; SPIRV-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr [[P:%.*]], align 16
+; SPIRV-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
+; SPIRV-NEXT: store <16 x i8> [[VECINS1]], ptr [[P]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%0 = load <16 x i8>, ptr %q
%vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
@@ -667,6 +974,13 @@ define void @insert_store_addr_differ(ptr %p, ptr %q, i8 %s) {
; CHECK-NEXT: store <16 x i8> [[INS]], ptr [[Q:%.*]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_addr_differ(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
+; SPIRV-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[INS]], ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%ld = load <16 x i8>, ptr %p
%ins = insertelement <16 x i8> %ld, i8 %s, i32 3
@@ -691,6 +1005,22 @@ define void @insert_store_mem_modify(ptr %p, ptr %q, ptr noalias %r, i8 %s, i32
; CHECK-NEXT: store <4 x i32> [[INS3]], ptr [[P]], align 16
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_mem_modify(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
+; SPIRV-NEXT: store <16 x i8> zeroinitializer, ptr [[Q:%.*]], align 16
+; SPIRV-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[INS]], ptr [[P]], align 16
+; SPIRV-NEXT: [[LD2:%.*]] = load <16 x i8>, ptr [[Q]], align 16
+; SPIRV-NEXT: store <16 x i8> zeroinitializer, ptr [[R:%.*]], align 16
+; SPIRV-NEXT: [[INS2:%.*]] = insertelement <16 x i8> [[LD2]], i8 [[S]], i32 7
+; SPIRV-NEXT: store <16 x i8> [[INS2]], ptr [[Q]], align 16
+; SPIRV-NEXT: [[LD3:%.*]] = load <4 x i32>, ptr [[P]], align 16
+; SPIRV-NEXT: store <16 x i8> zeroinitializer, ptr [[P]], align 16
+; SPIRV-NEXT: [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
+; SPIRV-NEXT: store <4 x i32> [[INS3]], ptr [[P]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
; p may alias q
%ld = load <16 x i8>, ptr %p
@@ -727,6 +1057,19 @@ define void @insert_store_with_call(ptr %p, ptr %q, i8 %s) {
; CHECK-NEXT: store i8 [[S]], ptr [[TMP0]], align 1
; CHECK-NEXT: ret void
;
+; SPIRV-LABEL: @insert_store_with_call(
+; SPIRV-NEXT: entry:
+; SPIRV-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 16
+; SPIRV-NEXT: call void @maywrite(ptr [[P]])
+; SPIRV-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[INS]], ptr [[P]], align 16
+; SPIRV-NEXT: call void @foo()
+; SPIRV-NEXT: [[LD2:%.*]] = load <16 x i8>, ptr [[P]], align 16
+; SPIRV-NEXT: call void @nowrite(ptr [[P]])
+; SPIRV-NEXT: [[INS2:%.*]] = insertelement <16 x i8> [[LD2]], i8 [[S]], i32 7
+; SPIRV-NEXT: store <16 x i8> [[INS2]], ptr [[P]], align 16
+; SPIRV-NEXT: ret void
+;
entry:
%ld = load <16 x i8>, ptr %p
call void @maywrite(ptr %p)
@@ -786,6 +1129,45 @@ define i32 @insert_store_maximum_scan_instrs(i32 %arg, ptr %arg1, ptr %arg2, i8
; CHECK-NEXT: store <16 x i8> [[I36]], ptr [[ARG2]], align 16
; CHECK-NEXT: ret i32 [[I35]]
;
+; SPIRV-LABEL: @insert_store_maximum_scan_instrs(
+; SPIRV-NEXT: bb:
+; SPIRV-NEXT: [[I:%.*]] = or i32 [[ARG:%.*]], 1
+; SPIRV-NEXT: [[I4:%.*]] = load <16 x i8>, ptr [[ARG2:%.*]], align 16
+; SPIRV-NEXT: [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
+; SPIRV-NEXT: [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
+; SPIRV-NEXT: [[I7:%.*]] = lshr i32 [[I6]], 26
+; SPIRV-NEXT: [[I8:%.*]] = trunc i32 [[I7]] to i8
+; SPIRV-NEXT: [[I9:%.*]] = and i8 [[I8]], 31
+; SPIRV-NEXT: [[I10:%.*]] = lshr i32 [[I6]], 11
+; SPIRV-NEXT: [[I11:%.*]] = and i32 [[I10]], 32767
+; SPIRV-NEXT: [[I12:%.*]] = zext i8 [[I9]] to i64
+; SPIRV-NEXT: [[I13:%.*]] = getelementptr inbounds i16, ptr [[ARG1:%.*]], i64 [[I12]]
+; SPIRV-NEXT: [[I14:%.*]] = load i16, ptr [[I13]], align 2
+; SPIRV-NEXT: [[I15:%.*]] = zext i16 [[I14]] to i32
+; SPIRV-NEXT: [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
+; SPIRV-NEXT: [[I17:%.*]] = zext i8 [[I16]] to i64
+; SPIRV-NEXT: [[I18:%.*]] = getelementptr inbounds i16, ptr [[ARG1]], i64 [[I17]]
+; SPIRV-NEXT: [[I19:%.*]] = load i16, ptr [[I18]], align 2
+; SPIRV-NEXT: [[I20:%.*]] = zext i16 [[I19]] to i32
+; SPIRV-NEXT: [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
+; SPIRV-NEXT: [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
+; SPIRV-NEXT: [[I23:%.*]] = ashr i32 [[I22]], 15
+; SPIRV-NEXT: [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
+; SPIRV-NEXT: [[I25:%.*]] = xor i32 [[I24]], 1015808
+; SPIRV-NEXT: [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
+; SPIRV-NEXT: [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
+; SPIRV-NEXT: [[I28:%.*]] = sitofp i32 [[ARG]] to double
+; SPIRV-NEXT: [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
+; SPIRV-NEXT: [[I30:%.*]] = fptosi double [[I29]] to i32
+; SPIRV-NEXT: [[I31:%.*]] = shl nsw i32 [[I30]], 15
+; SPIRV-NEXT: [[I32:%.*]] = or i32 [[I31]], 4
+; SPIRV-NEXT: [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
+; SPIRV-NEXT: [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
+; SPIRV-NEXT: [[I35:%.*]] = lshr i32 [[I34]], 1
+; SPIRV-NEXT: [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
+; SPIRV-NEXT: store <16 x i8> [[I36]], ptr [[ARG2]], align 16
+; SPIRV-NEXT: ret i32 [[I35]]
+;
bb:
%i = or i32 %arg, 1
%i4 = load <16 x i8>, ptr %arg2, align 16
>From ac6b559057bc5e2fc9095288e40b71bcf431629f Mon Sep 17 00:00:00 2001
From: Nathan Gauër <brioche at google.com>
Date: Tue, 29 Jul 2025 15:56:25 +0200
Subject: [PATCH 2/2] add TTI hook
---
.../llvm/Analysis/TargetTransformInfo.h | 4 ++++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++++
.../Target/SPIRV/SPIRVTargetTransformInfo.h | 2 ++
.../lib/Transforms/Vectorize/VectorCombine.cpp | 18 +++++++++++-------
5 files changed, 23 insertions(+), 7 deletions(-)
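The second patch replaces the triple check with a TTI hook, so the
gating is expressed as a target property rather than a hardcoded
triple. The hook also covers scalarizeLoadExtract, which performs the
same kind of GEP-based element indexing; a minimal sketch of that
rewrite (illustrative names and alignment, assuming the pass's cost
checks pass):

  ; before (kept when the target disallows GEP element indexing)
  %v = load <4 x i32>, ptr %p
  %e = extractelement <4 x i32> %v, i32 1

  ; after (scalarized form on other targets)
  %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 1
  %e = load i32, ptr %gep, align 4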
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7928835f7f84d..be0529754a116 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1950,6 +1950,10 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+ /// Returns true if GEPs may be used to index into vectors on this
+ /// target.
+ LLVM_ABI bool isVectorElementIndexingUsingGEPAllowed() const;
+
private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2ea87b3c62895..0e705cc8258f2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1145,6 +1145,8 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+ virtual bool isVectorElementIndexingUsingGEPAllowed() const { return true; }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 55ba52a1079ce..50c1993eb35a2 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1486,6 +1486,10 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}
+bool TargetTransformInfo::isVectorElementIndexingUsingGEPAllowed() const {
+ return TTIImpl->isVectorElementIndexingUsingGEPAllowed();
+}
+
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 43bf6e9dd2a6e..bd066873a28af 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -59,6 +59,8 @@ class SPIRVTTIImpl final : public BasicTTIImplBase<SPIRVTTIImpl> {
Intrinsic::ID IID) const override;
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override;
+
+ bool isVectorElementIndexingUsingGEPAllowed() const override { return false; }
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 20b8165ff280a..51463317101cb 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1664,6 +1664,8 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
// %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
// store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
+ if (!TTI.isVectorElementIndexingUsingGEPAllowed())
+ return false;
auto *SI = cast<StoreInst>(&I);
if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
return false;
@@ -1719,6 +1721,9 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
/// Try to scalarize vector loads feeding extractelement instructions.
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
+ if (!TTI.isVectorElementIndexingUsingGEPAllowed())
+ return false;
+
Value *Ptr;
if (!match(&I, m_Load(m_Value(Ptr))))
return false;
@@ -1827,6 +1832,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
}
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
+ if (!TTI.isVectorElementIndexingUsingGEPAllowed())
+ return false;
auto *Ext = dyn_cast<ZExtInst>(&I);
if (!Ext)
return false;
@@ -3750,9 +3757,8 @@ bool VectorCombine::run() {
LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");
- const bool isSPIRV = F.getParent()->getTargetTriple().isSPIRV();
bool MadeChange = false;
- auto FoldInst = [this, &MadeChange, isSPIRV](Instruction &I) {
+ auto FoldInst = [this, &MadeChange](Instruction &I) {
Builder.SetInsertPoint(&I);
bool IsVectorType = isa<VectorType>(I.getType());
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
@@ -3781,15 +3787,13 @@ bool VectorCombine::run() {
// TODO: Identify and allow other scalable transforms
if (IsVectorType) {
MadeChange |= scalarizeOpOrCmp(I);
- if (!isSPIRV) {
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= scalarizeExtExtract(I);
- }
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= scalarizeExtExtract(I);
MadeChange |= scalarizeVPIntrinsic(I);
MadeChange |= foldInterleaveIntrinsics(I);
}
- if (Opcode == Instruction::Store && !isSPIRV)
+ if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
// If this is an early pipeline invocation of this pass, we are done.