[llvm] [GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator (PR #85199)
Dhruv Chawla via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 20:49:36 PDT 2024
https://github.com/dc03-work updated https://github.com/llvm/llvm-project/pull/85199
>From 915cdc5e4cc2542e0ea75ac57e0639b3be86fc69 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Mon, 11 Mar 2024 11:44:21 +0530
Subject: [PATCH 1/5] [GlobalISel] Add support for interleave and deinterleave
intrinsics to IRTranslator
This patch adds support for the @llvm.experimental.vector.{interleave2,
deinterleave2} intrinsics to IRTranslator for fixed-width vector types.
They are lowered to vector shuffles, in roughly the same manner as
SelectionDAG.
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 44 ++++++++++
.../AArch64/complex-deinterleaving-f16-add.ll | 81 +++++++++++++------
.../AArch64/fixed-vector-deinterleave.ll | 79 ++++++++++++------
.../AArch64/fixed-vector-interleave.ll | 51 +++++++-----
4 files changed, 187 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 94fdb37e283bb4..9db77adf0aaae2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -2474,6 +2475,49 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+
+ case Intrinsic::experimental_vector_interleave2: {
+ Value *Src0 = CI.getOperand(0);
+ Value *Src1 = CI.getOperand(1);
+
+ // Canonicalize fixed-width vector types to G_SHUFFLE_VECTOR
+ // (similar to SelectionDAG)
+ LLT OpType = getLLTForType(*Src0->getType(), MIRBuilder.getDataLayout());
+ if (!OpType.isFixedVector())
+ break;
+
+ Register Op0 = getOrCreateVReg(*Src0);
+ Register Op1 = getOrCreateVReg(*Src1);
+ Register Res = getOrCreateVReg(CI);
+
+ MIRBuilder.buildShuffleVector(
+ Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2));
+
+ return true;
+ }
+
+ case Intrinsic::experimental_vector_deinterleave2: {
+ Value *Src = CI.getOperand(0);
+
+ // Canonicalize fixed-width vector types to shuffles that extract
+ // sub-vectors (similar to SelectionDAG)
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+ LLT ResTy = MRI->getType(Res[0]);
+ if (!ResTy.isFixedVector())
+ break;
+
+ Register Op = getOrCreateVReg(*Src);
+ LLT OpType = getLLTForType(*Src->getType(), MIRBuilder.getDataLayout());
+
+ auto Undef = MIRBuilder.buildUndef(OpType);
+ MIRBuilder.buildShuffleVector(
+ Res[0], Op, Undef, createStrideMask(0, 2, ResTy.getNumElements()));
+ MIRBuilder.buildShuffleVector(
+ Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements()));
+
+ return true;
+ }
+
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index 7b8448de2331b4..7cdb10e7159f03 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,23 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
-; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target triple = "aarch64"
+; CHECK-GI: warning: Instruction selection used fallback path for complex_add_v16f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic
+
; Expected to not transform
define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: complex_add_v2f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h3, v1.h[1]
-; CHECK-NEXT: fsub h1, h1, h2
-; CHECK-NEXT: fadd h0, h3, h0
-; CHECK-NEXT: mov v1.h[1], v0.h[0]
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: complex_add_v2f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NEXT: mov h3, v1.h[1]
+; CHECK-SD-NEXT: fsub h1, h1, h2
+; CHECK-SD-NEXT: fadd h0, h3, h0
+; CHECK-SD-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: complex_add_v2f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: fsub h1, h1, h2
+; CHECK-GI-NEXT: fadd h0, h3, h0
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
%a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:
; Expected not to transform as it is integer
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; CHECK-LABEL: complex_add_v16i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
-; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
-; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: complex_add_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: sub v2.8h, v4.8h, v0.8h
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: zip1 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: zip2 v1.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: complex_add_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: uzp1 v4.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT: uzp2 v2.8h, v2.8h, v3.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: add v2.8h, v2.8h, v4.8h
+; CHECK-GI-NEXT: zip1 v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: zip2 v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 1b1cfead0f97ac..2ad5623b655176 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,29 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: dup v2.2s, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: mov v1.h[0], v0.h[1]
-; CHECK-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: mov h0, v2.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
ret {<2 x half>, <2 x half>} %retval
}
define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: uzp1 v2.4h, v0.4h, v1.4h
-; CHECK-NEXT: uzp2 v1.4h, v0.4h, v1.4h
-; CHECK-NEXT: fmov d0, d2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: uzp1 v2.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp2 v1.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: fmov d0, d2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: uzp2 v1.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
ret {<4 x half>, <4 x half>} %retval
}
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
}
define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: zip2 v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: fmov d0, d2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: zip1 v2.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: zip2 v1.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmov d0, d2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uzp1 v2.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT: fmov d0, d2
+; CHECK-GI-NEXT: ret
%retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
ret {<2 x float>, <2 x float>} %retval
}
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 071c1ffdbb45dc..eb81aff33e4963 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
; CHECK-LABEL: interleave2_v4f16:
@@ -11,15 +12,22 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
}
define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
-; CHECK-LABEL: interleave2_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: interleave2_v8f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: adrp x8, .LCPI1_0
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_v8f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: zip1 v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%retval = call <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
ret <8 x half> %retval
}
@@ -36,14 +44,21 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
}
define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
-; CHECK-LABEL: interleave2_v4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: rev64 v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: interleave2_v4f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: rev64 v1.4s, v0.4s
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_v4f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: zip1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ret
%retval = call <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)
ret <4 x float> %retval
}
>From e9c420e47aeaf8a2a6ef344cdbd084a96c1df3a7 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Thu, 14 Mar 2024 15:16:58 +0530
Subject: [PATCH 2/5] Add IRTranslator tests
---
.../irtranslator-vector-deinterleave2.ll | 34 +++++++++++++++++++
.../irtranslator-vector-interleave2.ll | 30 ++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll
new file mode 100644
index 00000000000000..10882a06af1b40
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-deinterleave2.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
+
+define void @vector_deinterleave2_v4i32(<4 x i32> %a) {
+ ; CHECK-LABEL: name: vector_deinterleave2_v4i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(0, 2)
+ ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
+ ; CHECK-NEXT: RET_ReallyLR
+ %res = call {<2 x i32>, <2 x i32>} @llvm.experimental.vector.deinterleave2.v4i32(<4 x i32> %a)
+ ret void
+}
+
+define void @vector_deinterleave2_v8f32(<8 x float> %a) {
+ ; CHECK-LABEL: name: vector_deinterleave2_v8f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(0, 2, 4, 6)
+ ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(1, 3, 5, 7)
+ ; CHECK-NEXT: RET_ReallyLR
+ %res = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %a)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll
new file mode 100644
index 00000000000000..f51e47a428d10e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-vector-interleave2.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
+
+define void @vector_interleave2_v4i32(<2 x i32> %a, <2 x i32> %b) {
+ ; CHECK-LABEL: name: vector_interleave2_v4i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
+ ; CHECK-NEXT: RET_ReallyLR
+ %res = call <4 x i32> @llvm.experimental.vector.interleave2.v4i32(<2 x i32> %a, <2 x i32> %b)
+ ret void
+}
+
+define void @vector_interleave2_v8f32(<4 x float> %a, <4 x float> %b) {
+ ; CHECK-LABEL: name: vector_interleave2_v8f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[BITCAST]](<4 x s32>), [[BITCAST1]], shufflemask(0, 4, 1, 5, 2, 6, 3, 7)
+ ; CHECK-NEXT: RET_ReallyLR
+ %res = call <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
+ ret void
+}
>From 8d8038cd4f9d7721727974133978ec5d421a57db Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Thu, 14 Mar 2024 17:20:03 +0530
Subject: [PATCH 3/5] Refactor code
---
.../llvm/CodeGen/GlobalISel/IRTranslator.h | 8 ++
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 75 ++++++++++---------
2 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 29f675b2203b6b..cea0e0e11af123 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -243,6 +243,14 @@ class IRTranslator : public MachineFunctionPass {
bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
unsigned Opcode);
+ // Translate @llvm.experimental.vector.interleave2 and
+ // @llvm.experimental.vector.deinterleave2 intrinsics for fixed-width vector
+ // types into vector shuffles.
+ bool translateVectorInterleave2Intrinsic(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder);
+ bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder);
+
void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9db77adf0aaae2..90ff930b2571b0 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1771,6 +1771,37 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
return true;
}
+bool IRTranslator::translateVectorInterleave2Intrinsic(
+ const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG)
+ Register Op0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Op1 = getOrCreateVReg(*CI.getOperand(1));
+ Register Res = getOrCreateVReg(CI);
+
+ LLT OpTy = MRI->getType(Op0);
+ MIRBuilder.buildShuffleVector(Res, Op0, Op1,
+ createInterleaveMask(OpTy.getNumElements(), 2));
+
+ return true;
+}
+
+bool IRTranslator::translateVectorDeinterleave2Intrinsic(
+ const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ // Canonicalize deinterleave2 shuffles that extract sub-vectors (similar to
+ // SelectionDAG)
+ Register Op = getOrCreateVReg(*CI.getOperand(0));
+ auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+ LLT ResTy = MRI->getType(Res[0]);
+ MIRBuilder.buildShuffleVector(Res[0], Op, Undef,
+ createStrideMask(0, 2, ResTy.getNumElements()));
+ MIRBuilder.buildShuffleVector(Res[1], Op, Undef,
+ createStrideMask(1, 2, ResTy.getNumElements()));
+
+ return true;
+}
+
void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -2476,46 +2507,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
- case Intrinsic::experimental_vector_interleave2: {
- Value *Src0 = CI.getOperand(0);
- Value *Src1 = CI.getOperand(1);
-
- // Canonicalize fixed-width vector types to G_SHUFFLE_VECTOR
- // (similar to SelectionDAG)
- LLT OpType = getLLTForType(*Src0->getType(), MIRBuilder.getDataLayout());
- if (!OpType.isFixedVector())
- break;
-
- Register Op0 = getOrCreateVReg(*Src0);
- Register Op1 = getOrCreateVReg(*Src1);
- Register Res = getOrCreateVReg(CI);
-
- MIRBuilder.buildShuffleVector(
- Res, Op0, Op1, createInterleaveMask(OpType.getNumElements(), 2));
-
- return true;
- }
-
+ case Intrinsic::experimental_vector_interleave2:
case Intrinsic::experimental_vector_deinterleave2: {
- Value *Src = CI.getOperand(0);
-
- // Canonicalize fixed-width vector types to shuffles that extract
- // sub-vectors (similar to SelectionDAG)
- ArrayRef<Register> Res = getOrCreateVRegs(CI);
- LLT ResTy = MRI->getType(Res[0]);
+ // Both intrinsics have at least one operand.
+ Value *Op0 = CI.getOperand(0);
+ LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout());
if (!ResTy.isFixedVector())
- break;
-
- Register Op = getOrCreateVReg(*Src);
- LLT OpType = getLLTForType(*Src->getType(), MIRBuilder.getDataLayout());
+ return false;
- auto Undef = MIRBuilder.buildUndef(OpType);
- MIRBuilder.buildShuffleVector(
- Res[0], Op, Undef, createStrideMask(0, 2, ResTy.getNumElements()));
- MIRBuilder.buildShuffleVector(
- Res[1], Op, Undef, createStrideMask(1, 2, ResTy.getNumElements()));
+ if (CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ return translateVectorInterleave2Intrinsic(CI, MIRBuilder);
- return true;
+ return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
}
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
>From 8f880b303bb512501f15c6684f1939b912f07393 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Thu, 14 Mar 2024 17:22:25 +0530
Subject: [PATCH 4/5] Fix English.
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 90ff930b2571b0..150f44bef60c29 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1773,7 +1773,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
bool IRTranslator::translateVectorInterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
- // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG)
+ // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
Register Op0 = getOrCreateVReg(*CI.getOperand(0));
Register Op1 = getOrCreateVReg(*CI.getOperand(1));
Register Res = getOrCreateVReg(CI);
@@ -1787,8 +1787,8 @@ bool IRTranslator::translateVectorInterleave2Intrinsic(
bool IRTranslator::translateVectorDeinterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
- // Canonicalize deinterleave2 shuffles that extract sub-vectors (similar to
- // SelectionDAG)
+ // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
+ // SelectionDAG).
Register Op = getOrCreateVReg(*CI.getOperand(0));
auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
ArrayRef<Register> Res = getOrCreateVRegs(CI);
>From b6984adbc3b7172c0ee58e2fb6df90159bbe43b2 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhruvc at nvidia.com>
Date: Fri, 15 Mar 2024 09:18:59 +0530
Subject: [PATCH 5/5] Add assert to functions
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 150f44bef60c29..5cdc7926b013ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1773,6 +1773,8 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
bool IRTranslator::translateVectorInterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2 &&
+ "This function can only be called on the interleave2 intrinsic!");
// Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
Register Op0 = getOrCreateVReg(*CI.getOperand(0));
Register Op1 = getOrCreateVReg(*CI.getOperand(1));
@@ -1787,6 +1789,8 @@ bool IRTranslator::translateVectorInterleave2Intrinsic(
bool IRTranslator::translateVectorDeinterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2 &&
+ "This function can only be called on the deinterleave2 intrinsic!");
// Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
// SelectionDAG).
Register Op = getOrCreateVReg(*CI.getOperand(0));
More information about the llvm-commits
mailing list