[llvm] [AArch64][GlobalISel] Refactor BITCAST Legalization (PR #80505)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 14:54:32 PST 2024
https://github.com/chuongg3 created https://github.com/llvm/llvm-project/pull/80505
Ensure BITCAST is only legal for types with the same number of bits.
Enable BITCAST to work with non-legal vector types as well.
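As a rough illustration (hypothetical function name; it mirrors the bitcast_v4i64_v8i32 test added below), a 256-bit bitcast like the following has no legal AArch64 vector width, and with this patch GlobalISel narrows the G_BITCAST into two 128-bit bitcasts instead of falling back to SelectionDAG:

define <8 x i32> @example_v4i64_v8i32(<4 x i64> %a) {
  %b = bitcast <4 x i64> %a to <8 x i32>
  ret <8 x i32> %b
}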
From b8801421db02d158419d72a0eb32529430eb9c4f Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 2 Feb 2024 15:58:57 +0000
Subject: [PATCH 1/2] [AArch64][GlobalISel] Pre-Commit Tests for Refactor
BITCAST
---
llvm/test/CodeGen/AArch64/bitcast.ll | 508 ++++++++++++++++++++++++++-
1 file changed, 499 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index d60bd4ab3fc5f..bac9b48a4087b 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -1,12 +1,39 @@
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
-define <4 x i16> @foo1(<2 x i32> %a) {
-; CHECK-LABEL: foo1:
-; CHECK: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-GI: warning: Instruction selection used fallback path for bitcast_v4i8_i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v8i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v16i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v4i64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v16i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i64_v16i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v4i64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v8i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i32_v8i64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v3i32_v6i16
+define <4 x i16> @foo1(<2 x i32> %a) {
+; CHECK-SD-LABEL: foo1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: foo1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v1.s[1], w8
+; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
%2 = bitcast <2 x i32> %1 to <4 x i16>
@@ -15,13 +42,476 @@ define <4 x i16> @foo1(<2 x i32> %a) {
}
define <4 x i16> @foo2(<2 x i32> %a) {
-; CHECK-LABEL: foo2:
-; CHECK: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
-
+; CHECK-SD-LABEL: foo2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: foo2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v1.s[1], w8
+; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
%2 = bitcast <2 x i32> %1 to <4 x i16>
%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
ret <4 x i16> %3
}
+
+; ===== To and From Scalar Types =====
+
+define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
+; CHECK-LABEL: bitcast_v4i8_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %c = add <4 x i8> %a, %b
+ %d = bitcast <4 x i8> %c to i32
+ ret i32 %d
+}
+
+define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
+; CHECK-LABEL: bitcast_i32_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: ret
+ %c = add i32 %a, %b
+ %d = bitcast i32 %c to <4 x i8>
+ ret <4 x i8> %d
+}
+
+define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
+; CHECK-LABEL: bitcast_v2i16_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w9, [sp, #12]
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: ldr w0, [sp, #12]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %c = add <2 x i16> %a, %b
+ %d = bitcast <2 x i16> %c to i32
+ ret i32 %d
+}
+
+define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
+; CHECK-LABEL: bitcast_i32_v2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = add i32 %a, %b
+ %d = bitcast i32 %c to <2 x i16>
+ ret <2 x i16> %d
+}
+
+define i64 @bitcast_v8i8_i64(<8 x i8> %a, <8 x i8> %b){
+; CHECK-LABEL: bitcast_v8i8_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to i64
+ ret i64 %d
+}
+
+define <8 x i8> @bitcast_i64_v8i8(i64 %a, i64 %b){
+; CHECK-LABEL: bitcast_i64_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <8 x i8>
+ ret <8 x i8> %d
+}
+
+define i64 @bitcast_v4i16_i64(<4 x i16> %a, <4 x i16> %b){
+; CHECK-LABEL: bitcast_v4i16_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to i64
+ ret i64 %d
+}
+
+define <4 x i16> @bitcast_i64_v4i16(i64 %a, i64 %b){
+; CHECK-LABEL: bitcast_i64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <4 x i16>
+ ret <4 x i16> %d
+}
+
+define i64 @bitcast_v2i32_i64(<2 x i32> %a, <2 x i32> %b){
+; CHECK-LABEL: bitcast_v2i32_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to i64
+ ret i64 %d
+}
+
+define <2 x i32> @bitcast_i64_v2i32(i64 %a, i64 %b){
+; CHECK-LABEL: bitcast_i64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <2 x i32>
+ ret <2 x i32> %d
+}
+
+; ===== Legal Vector Types =====
+
+define <4 x i16> @bitcast_v2i32_v4i16(<2 x i32> %a, <2 x i32> %b){
+; CHECK-LABEL: bitcast_v2i32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to <4 x i16>
+ ret <4 x i16> %d
+}
+
+define <4 x i32> @bitcast_v2i64_v4i32(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: bitcast_v2i64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <4 x i32>
+ ret <4 x i32> %d
+}
+
+define <8 x i8> @bitcast_v2i32_v8i8(<2 x i32> %a, <2 x i32> %b){
+; CHECK-LABEL: bitcast_v2i32_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to <8 x i8>
+ ret <8 x i8> %d
+}
+
+define <8 x i16> @bitcast_v2i64_v8i16(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: bitcast_v2i64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <8 x i16>
+ ret <8 x i16> %d
+}
+
+define <16 x i8> @bitcast_v2i64_v16i8(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: bitcast_v2i64_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <16 x i8>
+ ret <16 x i8> %d
+}
+
+define <2 x i32> @bitcast_v4i16_v2i32(<4 x i16> %a, <4 x i16> %b){
+; CHECK-LABEL: bitcast_v4i16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to <2 x i32>
+ ret <2 x i32> %d
+}
+
+define <2 x i64> @bitcast_v4i32_v2i64(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: bitcast_v4i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <2 x i64>
+ ret <2 x i64> %d
+}
+
+define <8 x i8> @bitcast_v4i16_v8i8(<4 x i16> %a, <4 x i16> %b){
+; CHECK-LABEL: bitcast_v4i16_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to <8 x i8>
+ ret <8 x i8> %d
+}
+
+define <8 x i16> @bitcast_v4i32_v8i16(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: bitcast_v4i32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <8 x i16>
+ ret <8 x i16> %d
+}
+
+define <16 x i8> @bitcast_v4i32_v16i8(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: bitcast_v4i32_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <16 x i8>
+ ret <16 x i8> %d
+}
+
+define <2 x i32> @bitcast_v8i8_v2i32(<8 x i8> %a, <8 x i8> %b){
+; CHECK-LABEL: bitcast_v8i8_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to <2 x i32>
+ ret <2 x i32> %d
+}
+
+define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a, <8 x i16> %b){
+; CHECK-LABEL: bitcast_v8i16_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <2 x i64>
+ ret <2 x i64> %d
+}
+
+define <4 x i16> @bitcast_v8i8_v4i16(<8 x i8> %a, <8 x i8> %b){
+; CHECK-LABEL: bitcast_v8i8_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to <4 x i16>
+ ret <4 x i16> %d
+}
+
+define <4 x i32> @bitcast_v8i16_v4i32(<8 x i16> %a, <8 x i16> %b){
+; CHECK-LABEL: bitcast_v8i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <4 x i32>
+ ret <4 x i32> %d
+}
+
+define <16 x i8> @bitcast_v8i16_v16i8(<8 x i16> %a, <8 x i16> %b){
+; CHECK-LABEL: bitcast_v8i16_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <16 x i8>
+ ret <16 x i8> %d
+}
+
+define <2 x i64> @bitcast_v16i8_v2i64(<16 x i8> %a, <16 x i8> %b){
+; CHECK-LABEL: bitcast_v16i8_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <2 x i64>
+ ret <2 x i64> %d
+}
+
+define <4 x i32> @bitcast_v16i8_v4i32(<16 x i8> %a, <16 x i8> %b){
+; CHECK-LABEL: bitcast_v16i8_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <4 x i32>
+ ret <4 x i32> %d
+}
+
+define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
+; CHECK-LABEL: bitcast_v16i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <8 x i16>
+ ret <8 x i16> %d
+}
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
+; CHECK-LABEL: bitcast_v2i16_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w9, [sp, #12]
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %c = add <2 x i16> %a, %b
+ %d = bitcast <2 x i16> %c to <4 x i8>
+ ret <4 x i8> %d
+}
+
+define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
+; CHECK-LABEL: bitcast_v4i8_v2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: add x8, sp, #12
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: str s0, [sp, #12]
+; CHECK-NEXT: ld1 { v0.h }[0], [x8]
+; CHECK-NEXT: orr x8, x8, #0x2
+; CHECK-NEXT: ld1 { v0.h }[2], [x8]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %c = add <4 x i8> %a, %b
+ %d = bitcast <4 x i8> %c to <2 x i16>
+ ret <2 x i16> %d
+}
+
+define <8 x i32> @bitcast_v4i64_v8i32(<4 x i64> %a, <4 x i64> %b){
+; CHECK-LABEL: bitcast_v4i64_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
+ %c = add <4 x i64> %a, %b
+ %d = bitcast <4 x i64> %c to <8 x i32>
+ ret <8 x i32> %d
+}
+
+define <16 x i16> @bitcast_v4i64_v16i16(<4 x i64> %a, <4 x i64> %b){
+; CHECK-LABEL: bitcast_v4i64_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
+ %c = add <4 x i64> %a, %b
+ %d = bitcast <4 x i64> %c to <16 x i16>
+ ret <16 x i16> %d
+}
+
+define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
+; CHECK-LABEL: bitcast_v8i32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
+ %c = add <8 x i32> %a, %b
+ %d = bitcast <8 x i32> %c to <4 x i64>
+ ret <4 x i64> %d
+}
+
+define <16 x i16> @bitcast_v8i32_v16i16(<8 x i32> %a, <8 x i32> %b){
+; CHECK-LABEL: bitcast_v8i32_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
+ %c = add <8 x i32> %a, %b
+ %d = bitcast <8 x i32> %c to <16 x i16>
+ ret <16 x i16> %d
+}
+
+define <16 x i32> @bitcast_v8i64_v16i32(<8 x i64> %a, <8 x i64> %b){
+; CHECK-LABEL: bitcast_v8i64_v16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v2.2d, v2.2d, v6.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
+; CHECK-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-NEXT: ret
+ %c = add <8 x i64> %a, %b
+ %d = bitcast <8 x i64> %c to <16 x i32>
+ ret <16 x i32> %d
+}
+
+define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
+; CHECK-LABEL: bitcast_v16i16_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+ %c = add <16 x i16> %a, %b
+ %d = bitcast <16 x i16> %c to <4 x i64>
+ ret <4 x i64> %d
+}
+
+define <8 x i32> @bitcast_v16i16_v8i32(<16 x i16> %a, <16 x i16> %b){
+; CHECK-LABEL: bitcast_v16i16_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+ %c = add <16 x i16> %a, %b
+ %d = bitcast <16 x i16> %c to <8 x i32>
+ ret <8 x i32> %d
+}
+
+define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
+; CHECK-LABEL: bitcast_v16i32_v8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-NEXT: add v3.4s, v3.4s, v7.4s
+; CHECK-NEXT: ret
+ %c = add <16 x i32> %a, %b
+ %d = bitcast <16 x i32> %c to <8 x i64>
+ ret <8 x i64> %d
+}
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <6 x i16> @bitcast_v3i32_v6i16(<3 x i32> %a, <3 x i32> %b){
+; CHECK-LABEL: bitcast_v3i32_v6i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %c = add <3 x i32> %a, %b
+ %d = bitcast <3 x i32> %c to <6 x i16>
+ ret <6 x i16> %d
+}
From 44072418c0a04435c5b7700913cae6a84684eede Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 2 Feb 2024 10:54:24 +0000
Subject: [PATCH 2/2] [AArch64][GlobalISel] Refactor Legalize BITCAST
The 32-bit cases still do not work because instruction selection is unable
to select G_TRUNC with vector types that are smaller than legal.
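As a hedged example of that remaining gap (hypothetical function name; it mirrors the bitcast_i32_v4i8 test, which still falls back), lowering this case involves a G_TRUNC to a smaller-than-legal vector type (here <4 x s8>) that cannot be selected yet:

define <4 x i8> @example_i32_v4i8(i32 %a) {
  %b = bitcast i32 %a to <4 x i8>
  ret <4 x i8> %b
}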
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 5 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 52 ++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 15 +-
.../GlobalISel/legalizer-info-validation.mir | 4 +-
llvm/test/CodeGen/AArch64/bitcast.ll | 173 ++++++++++++------
5 files changed, 188 insertions(+), 61 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index a7ecf0dc1ba21..11e1f6a3339c5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -338,6 +338,11 @@ class LegalizerHelper {
unsigned TypeIdx,
LLT NarrowTy);
+ // Fewer Elements for bitcast, ensuring that the size of the Src and Dst
+ // registers will be the same
+ LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 464ff0864d146..372c75dea45ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4654,11 +4654,49 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
case G_FPOWI:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
+ case G_BITCAST:
+ return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
+ LLT NarrowTy) {
+ assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
+ "Not a bitcast operation");
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+
+ unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
+ LLT SrcNarrowTy =
+ LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
+
+ // Split the Src and Dst Reg into smaller registers
+ LLT SrcLeftoverTy;
+ SmallVector<Register> SrcVRegs, SrcLeftoverVRegs, BitcastVRegs;
+ // MIRBuilder.setInstrAndDebugLoc(MI);
+ if (!extractParts(SrcReg, SrcTy, SrcNarrowTy, SrcLeftoverTy, SrcVRegs,
+ SrcLeftoverVRegs, MIRBuilder, MRI))
+ return UnableToLegalize;
+
+ assert(SrcLeftoverVRegs.size() == 0 && "Splitting Source register failed");
+
+ // Build new smaller bitcast instructions
+ // Not supporting Leftover types for now but will have to
+ for (unsigned i = 0; i < SrcVRegs.size(); i++)
+ BitcastVRegs.push_back(
+ MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@@ -5342,6 +5380,20 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_BITCAST: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned SrcScalSize =
+ MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits();
+ LLT NewTy =
+ LLT::fixed_vector(MoreTy.getSizeInBits() / SrcScalSize, SrcScalSize);
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, NewTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index cbf5655706e69..7630c5fa83d55 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -744,12 +744,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Casts for 32 and 64-bit width type are just copies.
// Same for 128-bit width type, except they are on the FPR bank.
getActionDefinitionsBuilder(G_BITCAST)
- // FIXME: This is wrong since G_BITCAST is not allowed to change the
- // number of bits but it's what the previous code described and fixing
- // it breaks tests.
- .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
- v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
- v2p0});
+ // Keeping 32-bit instructions legal to prevent regression in some tests
+ .legalForCartesianProduct({s32, v2s16, v4s8})
+ .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
+ .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
+ .moreElementsToNextPow2(0)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .lower();
getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index c90c31aa27ef5..58b9100c2f2a4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -127,8 +127,8 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_BITCAST (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_FREEZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index bac9b48a4087b..24002fa70845e 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -10,15 +10,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i64_v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i32_v8i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v3i32_v6i16
define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-SD-LABEL: foo1:
@@ -413,92 +404,168 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
}
define <8 x i32> @bitcast_v4i64_v8i32(<4 x i64> %a, <4 x i64> %b){
-; CHECK-LABEL: bitcast_v4i64_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v4i64_v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i64_v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%c = add <4 x i64> %a, %b
%d = bitcast <4 x i64> %c to <8 x i32>
ret <8 x i32> %d
}
define <16 x i16> @bitcast_v4i64_v16i16(<4 x i64> %a, <4 x i64> %b){
-; CHECK-LABEL: bitcast_v4i64_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v4i64_v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i64_v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%c = add <4 x i64> %a, %b
%d = bitcast <4 x i64> %c to <16 x i16>
ret <16 x i16> %d
}
define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
-; CHECK-LABEL: bitcast_v8i32_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v8i32_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
%c = add <8 x i32> %a, %b
%d = bitcast <8 x i32> %c to <4 x i64>
ret <4 x i64> %d
}
define <16 x i16> @bitcast_v8i32_v16i16(<8 x i32> %a, <8 x i32> %b){
-; CHECK-LABEL: bitcast_v8i32_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v8i32_v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i32_v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%c = add <8 x i32> %a, %b
%d = bitcast <8 x i32> %c to <16 x i16>
ret <16 x i16> %d
}
define <16 x i32> @bitcast_v8i64_v16i32(<8 x i64> %a, <8 x i64> %b){
-; CHECK-LABEL: bitcast_v8i64_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: add v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v8i64_v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i64_v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: add v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
%c = add <8 x i64> %a, %b
%d = bitcast <8 x i64> %c to <16 x i32>
ret <16 x i32> %d
}
define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
-; CHECK-LABEL: bitcast_v16i16_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v16i16_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
%c = add <16 x i16> %a, %b
%d = bitcast <16 x i16> %c to <4 x i64>
ret <4 x i64> %d
}
define <8 x i32> @bitcast_v16i16_v8i32(<16 x i16> %a, <16 x i16> %b){
-; CHECK-LABEL: bitcast_v16i16_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v16i16_v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i16_v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
%c = add <16 x i16> %a, %b
%d = bitcast <16 x i16> %c to <8 x i32>
ret <8 x i32> %d
}
define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
-; CHECK-LABEL: bitcast_v16i32_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: add v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v16i32_v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: add v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: add v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: mov d4, v0.d[1]
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v2.d[1]
+; CHECK-GI-NEXT: mov d7, v3.d[1]
+; CHECK-GI-NEXT: fmov x8, d4
+; CHECK-GI-NEXT: fmov x9, d5
+; CHECK-GI-NEXT: fmov x10, d6
+; CHECK-GI-NEXT: fmov x11, d7
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov v2.d[1], x10
+; CHECK-GI-NEXT: mov v3.d[1], x11
+; CHECK-GI-NEXT: ret
%c = add <16 x i32> %a, %b
%d = bitcast <16 x i32> %c to <8 x i64>
ret <8 x i64> %d