[llvm] 3bb0c73 - [AArch64][GlobalISel] Add more test coverage for bitreverse.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 21 22:43:44 PST 2024
Author: Amara Emerson
Date: 2024-12-21T22:34:11-08:00
New Revision: 3bb0c73ab50121f518c8d66154283cfd50d6d31a
URL: https://github.com/llvm/llvm-project/commit/3bb0c73ab50121f518c8d66154283cfd50d6d31a
DIFF: https://github.com/llvm/llvm-project/commit/3bb0c73ab50121f518c8d66154283cfd50d6d31a.diff
LOG: [AArch64][GlobalISel] Add more test coverage for bitreverse.
The IR tests show there are some missing optimizations to form rbit.
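
A minimal IR sketch of the kind of case the new tests exercise (the function name below is illustrative, not one of the committed tests): for a plain vector bitreverse such as this one, SelectionDAG already forms a rev16 + rbit pair, while GlobalISel currently legalizes it into the generic shift-and-mask expansion visible in the diff.

    ; Illustrative only; the committed tests use different function names.
    declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)

    define <8 x i16> @example_bitreverse_v8i16(<8 x i16> %a) {
      %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
      ret <8 x i16> %b
    }
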
Added:
Modified:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
llvm/test/CodeGen/AArch64/bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
index 2e6f55916547db..607f213f671458 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
@@ -186,3 +186,183 @@ body: |
$d0 = COPY %bitreverse
RET_ReallyLR implicit $q0
...
+---
+name: v2s32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; CHECK-LABEL: name: v2s32
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s32>) = G_BSWAP %vec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR]], [[AND1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s32>)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s32>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR1]], [[AND3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s32>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s32>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: $d0 = COPY %bitreverse(<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %vec:_(<2 x s32>) = COPY $d0
+ %bitreverse:_(<2 x s32>) = G_BITREVERSE %vec
+ $d0 = COPY %bitreverse
+ RET_ReallyLR implicit $d0
+...
+---
+name: v2s64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s64>) = G_BSWAP %vec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1085102592571150096
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR]], [[AND1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -3689348814741910324
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s64>)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s64>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR1]], [[AND3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C4]](s64), [[C4]](s64)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 -6148914691236517206
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C5]](s64), [[C5]](s64)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s64>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s64>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: $q0 = COPY %bitreverse(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %vec:_(<2 x s64>) = COPY $q0
+ %bitreverse:_(<2 x s64>) = G_BITREVERSE %vec
+ $q0 = COPY %bitreverse
+ RET_ReallyLR implicit $q0
+...
+---
+name: v4s32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v4s32
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s32>) = G_BSWAP %vec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR]], [[AND1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s32>)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s32>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR1]], [[AND3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s32>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s32>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: $q0 = COPY %bitreverse(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %vec:_(<4 x s32>) = COPY $q0
+ %bitreverse:_(<4 x s32>) = G_BITREVERSE %vec
+ $q0 = COPY %bitreverse
+ RET_ReallyLR implicit $q0
+...
+---
+name: v8s16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v8s16
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<8 x s16>) = G_BSWAP %vec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<8 x s16>)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<8 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<8 x s16>)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR]], [[AND1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<8 x s16>)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<8 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR1]], [[AND3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<8 x s16>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<8 x s16>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: $q0 = COPY %bitreverse(<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %vec:_(<8 x s16>) = COPY $q0
+ %bitreverse:_(<8 x s16>) = G_BITREVERSE %vec
+ $q0 = COPY %bitreverse
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/bitreverse.ll b/llvm/test/CodeGen/AArch64/bitreverse.ll
index a6d3683a92a8dc..04b78c5825a2de 100644
--- a/llvm/test/CodeGen/AArch64/bitreverse.ll
+++ b/llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -1,17 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,SDAG
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=1 %s -o - | FileCheck %s --check-prefixes=CHECK,GISEL
; These tests just check that the plumbing is in place for @llvm.bitreverse.
declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
define <2 x i16> @f(<2 x i16> %a) {
-; CHECK-LABEL: f:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev32 v0.8b, v0.8b
-; CHECK-NEXT: rbit v0.8b, v0.8b
-; CHECK-NEXT: ushr v0.2s, v0.2s, #16
-; CHECK-NEXT: ret
+; SDAG-LABEL: f:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev32 v0.8b, v0.8b
+; SDAG-NEXT: rbit v0.8b, v0.8b
+; SDAG-NEXT: ushr v0.2s, v0.2s, #16
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: f:
+; GISEL: // %bb.0:
+; GISEL-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; GISEL-NEXT: mov w8, #61680 // =0xf0f0
+; GISEL-NEXT: dup v1.2s, w8
+; GISEL-NEXT: mov w8, #4 // =0x4
+; GISEL-NEXT: fmov s3, w8
+; GISEL-NEXT: rev16 v0.8b, v0.8b
+; GISEL-NEXT: mov v3.h[1], w8
+; GISEL-NEXT: mov w8, #52428 // =0xcccc
+; GISEL-NEXT: ushll v2.4s, v0.4h, #0
+; GISEL-NEXT: neg v4.4h, v3.4h
+; GISEL-NEXT: and v2.8b, v2.8b, v1.8b
+; GISEL-NEXT: uzp1 v2.4h, v2.4h, v0.4h
+; GISEL-NEXT: ushl v0.4h, v0.4h, v3.4h
+; GISEL-NEXT: ushll v0.4s, v0.4h, #0
+; GISEL-NEXT: ushl v2.4h, v2.4h, v4.4h
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: ushll v1.4s, v2.4h, #0
+; GISEL-NEXT: dup v2.2s, w8
+; GISEL-NEXT: mov w8, #2 // =0x2
+; GISEL-NEXT: orr v0.8b, v1.8b, v0.8b
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: and v3.8b, v0.8b, v2.8b
+; GISEL-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; GISEL-NEXT: mov v1.h[1], w8
+; GISEL-NEXT: mov w8, #43690 // =0xaaaa
+; GISEL-NEXT: uzp1 v3.4h, v3.4h, v0.4h
+; GISEL-NEXT: neg v4.4h, v1.4h
+; GISEL-NEXT: ushl v0.4h, v0.4h, v1.4h
+; GISEL-NEXT: ushll v0.4s, v0.4h, #0
+; GISEL-NEXT: ushl v1.4h, v3.4h, v4.4h
+; GISEL-NEXT: and v0.8b, v0.8b, v2.8b
+; GISEL-NEXT: dup v2.2s, w8
+; GISEL-NEXT: mov w8, #1 // =0x1
+; GISEL-NEXT: ushll v1.4s, v1.4h, #0
+; GISEL-NEXT: orr v0.8b, v1.8b, v0.8b
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: and v3.8b, v0.8b, v2.8b
+; GISEL-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; GISEL-NEXT: mov v1.h[1], w8
+; GISEL-NEXT: uzp1 v3.4h, v3.4h, v0.4h
+; GISEL-NEXT: neg v4.4h, v1.4h
+; GISEL-NEXT: ushl v0.4h, v0.4h, v1.4h
+; GISEL-NEXT: ushll v0.4s, v0.4h, #0
+; GISEL-NEXT: ushl v1.4h, v3.4h, v4.4h
+; GISEL-NEXT: and v0.8b, v0.8b, v2.8b
+; GISEL-NEXT: ushll v1.4s, v1.4h, #0
+; GISEL-NEXT: orr v0.8b, v1.8b, v0.8b
+; GISEL-NEXT: ret
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %b
}
@@ -87,11 +139,34 @@ define <16 x i8> @g_vec_16x8(<16 x i8> %a) {
declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) readnone
define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
-; CHECK-LABEL: g_vec_4x16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev16 v0.8b, v0.8b
-; CHECK-NEXT: rbit v0.8b, v0.8b
-; CHECK-NEXT: ret
+; SDAG-LABEL: g_vec_4x16:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev16 v0.8b, v0.8b
+; SDAG-NEXT: rbit v0.8b, v0.8b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_4x16:
+; GISEL: // %bb.0:
+; GISEL-NEXT: movi v1.8b, #240
+; GISEL-NEXT: rev16 v0.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.4h, v0.4h, #4
+; GISEL-NEXT: ushr v2.4h, v2.4h, #4
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: movi v1.8b, #204
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.4h, v0.4h, #2
+; GISEL-NEXT: ushr v2.4h, v2.4h, #2
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: movi v1.8b, #170
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.4h, v0.4h, #1
+; GISEL-NEXT: ushr v2.4h, v2.4h, #1
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: ret
%b = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
ret <4 x i16> %b
}
@@ -99,11 +174,34 @@ define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) readnone
define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
-; CHECK-LABEL: g_vec_8x16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev16 v0.16b, v0.16b
-; CHECK-NEXT: rbit v0.16b, v0.16b
-; CHECK-NEXT: ret
+; SDAG-LABEL: g_vec_8x16:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev16 v0.16b, v0.16b
+; SDAG-NEXT: rbit v0.16b, v0.16b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_8x16:
+; GISEL: // %bb.0:
+; GISEL-NEXT: movi v1.16b, #240
+; GISEL-NEXT: rev16 v0.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.8h, v0.8h, #4
+; GISEL-NEXT: ushr v2.8h, v2.8h, #4
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #204
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.8h, v0.8h, #2
+; GISEL-NEXT: ushr v2.8h, v2.8h, #2
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #170
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.8h, v0.8h, #1
+; GISEL-NEXT: ushr v2.8h, v2.8h, #1
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: ret
%b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
ret <8 x i16> %b
}
@@ -111,12 +209,34 @@ define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) readnone
define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
-; CHECK-LABEL: g_vec_2x32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev32 v0.8b, v0.8b
-; CHECK-NEXT: rbit v0.8b, v0.8b
-; CHECK-NEXT: ret
-
+; SDAG-LABEL: g_vec_2x32:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev32 v0.8b, v0.8b
+; SDAG-NEXT: rbit v0.8b, v0.8b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_2x32:
+; GISEL: // %bb.0:
+; GISEL-NEXT: movi v1.8b, #240
+; GISEL-NEXT: rev32 v0.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.2s, v0.2s, #4
+; GISEL-NEXT: ushr v2.2s, v2.2s, #4
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: movi v1.8b, #204
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.2s, v0.2s, #2
+; GISEL-NEXT: ushr v2.2s, v2.2s, #2
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: movi v1.8b, #170
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: and v2.8b, v0.8b, v1.8b
+; GISEL-NEXT: shl v0.2s, v0.2s, #1
+; GISEL-NEXT: ushr v2.2s, v2.2s, #1
+; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
+; GISEL-NEXT: orr v0.8b, v2.8b, v0.8b
+; GISEL-NEXT: ret
%b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
ret <2 x i32> %b
}
@@ -124,11 +244,34 @@ define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
-; CHECK-LABEL: g_vec_4x32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev32 v0.16b, v0.16b
-; CHECK-NEXT: rbit v0.16b, v0.16b
-; CHECK-NEXT: ret
+; SDAG-LABEL: g_vec_4x32:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev32 v0.16b, v0.16b
+; SDAG-NEXT: rbit v0.16b, v0.16b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_4x32:
+; GISEL: // %bb.0:
+; GISEL-NEXT: movi v1.16b, #240
+; GISEL-NEXT: rev32 v0.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.4s, v0.4s, #4
+; GISEL-NEXT: ushr v2.4s, v2.4s, #4
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #204
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.4s, v0.4s, #2
+; GISEL-NEXT: ushr v2.4s, v2.4s, #2
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #170
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.4s, v0.4s, #1
+; GISEL-NEXT: ushr v2.4s, v2.4s, #1
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: ret
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
ret <4 x i32> %b
}
@@ -136,11 +279,18 @@ define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>) readnone
define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
-; CHECK-LABEL: g_vec_1x64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev64 v0.8b, v0.8b
-; CHECK-NEXT: rbit v0.8b, v0.8b
-; CHECK-NEXT: ret
+; SDAG-LABEL: g_vec_1x64:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev64 v0.8b, v0.8b
+; SDAG-NEXT: rbit v0.8b, v0.8b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_1x64:
+; GISEL: // %bb.0:
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: rbit x8, x8
+; GISEL-NEXT: fmov d0, x8
+; GISEL-NEXT: ret
%b = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %a)
ret <1 x i64> %b
}
@@ -148,11 +298,34 @@ define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) readnone
define <2 x i64> @g_vec_2x64(<2 x i64> %a) {
-; CHECK-LABEL: g_vec_2x64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rev64 v0.16b, v0.16b
-; CHECK-NEXT: rbit v0.16b, v0.16b
-; CHECK-NEXT: ret
+; SDAG-LABEL: g_vec_2x64:
+; SDAG: // %bb.0:
+; SDAG-NEXT: rev64 v0.16b, v0.16b
+; SDAG-NEXT: rbit v0.16b, v0.16b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: g_vec_2x64:
+; GISEL: // %bb.0:
+; GISEL-NEXT: movi v1.16b, #240
+; GISEL-NEXT: rev64 v0.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.2d, v0.2d, #4
+; GISEL-NEXT: ushr v2.2d, v2.2d, #4
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #204
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.2d, v0.2d, #2
+; GISEL-NEXT: ushr v2.2d, v2.2d, #2
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: movi v1.16b, #170
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: and v2.16b, v0.16b, v1.16b
+; GISEL-NEXT: shl v0.2d, v0.2d, #1
+; GISEL-NEXT: ushr v2.2d, v2.2d, #1
+; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT: orr v0.16b, v2.16b, v0.16b
+; GISEL-NEXT: ret
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
ret <2 x i64> %b
}
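
For reference, a scalar sketch of what the legalizer expansion above amounts to (assuming the byte swap has already been applied; this mirrors, but is not, the generated MIR): bit reversal within each byte swaps nibbles, then bit pairs, then individual bits, which is where the 0xF0F0F0F0, 0xCCCCCCCC and 0xAAAAAAAA masks in the checks come from.

    ; Scalar i32 sketch of the shift-and-mask expansion (post-bswap).
    define i32 @reverse_bits_in_bytes(i32 %x) {
      ; swap the two nibbles of every byte
      %hi4  = and i32 %x, -252645136        ; 0xF0F0F0F0
      %hi4s = lshr i32 %hi4, 4
      %lo4s = shl i32 %x, 4
      %lo4  = and i32 %lo4s, -252645136
      %n    = or i32 %hi4s, %lo4
      ; swap adjacent bit pairs
      %hi2  = and i32 %n, -858993460        ; 0xCCCCCCCC
      %hi2s = lshr i32 %hi2, 2
      %lo2s = shl i32 %n, 2
      %lo2  = and i32 %lo2s, -858993460
      %p    = or i32 %hi2s, %lo2
      ; swap adjacent bits
      %hi1  = and i32 %p, -1431655766       ; 0xAAAAAAAA
      %hi1s = lshr i32 %hi1, 1
      %lo1s = shl i32 %p, 1
      %lo1  = and i32 %lo1s, -1431655766
      %r    = or i32 %hi1s, %lo1
      ret i32 %r
    }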