[llvm] [AArch64] Generate rev16 for certain uses of __builtin_bswap16 (PR #105375)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 14:09:33 PDT 2024
https://github.com/adprasad-nvidia updated https://github.com/llvm/llvm-project/pull/105375
>From d877a20e79c4e3276272dd036b7795ee6d213bd3 Mon Sep 17 00:00:00 2001
From: adprasad <adprasad at nvidia.com>
Date: Tue, 13 Aug 2024 16:40:58 +0530
Subject: [PATCH 1/5] [REV] Generate rev16 for all (srl (bswap x), (i64 16)) instructions
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2fff6fffcd7c6d..99f55a8247ee9f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2836,8 +2836,8 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
-// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
-def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
+// Match (srl (bswap x), C) -> revC.
+def : Pat<(srl (bswap GPR32:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
>From 07ddc928bdf61957ac6f9c80ef9ce6be14f47ba5 Mon Sep 17 00:00:00 2001
From: adprasad <adprasad at nvidia.com>
Date: Tue, 13 Aug 2024 18:03:34 +0530
Subject: [PATCH 2/5] [REV] Update test files
---
llvm/test/CodeGen/AArch64/arm64-rev.ll | 15 +++++----------
llvm/test/CodeGen/AArch64/bswap.ll | 3 +--
llvm/test/CodeGen/AArch64/memcmp.ll | 15 +++++----------
llvm/test/CodeGen/AArch64/merge-trunc-store.ll | 12 ++++--------
4 files changed, 15 insertions(+), 30 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..b0fd0d33f0b522 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -27,15 +27,13 @@ entry:
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev w8, w0
-; CHECK-SD-NEXT: lsr w0, w8, #16
+; CHECK-SD-NEXT: rev16 w0, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
-; CHECK-GI-NEXT: rev w8, w8
-; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: rev16 w0, w8
; CHECK-GI-NEXT: ret
entry:
%0 = zext i16 %a to i32
@@ -48,8 +46,7 @@ define i32 @test_rev_w_srl16_load(ptr %a) {
; CHECK-LABEL: test_rev_w_srl16_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: rev16 w0, w8
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr %a
@@ -71,8 +68,7 @@ define i32 @test_rev_w_srl16_add(i8 %a, i8 %b) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w1, #0xff
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
-; CHECK-GI-NEXT: rev w8, w8
-; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: rev16 w0, w8
; CHECK-GI-NEXT: ret
entry:
%0 = zext i8 %a to i32
@@ -472,8 +468,7 @@ define void @test_rev16_truncstore() {
; CHECK-GI-NEXT: .LBB30_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
-; CHECK-GI-NEXT: rev w8, w8
-; CHECK-GI-NEXT: lsr w8, w8, #16
+; CHECK-GI-NEXT: rev16 w8, w8
; CHECK-GI-NEXT: strh w8, [x8]
; CHECK-GI-NEXT: tbz wzr, #0, .LBB30_1
; CHECK-GI-NEXT: .LBB30_2: // %fail
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index 071613b9cc011e..2a60abdc2308f0 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -6,8 +6,7 @@
define i16 @bswap_i16(i16 %a){
; CHECK-LABEL: bswap_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: rev w8, w0
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: rev16 w0, w0
; CHECK-NEXT: ret
%3 = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %3
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index 4da7c8c95a4e4f..0a6a03844128c3 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -39,9 +39,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev16 w8, w8
; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w0, w8, w9, lsr #16
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -93,9 +92,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev16 w8, w8
; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w8, w8, w9, lsr #16
; CHECK-NEXT: lsr w0, w8, #31
; CHECK-NEXT: ret
@@ -109,9 +107,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev16 w8, w8
; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w8, w8, w9, lsr #16
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
@@ -536,10 +533,8 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrh w8, [x0, #8]
; CHECK-NEXT: ldrh w9, [x1, #8]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: rev w9, w9
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: lsr w9, w9, #16
+; CHECK-NEXT: rev16 w8, w8
+; CHECK-NEXT: rev16 w9, w9
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.2:
diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
index b161d746ad11d5..4fcd030db1bace 100644
--- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
@@ -10,8 +10,7 @@ define void @le_i16_to_i8(i16 %x, ptr %p0) {
;
; BE-LABEL: le_i16_to_i8:
; BE: // %bb.0:
-; BE-NEXT: rev w8, w0
-; BE-NEXT: lsr w8, w8, #16
+; BE-NEXT: rev16 w8, w0
; BE-NEXT: strh w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i16 %x, 8
@@ -31,8 +30,7 @@ define void @le_i16_to_i8_order(i16 %x, ptr %p0) {
;
; BE-LABEL: le_i16_to_i8_order:
; BE: // %bb.0:
-; BE-NEXT: rev w8, w0
-; BE-NEXT: lsr w8, w8, #16
+; BE-NEXT: rev16 w8, w0
; BE-NEXT: strh w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i16 %x, 8
@@ -47,8 +45,7 @@ define void @le_i16_to_i8_order(i16 %x, ptr %p0) {
define void @be_i16_to_i8_offset(i16 %x, ptr %p0) {
; LE-LABEL: be_i16_to_i8_offset:
; LE: // %bb.0:
-; LE-NEXT: rev w8, w0
-; LE-NEXT: lsr w8, w8, #16
+; LE-NEXT: rev16 w8, w0
; LE-NEXT: sturh w8, [x1, #11]
; LE-NEXT: ret
;
@@ -69,8 +66,7 @@ define void @be_i16_to_i8_offset(i16 %x, ptr %p0) {
define void @be_i16_to_i8_order(i16 %x, ptr %p0) {
; LE-LABEL: be_i16_to_i8_order:
; LE: // %bb.0:
-; LE-NEXT: rev w8, w0
-; LE-NEXT: lsr w8, w8, #16
+; LE-NEXT: rev16 w8, w0
; LE-NEXT: strh w8, [x1]
; LE-NEXT: ret
;
>From 290190bafb2ef59ec310c1912f3eed39502d9864 Mon Sep 17 00:00:00 2001
From: adprasad <adprasad at nvidia.com>
Date: Thu, 29 Aug 2024 04:38:11 +0530
Subject: [PATCH 3/5] Revert "[REV] Update test files"
This reverts commit 5cda4a951123b38114e4ba2fb224aebf71981bbf.
---
llvm/test/CodeGen/AArch64/arm64-rev.ll | 15 ++++++++++-----
llvm/test/CodeGen/AArch64/bswap.ll | 3 ++-
llvm/test/CodeGen/AArch64/memcmp.ll | 15 ++++++++++-----
llvm/test/CodeGen/AArch64/merge-trunc-store.ll | 12 ++++++++----
4 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index b0fd0d33f0b522..f548a0e01feee6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -27,13 +27,15 @@ entry:
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev16 w0, w0
+; CHECK-SD-NEXT: rev w8, w0
+; CHECK-SD-NEXT: lsr w0, w8, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
-; CHECK-GI-NEXT: rev16 w0, w8
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
entry:
%0 = zext i16 %a to i32
@@ -46,7 +48,8 @@ define i32 @test_rev_w_srl16_load(ptr %a) {
; CHECK-LABEL: test_rev_w_srl16_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: rev16 w0, w8
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr %a
@@ -68,7 +71,8 @@ define i32 @test_rev_w_srl16_add(i8 %a, i8 %b) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w1, #0xff
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
-; CHECK-GI-NEXT: rev16 w0, w8
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
entry:
%0 = zext i8 %a to i32
@@ -468,7 +472,8 @@ define void @test_rev16_truncstore() {
; CHECK-GI-NEXT: .LBB30_1: // %cleanup
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w8, [x8]
-; CHECK-GI-NEXT: rev16 w8, w8
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: strh w8, [x8]
; CHECK-GI-NEXT: tbz wzr, #0, .LBB30_1
; CHECK-GI-NEXT: .LBB30_2: // %fail
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index 2a60abdc2308f0..071613b9cc011e 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -6,7 +6,8 @@
define i16 @bswap_i16(i16 %a){
; CHECK-LABEL: bswap_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: rev16 w0, w0
+; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
%3 = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %3
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index 0a6a03844128c3..4da7c8c95a4e4f 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -39,8 +39,9 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev16 w8, w8
+; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w0, w8, w9, lsr #16
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -92,8 +93,9 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev16 w8, w8
+; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w8, w8, w9, lsr #16
; CHECK-NEXT: lsr w0, w8, #31
; CHECK-NEXT: ret
@@ -107,8 +109,9 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: rev16 w8, w8
+; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: sub w8, w8, w9, lsr #16
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
@@ -533,8 +536,10 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrh w8, [x0, #8]
; CHECK-NEXT: ldrh w9, [x1, #8]
-; CHECK-NEXT: rev16 w8, w8
-; CHECK-NEXT: rev16 w9, w9
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: lsr w8, w8, #16
+; CHECK-NEXT: lsr w9, w9, #16
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.2:
diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
index 4fcd030db1bace..b161d746ad11d5 100644
--- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
@@ -10,7 +10,8 @@ define void @le_i16_to_i8(i16 %x, ptr %p0) {
;
; BE-LABEL: le_i16_to_i8:
; BE: // %bb.0:
-; BE-NEXT: rev16 w8, w0
+; BE-NEXT: rev w8, w0
+; BE-NEXT: lsr w8, w8, #16
; BE-NEXT: strh w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i16 %x, 8
@@ -30,7 +31,8 @@ define void @le_i16_to_i8_order(i16 %x, ptr %p0) {
;
; BE-LABEL: le_i16_to_i8_order:
; BE: // %bb.0:
-; BE-NEXT: rev16 w8, w0
+; BE-NEXT: rev w8, w0
+; BE-NEXT: lsr w8, w8, #16
; BE-NEXT: strh w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i16 %x, 8
@@ -45,7 +47,8 @@ define void @le_i16_to_i8_order(i16 %x, ptr %p0) {
define void @be_i16_to_i8_offset(i16 %x, ptr %p0) {
; LE-LABEL: be_i16_to_i8_offset:
; LE: // %bb.0:
-; LE-NEXT: rev16 w8, w0
+; LE-NEXT: rev w8, w0
+; LE-NEXT: lsr w8, w8, #16
; LE-NEXT: sturh w8, [x1, #11]
; LE-NEXT: ret
;
@@ -66,7 +69,8 @@ define void @be_i16_to_i8_offset(i16 %x, ptr %p0) {
define void @be_i16_to_i8_order(i16 %x, ptr %p0) {
; LE-LABEL: be_i16_to_i8_order:
; LE: // %bb.0:
-; LE-NEXT: rev16 w8, w0
+; LE-NEXT: rev w8, w0
+; LE-NEXT: lsr w8, w8, #16
; LE-NEXT: strh w8, [x1]
; LE-NEXT: ret
;
>From cedb8288d5736a39527a86eb1e16ced69031897f Mon Sep 17 00:00:00 2001
From: adprasad <adprasad at nvidia.com>
Date: Thu, 29 Aug 2024 04:38:16 +0530
Subject: [PATCH 4/5] Revert "[REV] Generate rev16 for all (srl (bswap x), (i64 16)) instructions"
This reverts commit 7d0d37404c613be62e84536d8efd675756160867.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 99f55a8247ee9f..2fff6fffcd7c6d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2836,8 +2836,8 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
-// Match (srl (bswap x), C) -> revC.
-def : Pat<(srl (bswap GPR32:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
+// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
+def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
>From d6b04482e687ad9765c2b58130e126201143f7b0 Mon Sep 17 00:00:00 2001
From: adprasad <adprasad at nvidia.com>
Date: Thu, 29 Aug 2024 04:39:11 +0530
Subject: [PATCH 5/5] [AArch64] Lower __builtin_bswap16 to rev16 if return value is 16-bit
Fixes #77222.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 16 ++++++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +++
llvm/test/CodeGen/AArch64/bswap.ll | 26 +++++++++++++++----
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 11aca69db0a148..01d6ae96d58d22 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22137,6 +22137,22 @@ static SDValue performExtendCombine(SDNode *N,
N->getOperand(0)->getOpcode() == ISD::SETCC)
return performSignExtendSetCCCombine(N, DCI, DAG);
+ // If we see (any_extend (bswap ...)) with bswap returning an i16, we know
+ // that the top half of the result register must be unused, due to the
+ // any_extend. This means that we can replace this pattern with (rev16
+ // (any_extend ...)). This saves a machine instruction compared to (lsr (rev
+ // ...)), which is what this pattern would otherwise be lowered to.
+ if (N->getOpcode() == ISD::ANY_EXTEND &&
+ N->getOperand(0).getOpcode() == ISD::BSWAP &&
+ N->getOperand(0).getValueType().isScalarInteger() &&
+ N->getOperand(0).getValueType().getFixedSizeInBits() == 16) {
+ SDNode *BswapNode = N->getOperand(0).getNode();
+ SDValue NewAnyExtend = DAG.getNode(ISD::ANY_EXTEND, SDLoc(BswapNode),
+ EVT(MVT::i32), BswapNode->getOperand(0));
+ return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
+ NewAnyExtend);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2fff6fffcd7c6d..f5fed09be7b207 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -758,6 +758,8 @@ def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
+def AArch64rev16_scalar : SDNode<"AArch64ISD::REV16", SDTIntUnaryOp>;
+
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
@@ -2840,6 +2842,8 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
+def : Pat<(AArch64rev16_scalar GPR32:$Rn), (REV16Wr GPR32:$Rn)>;
+
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
(and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
(REV16Xr GPR64:$Rn)>;
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index 071613b9cc011e..0b9af406859136 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -3,16 +3,32 @@
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; ====== Scalar Tests =====
-define i16 @bswap_i16(i16 %a){
-; CHECK-LABEL: bswap_i16:
+define i16 @bswap_i16_to_i16(i16 %a){
+; CHECK-SD-LABEL: bswap_i16_to_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: rev16 w0, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bswap_i16_to_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: rev w8, w0
+; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: ret
+ %3 = call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %3
+}
+declare i16 @llvm.bswap.i16(i16)
+
+define i32 @bswap_i16_to_i32(i16 %a){
+; CHECK-LABEL: bswap_i16_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev w8, w0
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
- %3 = call i16 @llvm.bswap.i16(i16 %a)
- ret i16 %3
+ %3 = call i16 @llvm.bswap.i16(i16 %a)
+ %4 = zext i16 %3 to i32
+ ret i32 %4
}
-declare i16 @llvm.bswap.i16(i16)
define i32 @bswap_i32(i32 %a){
; CHECK-LABEL: bswap_i32:
More information about the llvm-commits
mailing list