[llvm] [AArch64][GlobalISel] Legalize BSWAP for Vector Types (PR #80036)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 30 09:11:34 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
<details>
<summary>Changes</summary>
Add support for i16 vector operations for BSWAP, and switch to TableGen patterns for instruction selection
Handle vector types that are smaller/larger than legal for BSWAP
---
Patch is 20.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80036.diff
7 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+7)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (-37)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+8-4)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir (+68-26)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir (+72-18)
- (added) llvm/test/CodeGen/AArch64/bswap.ll (+206)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b8ed02e268b18..253901b217dfa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9031,7 +9031,6 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
-
if (!VT.isSimple())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 03baa7497615e..f6eaf034c59a9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5163,6 +5163,13 @@ def : Pat<(v8i16 (concat_vectors
(v4i32 VImm8000)))))),
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// Select BSWAP instructions into REV instructions
+def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
+def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))), (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
+def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))), (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
+def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))), (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
+def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
+
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 48cfb60210d96..2515991fbea11 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2567,43 +2567,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
}
- case TargetOpcode::G_BSWAP: {
- // Handle vector types for G_BSWAP directly.
- Register DstReg = I.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- // We should only get vector types here; everything else is handled by the
- // importer right now.
- if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
- LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
- return false;
- }
-
- // Only handle 4 and 2 element vectors for now.
- // TODO: 16-bit elements.
- unsigned NumElts = DstTy.getNumElements();
- if (NumElts != 4 && NumElts != 2) {
- LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
- return false;
- }
-
- // Choose the correct opcode for the supported types. Right now, that's
- // v2s32, v4s32, and v2s64.
- unsigned Opc = 0;
- unsigned EltSize = DstTy.getElementType().getSizeInBits();
- if (EltSize == 32)
- Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
- : AArch64::REV32v16i8;
- else if (EltSize == 64)
- Opc = AArch64::REV64v16i8;
-
- // We should always get something by the time we get here...
- assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
-
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
-
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7220efd807c28..b0732d4fd3de0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -118,9 +118,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, p0, 2);
getActionDefinitionsBuilder(G_BSWAP)
- .legalFor({s32, s64, v4s32, v2s32, v2s64})
- .widenScalarToNextPow2(0)
- .clampScalar(0, s32, s64);
+ .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
+ .widenScalarToNextPow2(0, 32)
+ .clampScalar(0, s32, s64)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
@@ -1226,7 +1230,7 @@ bool AArch64LegalizerInfo::legalizeCustom(
case TargetOpcode::G_PREFETCH:
return legalizePrefetch(MI, Helper);
}
-
+
llvm_unreachable("expected switch to return");
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
index fba0881d4e86f..3040ab920f1af 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
@@ -11,12 +11,13 @@ body: |
; CHECK-LABEL: name: bswap_s16
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
- ; CHECK: $w0 = COPY [[LSHR]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
+ ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:_(s32) = COPY $w0
%0:_(s16) = G_TRUNC %1(s32)
%2:_(s16) = G_BSWAP %0
@@ -32,10 +33,11 @@ body: |
liveins: $w0
; CHECK-LABEL: name: bswap_s32_legal
; CHECK: liveins: $w0
- ; CHECK: %copy:_(s32) = COPY $w0
- ; CHECK: %bswap:_(s32) = G_BSWAP %copy
- ; CHECK: $w0 = COPY %bswap(s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+ ; CHECK-NEXT: %bswap:_(s32) = G_BSWAP %copy
+ ; CHECK-NEXT: $w0 = COPY %bswap(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:_(s32) = COPY $w0
%bswap:_(s32) = G_BSWAP %copy
$w0 = COPY %bswap(s32)
@@ -49,16 +51,53 @@ body: |
liveins: $x0
; CHECK-LABEL: name: bswap_s64_legal
; CHECK: liveins: $x0
- ; CHECK: %copy:_(s64) = COPY $x0
- ; CHECK: %bswap:_(s64) = G_BSWAP %copy
- ; CHECK: $x0 = COPY %bswap(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+ ; CHECK-NEXT: %bswap:_(s64) = G_BSWAP %copy
+ ; CHECK-NEXT: $x0 = COPY %bswap(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy:_(s64) = COPY $x0
%bswap:_(s64) = G_BSWAP %copy
$x0 = COPY %bswap(s64)
RET_ReallyLR implicit $x0
...
---
+name: bswap_v4s16_legal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+ ; CHECK-LABEL: name: bswap_v4s16_legal
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: %bswap:_(<4 x s16>) = G_BSWAP %copy
+ ; CHECK-NEXT: $d0 = COPY %bswap(<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %copy:_(<4 x s16>) = COPY $d0
+ %bswap:_(<4 x s16>) = G_BSWAP %copy
+ $d0 = COPY %bswap(<4 x s16>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: bswap_v8s16_legal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: bswap_v8s16_legal
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: %bswap:_(<8 x s16>) = G_BSWAP %copy
+ ; CHECK-NEXT: $q0 = COPY %bswap(<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %copy:_(<8 x s16>) = COPY $q0
+ %bswap:_(<8 x s16>) = G_BSWAP %copy
+ $q0 = COPY %bswap(<8 x s16>)
+ RET_ReallyLR implicit $q0
+...
+---
name: bswap_v4s32_legal
tracksRegLiveness: true
body: |
@@ -66,10 +105,11 @@ body: |
liveins: $q0
; CHECK-LABEL: name: bswap_v4s32_legal
; CHECK: liveins: $q0
- ; CHECK: %copy:_(<4 x s32>) = COPY $q0
- ; CHECK: %bswap:_(<4 x s32>) = G_BSWAP %copy
- ; CHECK: $q0 = COPY %bswap(<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: %bswap:_(<4 x s32>) = G_BSWAP %copy
+ ; CHECK-NEXT: $q0 = COPY %bswap(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%copy:_(<4 x s32>) = COPY $q0
%bswap:_(<4 x s32>) = G_BSWAP %copy
$q0 = COPY %bswap(<4 x s32>)
@@ -83,10 +123,11 @@ body: |
liveins: $d0
; CHECK-LABEL: name: bswap_v2s32_legal
; CHECK: liveins: $d0
- ; CHECK: %copy:_(<2 x s32>) = COPY $d0
- ; CHECK: %bswap:_(<2 x s32>) = G_BSWAP %copy
- ; CHECK: $d0 = COPY %bswap(<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: %bswap:_(<2 x s32>) = G_BSWAP %copy
+ ; CHECK-NEXT: $d0 = COPY %bswap(<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%copy:_(<2 x s32>) = COPY $d0
%bswap:_(<2 x s32>) = G_BSWAP %copy
$d0 = COPY %bswap(<2 x s32>)
@@ -100,10 +141,11 @@ body: |
liveins: $q0
; CHECK-LABEL: name: bswap_v2s64_legal
; CHECK: liveins: $q0
- ; CHECK: %copy:_(<2 x s64>) = COPY $q0
- ; CHECK: %bswap:_(<2 x s64>) = G_BSWAP %copy
- ; CHECK: $q0 = COPY %bswap(<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %bswap:_(<2 x s64>) = G_BSWAP %copy
+ ; CHECK-NEXT: $q0 = COPY %bswap(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%copy:_(<2 x s64>) = COPY $q0
%bswap:_(<2 x s64>) = G_BSWAP %copy
$q0 = COPY %bswap(<2 x s64>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
index 77c03073033c0..5e0121682e327 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
@@ -16,9 +16,11 @@ body: |
liveins: $w0
; CHECK-LABEL: name: bswap_s32
- ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
- ; CHECK: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]]
- ; CHECK: $w0 = COPY [[REVWr]]
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]]
+ ; CHECK-NEXT: $w0 = COPY [[REVWr]]
%0(s32) = COPY $w0
%1(s32) = G_BSWAP %0
$w0 = COPY %1
@@ -38,13 +40,62 @@ body: |
liveins: $x0
; CHECK-LABEL: name: bswap_s64
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]]
- ; CHECK: $x0 = COPY [[REVXr]]
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]]
+ ; CHECK-NEXT: $x0 = COPY [[REVXr]]
%0(s64) = COPY $x0
%1(s64) = G_BSWAP %0
$x0 = COPY %1
+...
+---
+name: bswap_v4s16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: bswap_v4s16
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[REV16v8i8_:%[0-9]+]]:fpr64 = REV16v8i8 [[COPY]]
+ ; CHECK-NEXT: $d0 = COPY [[REV16v8i8_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %1:fpr(<4 x s16>) = G_BSWAP %0
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: bswap_v8s16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: bswap_v8s16
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[REV16v16i8_:%[0-9]+]]:fpr128 = REV16v16i8 [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY [[REV16v16i8_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:fpr(<8 x s16>) = COPY $q0
+ %1:fpr(<8 x s16>) = G_BSWAP %0
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
...
---
name: bswap_v4s32
@@ -59,10 +110,11 @@ body: |
; CHECK-LABEL: name: bswap_v4s32
; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
- ; CHECK: $q0 = COPY [[REV32v16i8_]]
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY [[REV32v16i8_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:fpr(<4 x s32>) = COPY $q0
%1:fpr(<4 x s32>) = G_BSWAP %0
$q0 = COPY %1(<4 x s32>)
@@ -82,10 +134,11 @@ body: |
; CHECK-LABEL: name: bswap_v2s32
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
- ; CHECK: $d0 = COPY [[REV32v8i8_]]
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
+ ; CHECK-NEXT: $d0 = COPY [[REV32v8i8_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:fpr(<2 x s32>) = COPY $d0
%1:fpr(<2 x s32>) = G_BSWAP %0
$d0 = COPY %1(<2 x s32>)
@@ -105,10 +158,11 @@ body: |
; CHECK-LABEL: name: bswap_v2s64
; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
- ; CHECK: $q0 = COPY [[REV64v16i8_]]
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY [[REV64v16i8_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:fpr(<2 x s64>) = COPY $q0
%1:fpr(<2 x s64>) = G_BSWAP %0
$q0 = COPY %1(<2 x s64>)
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
new file mode 100644
index 0000000000000..6df62a00a8f8f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for bswap_v2i16
+
+; ====== Scalar Tests =====
+define i16 @bswap_i16(i16 %a){
+; CHECK-LABEL: bswap_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: ret
+ %3 = call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %3
+}
+declare i16 @llvm.bswap.i16(i16)
+
+define i32 @bswap_i32(i32 %a){
+; CHECK-LABEL: bswap_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev w0, w0
+; CHECK-NEXT: ret
+ %3 = call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %3
+}
+declare i32 @llvm.bswap.i32(i32)
+
+define i64 @bswap_i64(i64 %a){
+; CHECK-LABEL: bswap_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev x0, x0
+; CHECK-NEXT: ret
+ %3 = call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %3
+}
+declare i64 @llvm.bswap.i64(i64)
+
+define i128 @bswap_i128(i128 %a){
+; CHECK-LABEL: bswap_i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev x8, x1
+; CHECK-NEXT: rev x1, x0
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+ %3 = call i128 @llvm.bswap.i128(i128 %a)
+ ret i128 %3
+}
+declare i128 @llvm.bswap.i128(i128)
+
+; ===== Legal Vector Type Tests =====
+
+define <4 x i16> @bswap_v4i16(<4 x i16> %a){
+; CHECK-LABEL: bswap_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev16 v0.8b, v0.8b
+; CHECK-NEXT: ret
+ %3 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
+ ret <4 x i16> %3
+}
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+
+define <8 x i16> @bswap_v8i16(<8 x i16> %a){
+; CHECK-LABEL: bswap_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev16 v0.16b, v0.16b
+; CHECK-NEXT: ret
+ %3 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
+ ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+
+define <2 x i32> @bswap_v2i32(<2 x i32> %a){
+; CHECK-LABEL: bswap_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev32 v0.8b, v0.8b
+; CHECK-NEXT: ret
+ %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ ret <2 x i32> %3
+}
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
+
+define <4 x i32> @bswap_v4i32(<4 x i32> %a){
+; CHECK-LABEL: bswap_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev32 v0.16b, v0.16b
+; CHECK-NEXT: ret
+ %3 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
+ ret <4 x i32> %3
+}
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+
+define <2 x i64> @bswap_v2i64(<2 x i64> %a){
+; CHECK-LABEL: bswap_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ret
+ %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
+ ret <2 x i64> %3
+}
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <2 x i16> @bswap_v2i16(<2 x i16> %a){
+; CHECK-LABEL: bswap_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev32 v0.8b, v0.8b
+; CHECK-NEXT: ushr v0.2s, v0.2s, #16
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
+ ret <2 x i16> %res
+}
+declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
+
+define <16 x i16> @bswap_v16i16(<16 x i16> %a){
+; CHECK-LABEL: bswap_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev16 v0.16b, v0.16b
+; CHECK-NEXT: rev16 v1.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+
+define <1 x i32> @bswap_v1i32(<1 x i32> %a){
+; CHECK-SD-LABEL: bswap_v1i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bswap_v1i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call <1 x i32> @llvm.bswap.v1i32(<1 x i32> %a)
+ ret <1 x i32> %res
+}
+declare <1 x i32> @llvm.bswap.v1i32(<1 x i32>)
+
+define <8 x i32> @bswap_v8i32(<8 x i32> %a){
+; CHECK-LABEL: bswap_v8i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev32 v0.16b, v0.16b
+; CHECK-NEXT: rev32 v1.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+
+define <4 x i64> @bswap_v4i64(<4 x i64> %a){
+; CHECK-LABEL: bswap_v4i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: rev64 v1.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i16> @bswap_v3i16(<3 x i16> %a){
+; CHECK-LABEL: bswap_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev16 v0.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %res = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> %a)
+ ret...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80036
More information about the llvm-commits
mailing list