[llvm] [AArch64] Remove vector REV16, use BSWAP instead (PR #186414)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 13 08:25:06 PDT 2026
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/186414
This removes the generation of vector REV16 nodes, generating a bswap instead. This allows us to remove most uses of AArch64ISD::REV16 and all uses of G_REV16.
From ef65327f2a701d0137f2db509764cd972cd1978d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 13 Mar 2026 15:09:17 +0000
Subject: [PATCH] [AArch64] Remove vector REV16, use BSWAP instead
This removes the generation of vector REV16 nodes, generating a bswap instead.
This allows us to remove most uses of AArch64ISD::REV16 and all uses of
G_REV16.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 10 ++++----
.../Target/AArch64/AArch64ISelLowering.cpp | 24 ++++++++++++++-----
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 9 -------
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +-
.../GISel/AArch64PostLegalizerLowering.cpp | 19 ++++++++++++---
.../CodeGen/AArch64/GlobalISel/select-rev.mir | 16 ++++++++-----
6 files changed, 50 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 6ddeb37f642f0..fe953a627939d 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -95,35 +95,35 @@ def rev : GICombineRule<
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+ (apply [{ applyShuffleVectorPseudo(*${root}, MRI, ${matchinfo}); }])
>;
def zip : GICombineRule<
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+ (apply [{ applyShuffleVectorPseudo(*${root}, MRI, ${matchinfo}); }])
>;
def uzp : GICombineRule<
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+ (apply [{ applyShuffleVectorPseudo(*${root}, MRI, ${matchinfo}); }])
>;
def dup: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchDup(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+ (apply [{ applyShuffleVectorPseudo(*${root}, MRI, ${matchinfo}); }])
>;
def trn : GICombineRule<
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+ (apply [{ applyShuffleVectorPseudo(*${root}, MRI, ${matchinfo}); }])
>;
def ext: GICombineRule <
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8deb0a33f27d1..bc5550b66f820 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14848,7 +14848,7 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
- case OP_VREV:
+ case OP_VREV: {
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
@@ -14858,9 +14858,14 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
- // vrev <4 x i8> -> REV16
- assert(VT.getVectorElementType() == MVT::i8);
- return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
+ // vrev <4 x i8> -> BSWAP which is REV16
+ assert(VT == MVT::v8i8 || VT == MVT::v16i8);
+ EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
+ return DAG.getNode(
+ AArch64ISD::NVCAST, DL, VT,
+ DAG.getNode(ISD::BSWAP, DL, BSVT,
+ DAG.getNode(AArch64ISD::NVCAST, DL, BSVT, OpLHS)));
+ }
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
@@ -15272,8 +15277,15 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(AArch64ISD::REV64, DL, V1.getValueType(), V1);
if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
return DAG.getNode(AArch64ISD::REV32, DL, V1.getValueType(), V1);
- if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
- return DAG.getNode(AArch64ISD::REV16, DL, V1.getValueType(), V1);
+ if (isREVMask(ShuffleMask, EltSize, NumElts, 16)) {
+ EVT VT = V1.getValueType();
+ assert(VT == MVT::v8i8 || VT == MVT::v16i8);
+ EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
+ return DAG.getNode(
+ AArch64ISD::NVCAST, DL, VT,
+ DAG.getNode(ISD::BSWAP, DL, BSVT,
+ DAG.getNode(AArch64ISD::NVCAST, DL, BSVT, V1)));
+ }
if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size())) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index d0c08036e7d41..db0d7d8c76f14 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,14 +25,6 @@ def G_ADD_LOW : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-// Pseudo for a rev16 instruction. Produced post-legalization from
-// G_SHUFFLE_VECTORs with appropriate masks.
-def G_REV16 : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
- let hasSideEffects = 0;
-}
-
// Pseudo for a rev32 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
def G_REV32 : AArch64GenericInstruction {
@@ -296,7 +288,6 @@ def G_BSP : AArch64GenericInstruction {
let hasSideEffects = 0;
}
-def : GINodeEquiv<G_REV16, AArch64rev16>;
def : GINodeEquiv<G_REV32, AArch64rev32>;
def : GINodeEquiv<G_REV64, AArch64rev64>;
def : GINodeEquiv<G_UZP1, AArch64uzp1>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 84566946260c3..1751bf182c797 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6011,7 +6011,7 @@ def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
-defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
+defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", null_frag>;
defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index e73ff042c00f0..70e917187fac0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -199,7 +199,7 @@ bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
else if (LaneSize == 32U)
Opcode = AArch64::G_REV32;
else
- Opcode = AArch64::G_REV16;
+ Opcode = AArch64::G_BSWAP;
MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
return true;
@@ -407,10 +407,23 @@ bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-void applyShuffleVectorPseudo(MachineInstr &MI,
+void applyShuffleVectorPseudo(MachineInstr &MI, MachineRegisterInfo &MRI,
ShuffleVectorPseudo &MatchInfo) {
MachineIRBuilder MIRBuilder(MI);
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
+ if (MatchInfo.Opc == TargetOpcode::G_BSWAP) {
+ assert(MatchInfo.SrcOps.size() == 1);
+ LLT DstTy = MRI.getType(MatchInfo.Dst);
+ assert(DstTy == LLT::fixed_vector(8, 8) ||
+ DstTy == LLT::fixed_vector(16, 8));
+ LLT BSTy = DstTy == LLT::fixed_vector(8, 8) ? LLT::fixed_vector(4, 16)
+ : LLT::fixed_vector(8, 16);
+ // FIXME: NVCAST
+ auto BS1 = MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {BSTy},
+ MatchInfo.SrcOps[0]);
+ auto BS2 = MIRBuilder.buildInstr(MatchInfo.Opc, {BSTy}, {BS1});
+ MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {MatchInfo.Dst}, {BS2});
+ } else
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
MI.eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
index 2d3051ec09e1b..d011143667e48 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
@@ -244,11 +244,13 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:fpr64 = COPY $d0
- ; CHECK-NEXT: %rev:fpr64 = REV16v8i8 %copy
- ; CHECK-NEXT: $d0 = COPY %rev
+ ; CHECK-NEXT: %rev2:fpr64 = REV16v8i8 %copy
+ ; CHECK-NEXT: $d0 = COPY %rev2
; CHECK-NEXT: RET_ReallyLR implicit $d0
%copy:fpr(<8 x s8>) = COPY $d0
- %rev:fpr(<8 x s8>) = G_REV16 %copy
+ %rev1:fpr(<4 x s16>) = G_BITCAST %copy
+ %rev2:fpr(<4 x s16>) = G_BSWAP %rev1
+ %rev:fpr(<8 x s8>) = G_BITCAST %rev2
$d0 = COPY %rev(<8 x s8>)
RET_ReallyLR implicit $d0
@@ -266,10 +268,12 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:fpr128 = COPY $q0
- ; CHECK-NEXT: %rev:fpr128 = REV16v16i8 %copy
- ; CHECK-NEXT: $q0 = COPY %rev
+ ; CHECK-NEXT: %rev2:fpr128 = REV16v16i8 %copy
+ ; CHECK-NEXT: $q0 = COPY %rev2
; CHECK-NEXT: RET_ReallyLR implicit $q0
%copy:fpr(<16 x s8>) = COPY $q0
- %rev:fpr(<16 x s8>) = G_REV16 %copy
+ %rev1:fpr(<8 x s16>) = G_BITCAST %copy
+ %rev2:fpr(<8 x s16>) = G_BSWAP %rev1
+ %rev:fpr(<16 x s8>) = G_BITCAST %rev2
$q0 = COPY %rev(<16 x s8>)
RET_ReallyLR implicit $q0
More information about the llvm-commits
mailing list