[llvm] d65be16 - [AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) (#165539)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 07:55:44 PST 2025
Author: Ryan Cowan
Date: 2025-11-17T15:55:40Z
New Revision: d65be16ab6adf00af21e75d29049ae5de0f3a38a
URL: https://github.com/llvm/llvm-project/commit/d65be16ab6adf00af21e75d29049ae5de0f3a38a
DIFF: https://github.com/llvm/llvm-project/commit/d65be16ab6adf00af21e75d29049ae5de0f3a38a.diff
LOG: [AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) (#165539)
This PR adds a new combine to the `post-legalizer-combiner` pass. The
new combine matches a vector that is fully unmerged and then padded
with `G_IMPLICIT_DEF` values through a `G_BUILD_VECTOR`. When this
pattern is found, the build is replaced with a `G_CONCAT_VECTORS` of
the unmerge source and a `G_IMPLICIT_DEF` of the same width as that
source, skipping the per-element rebuild.
This removes unnecessary `mov` instructions in a few places.
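In generic MIR, the rewrite looks roughly as follows (a minimal sketch
with illustrative register names and types, assuming a <2 x s32> source
padded out to <4 x s32>; the actual types depend on the code being
legalized):

  ; Before: the source is unmerged and rebuilt element by element.
  %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %src:_(<2 x s32>)
  %undef:_(s32) = G_IMPLICIT_DEF
  %dst:_(<4 x s32>) = G_BUILD_VECTOR %lo(s32), %hi(s32), %undef(s32), %undef(s32)

  ; After: the source is concatenated with an undef vector of its own type.
  %pad:_(<2 x s32>) = G_IMPLICIT_DEF
  %dst:_(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %pad(<2 x s32>)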
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/fptrunc.ll
llvm/test/CodeGen/AArch64/itofp.ll
llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 96cb7cdf2d531..9de1a643f1000 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -647,6 +647,12 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI) const;
void applyRotateOutOfRange(MachineInstr &MI) const;
+ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &UnmergeSrc) const;
+ void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ Register &UnmergeSrc) const;
+
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 119695e53c3cb..0ab2d9487a295 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;
+// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef)
+// => concat_vectors(src, undef)
+def combine_build_unmerge : GICombineRule<
+ (defs root:$root, register_matchinfo:$unmergeSrc),
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+ [{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
+ (apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
+>;
+
def merge_combines: GICombineGroup<[
unmerge_anyext_build_vector,
unmerge_merge,
@@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
unmerge_dead_to_trunc,
unmerge_zext_to_zext,
merge_of_x_and_undef,
- merge_of_x_and_zero
+ merge_of_x_and_zero,
+ combine_build_unmerge
]>;
// Under certain conditions, transform:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ec4d13f1cd1b3..45a08347b1ec2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3463,6 +3463,88 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}
+// This pattern aims to match the following shape to avoid extra mov
+// instructions
+// G_BUILD_VECTOR(
+// G_UNMERGE_VALUES(src, 0)
+// G_UNMERGE_VALUES(src, 1)
+// G_IMPLICIT_DEF
+// G_IMPLICIT_DEF
+// )
+// ->
+// G_CONCAT_VECTORS(
+// src,
+// undef
+// )
+bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ Register &UnmergeSrc) const {
+ auto &BV = cast<GBuildVector>(MI);
+
+ unsigned BuildUseCount = BV.getNumSources();
+ if (BuildUseCount % 2 != 0)
+ return false;
+
+ unsigned NumUnmerge = BuildUseCount / 2;
+
+ auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
+
+ // Check the first operand is an unmerge and has the correct number of
+ // operands
+ if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
+ return false;
+
+ UnmergeSrc = Unmerge->getSourceReg();
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
+
+ // Ensure we only generate legal instructions post-legalizer
+ if (!IsPreLegalize &&
+ !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
+ return false;
+
+ // Check that all of the operands before the midpoint come from the same
+ // unmerge and are in the same order as they are used in the build_vector
+ for (unsigned I = 0; I < NumUnmerge; ++I) {
+ auto MaybeUnmergeReg = BV.getSourceReg(I);
+ auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
+
+ if (!LoopUnmerge || LoopUnmerge != Unmerge)
+ return false;
+
+ if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
+ return false;
+ }
+
+ // Check that all of the unmerged values are used
+ if (Unmerge->getNumDefs() != NumUnmerge)
+ return false;
+
+ // Check that all of the operands after the midpoint are undefs.
+ for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
+ auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
+
+ if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
+ return false;
+ }
+
+ return true;
+}
+
+void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ Register &UnmergeSrc) const {
+ assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
+ B.setInstrAndDebugLoc(MI);
+
+ Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
+ B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
+
+ MI.eraseFromParent();
+}
+
// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
@@ -8426,4 +8508,4 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
}
return false;
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index da19991d56259..ae86129286ddc 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -345,19 +345,11 @@ entry:
}
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
-; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptrunc_v2f32_v2f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index fce4f8e69f14d..e526a9f7bc0f6 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
-; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index c1b8bc6031b18..f7dbcd137e742 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
}
define void @undef_hi2_v4i16(<2 x i16> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4i16:
-; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: ;;#ASMSTART
-; GFX8-SDAG-NEXT: ; use v[0:1]
-; GFX8-SDAG-NEXT: ;;#ASMEND
-; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4i16:
-; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT: ;;#ASMSTART
-; GFX8-GISEL-NEXT: ; use v[0:1]
-; GFX8-GISEL-NEXT: ;;#ASMEND
-; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4i16:
; GFX9: ; %bb.0:
@@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
}
define void @undef_hi2_v4f16(<2 x half> %arg0) {
-; GFX8-SDAG-LABEL: undef_hi2_v4f16:
-; GFX8-SDAG: ; %bb.0:
-; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: ;;#ASMSTART
-; GFX8-SDAG-NEXT: ; use v[0:1]
-; GFX8-SDAG-NEXT: ;;#ASMEND
-; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: undef_hi2_v4f16:
-; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-GISEL-NEXT: ;;#ASMSTART
-; GFX8-GISEL-NEXT: ; use v[0:1]
-; GFX8-GISEL-NEXT: ;;#ASMEND
-; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX8-LABEL: undef_hi2_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: ; use v[0:1]
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4f16:
; GFX9: ; %bb.0:
@@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX8: {{.*}}