[llvm] [GlobalISel][AMDGPU] Fix handling of v2i128 type for AND, OR, XOR (PR #138574)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 13:16:50 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: Chinmay Deshpande (chinmaydd)
<details>
<summary>Changes</summary>
Currently, legalizing AND, OR and XOR for the v2i128 type crashes the compiler.
This bug was found using the AMDGPU Fuzzing project.
Fixes SWDEV-508816.
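For illustration, the effect of the new mutation on this case: a v2s128 operand (256 bits with 128-bit elements) is bitcast to v4s64 before the remaining rules run. Below is a minimal standalone sketch of the type arithmetic (plain C++; the real `breakCurrentEltsToSize32Or64` in the diff operates on `LLT` via a `LegalityQuery`, and the free function and `main` here are purely illustrative):

```cpp
#include <cassert>
#include <cstdio>
#include <utility>

// Sketch of the type arithmetic in breakCurrentEltsToSize32Or64.
// Returns {new element count, new element size in bits}.
static std::pair<unsigned, unsigned>
breakEltsToSize32Or64(unsigned NumElts, unsigned EltSize) {
  const unsigned Size = NumElts * EltSize;
  // Elements that are a multiple of 64 bits are re-split as 64-bit lanes;
  // anything else falls back to 32-bit lanes. Round the count up so the
  // total bit width is preserved.
  const unsigned TargetEltSize = (EltSize % 64 == 0) ? 64 : 32;
  const unsigned NewNumElts = (Size + TargetEltSize - 1) / TargetEltSize;
  return {NewNumElts, TargetEltSize};
}

int main() {
  // v2s128 (256 bits, 128-bit elements): 128 % 64 == 0, so bitcast to v4s64.
  auto [NumElts, EltSize] = breakEltsToSize32Or64(2, 128);
  assert(NumElts == 4 && EltSize == 64);
  std::printf("v2s128 -> v%us%u\n", NumElts, EltSize);
}
```

The resulting v4s64 is then covered by the existing rules (S64 is already in the `legalFor` list), which is consistent with the new tests lowering to straight per-register `v_and_b32` / `v_or_b32` / `v_xor_b32` sequences.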
---
Full diff: https://github.com/llvm/llvm-project/pull/138574.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+14-1)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll (+117)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll (+117)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll (+117)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ff8658ed82a72..e8063d54ac65a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -119,6 +119,18 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
};
}
+static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getElementType();
+ const int Size = Ty.getSizeInBits();
+ const int EltSize = EltTy.getSizeInBits();
+ const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32;
+ const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize;
+ return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, TargetEltSize));
+ };
+}
+
// Increase the number of vector elements to reach the next multiple of 32-bit
// type.
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
@@ -875,7 +887,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
.clampScalar(0, S32, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
+ .fewerElementsIf(all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0))
+ .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), breakCurrentEltsToSize32Or64(0))
.widenScalarToNextPow2(0)
.scalarize(0);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll
new file mode 100644
index 0000000000000..532a797094d14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_and_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_and_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_and_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_and_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %and = and <2 x i128> %a, %b
+ ret <2 x i128> %and
+}
+
+define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_and_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_and_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_and_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %and = and <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %and
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll
new file mode 100644
index 0000000000000..eaba0500dc1f3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_or_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_or_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_or_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_or_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %or = or <2 x i128> %a, %b
+ ret <2 x i128> %or
+}
+
+define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_or_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_or_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_or_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %or = or <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %or
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll
new file mode 100644
index 0000000000000..291d27b0cf527
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_xor_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_xor_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_xor_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_xor_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %xor = xor <2 x i128> %a, %b
+ ret <2 x i128> %xor
+}
+
+define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_xor_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_xor_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_xor_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %xor = xor <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %xor
+}
``````````
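To summarize the rule split (a sketch; the predicate names in the comments are the ones used in the patch, everything else is illustrative): wide vectors with sub-64-bit elements keep taking the `fewerElementsIf` path, while wide vectors with over-64-bit elements, such as v2s128, which previously matched the unconditional `fewerElementsIf` rule and crashed, now take the new `bitcastIf` path.

```cpp
#include <cstdio>

// Which path the updated ruleset takes for a given fixed vector (sketch).
enum class Action { FewerElements, Bitcast, Later };

static Action classify(unsigned NumElts, unsigned EltSize) {
  const bool WiderThan64 = NumElts * EltSize > 64; // vectorWiderThan(0, 64)
  if (WiderThan64 && EltSize < 64)                 // scalarOrEltNarrowerThan(0, 64)
    return Action::FewerElements;                  // split into <= 64-bit subvectors
  if (WiderThan64 && EltSize > 64)                 // scalarOrEltWiderThan(0, 64)
    return Action::Bitcast;                        // e.g. v2s128 -> v4s64
  return Action::Later;                            // legal as-is, or handled by later rules
}

int main() {
  std::printf("v4s32:  %d\n", static_cast<int>(classify(4, 32)));  // FewerElements
  std::printf("v2s128: %d\n", static_cast<int>(classify(2, 128))); // Bitcast
}
```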
</details>
https://github.com/llvm/llvm-project/pull/138574