[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 27 10:34:42 PDT 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385
From 85e5dc5e42cfe0f2f4875cb4db990f92b68295ed Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load
`load atomic <1 x T>` is not valid. This change legalizes <1 x T> vector
types for atomic loads by scalarizing them in SelectionDAG, so that, for
example, `v1i32` is translated to `i32`.
commit-id:5c36cc8c
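
As a small illustration (mirroring the atomic_vec1_i32 test added below; the
function name here is only a placeholder), a single-element atomic vector load
is scalarized during type legalization to an ordinary scalar atomic load, so on
x86-64 it lowers to a plain `movl`:

  define <1 x i32> @example_vec1_i32(ptr %p) {
    ; scalarized to `load atomic i32` by the new ScalarizeVecRes_ATOMIC_LOAD
    %v = load atomic <1 x i32>, ptr %p acquire, align 4
    ret <1 x i32> %v
  }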
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 15 ++
llvm/test/CodeGen/X86/atomic-load-store.ll | 250 +++++++++++++++++-
3 files changed, 257 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index dd9af47da5287..d24b4517a460d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4d844f0036a75..d6cbf2211f053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SDValue Result = DAG.getAtomicLoad(
+ ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+ N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 45277ce3d26c4..4f5cb5a4e9247 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
@@ -34,6 +34,238 @@ define i32 @test3(ptr %ptr) {
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-O0: {{.*}}
-; CHECK-O3: {{.*}}
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+ ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: movzbl %al, %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movzbl %al, %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movzbl %al, %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: movzbl %al, %eax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: movzbl %al, %eax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ %zret = zext <1 x i8> %ret to <1 x i32>
+ ret <1 x i32> %zret
+}
+
+define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: movswq %ax, %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movswq %ax, %rax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movswq %ax, %rax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: movswq %ax, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: movswq %ax, %rax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: movswq %ax, %rax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ %sret = sext <1 x i16> %ret to <1 x i64>
+ ret <1 x i64> %sret
+}
+
+define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
+; CHECK-LABEL: atomic_vec1_ptr270:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
+ ret <1 x ptr addrspace(270)> %ret
+}
+
+define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: # implicit-def: $xmm0
+; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
+; CHECK-SSE-O0-NEXT: # implicit-def: $eax
+; CHECK-SSE-O0-NEXT: movw %cx, %ax
+; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
+; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
+; CHECK-AVX-O0-NEXT: # implicit-def: $eax
+; CHECK-AVX-O0-NEXT: movw %cx, %ax
+; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
+; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
+ ret <1 x bfloat> %ret
+}
+
+define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_ptr_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 8
+ ret <1 x ptr> %ret
+}
+
+define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_i64_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 8
+ ret <1 x i64> %ret
+}