[llvm] [X86] Lowering of load atomic float via cast (PR #117189)

Sun Dec 1 11:46:17 PST 2024

https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/117189

>From 9b0a33b3f4e9d7da6e755e36722e72396fb9f064 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Mon, 25 Nov 2024 15:05:53 -0500
Subject: [PATCH 1/2] [Verifier] Allow vector type in atomic load and store

Vector types on atomics are assumed to be invalid by the verifier. However,
this type can be valid if it is lowered by codegen.
---
 llvm/docs/LangRef.rst         |  8 ++++----
 llvm/lib/IR/Verifier.cpp      | 14 ++++++++------
 llvm/test/Verifier/atomics.ll | 15 ++++++++-------
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 79bdd25c18f1fd..32ba5ebdec6d37 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -10956,8 +10956,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering
 <ordering>` and optional ``syncscope("<target-scope>")`` argument. The
 ``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions.
 Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
 eight and less than or equal to a target-specific size limit.  ``align`` must be
 explicitly specified on atomic loads. Note: if the alignment is not greater or
 equal to the size of the `<value>` type, the atomic operation is likely to
@@ -11097,8 +11097,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering
 <ordering>` and optional ``syncscope("<target-scope>")`` argument. The
 ``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions.
 Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
 eight and less than or equal to a target-specific size limit.  ``align`` must be
 explicitly specified on atomic stores. Note: if the alignment is not greater or
 equal to the size of the `<value>` type, the atomic operation is likely to
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 55de486e90e190..6f847e3b3fc70c 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4255,9 +4255,10 @@ void Verifier::visitLoadInst(LoadInst &LI) {
     Check(LI.getOrdering() != AtomicOrdering::Release &&
               LI.getOrdering() != AtomicOrdering::AcquireRelease,
           "Load cannot have Release ordering", &LI);
-    Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-          "atomic load operand must have integer, pointer, or floating point "
-          "type!",
+    Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+              ElTy->getScalarType()->isFloatingPointTy(),
+          "atomic load operand must have integer, pointer, floating point, "
+          "or vector type!",
           ElTy, &LI);
     checkAtomicMemAccessSize(ElTy, &LI);
   } else {
@@ -4281,9 +4282,10 @@ void Verifier::visitStoreInst(StoreInst &SI) {
     Check(SI.getOrdering() != AtomicOrdering::Acquire &&
               SI.getOrdering() != AtomicOrdering::AcquireRelease,
           "Store cannot have Acquire ordering", &SI);
-    Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-          "atomic store operand must have integer, pointer, or floating point "
-          "type!",
+    Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+              ElTy->getScalarType()->isFloatingPointTy(),
+          "atomic store operand must have integer, pointer, floating point, "
+          "or vector type!",
           ElTy, &SI);
     checkAtomicMemAccessSize(ElTy, &SI);
   } else {
diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll
index f835b98b243456..17bf5a0528d738 100644
--- a/llvm/test/Verifier/atomics.ll
+++ b/llvm/test/Verifier/atomics.ll
@@ -1,14 +1,15 @@
 ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+; CHECK: atomic store operand must have integer, pointer, floating point, or vector type!
+; CHECK: atomic load operand must have integer, pointer, floating point, or vector type!
 
-; CHECK: atomic store operand must have integer, pointer, or floating point type!
-; CHECK: atomic load operand must have integer, pointer, or floating point type!
+%ty = type { i32 };
 
-define void @foo(ptr %P, <1 x i64> %v) {
-  store atomic <1 x i64> %v, ptr %P unordered, align 8
+define void @foo(ptr %P, %ty %v) {
+  store atomic %ty %v, ptr %P unordered, align 8
   ret void
 }
 
-define <1 x i64> @bar(ptr %P) {
-  %v = load atomic <1 x i64>, ptr %P unordered, align 8
-  ret <1 x i64> %v
+define %ty @bar(ptr %P) {
+  %v = load atomic %ty, ptr %P unordered, align 8
+  ret %ty %v
 }

>From 1eabd74c9289b046ea7e08fceaf5aad3dad20e00 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Thu, 21 Nov 2024 11:46:32 -0500
Subject: [PATCH 2/2] [X86] Lowering of load atomic float via cast

X86 backend does not lower load atomic float, so it can be casted to an
integer before lowering.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  8 ++++
 llvm/lib/Target/X86/X86ISelLowering.h         |  2 +
 llvm/test/CodeGen/X86/atomicvec-float.ll      | 38 +++++++++++++++++++
 .../AtomicExpand/atomicvec-float.ll           | 38 +++++++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/atomicvec-float.ll
 create mode 100644 llvm/test/Transforms/AtomicExpand/atomicvec-float.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9048d1d83f1874..3e0134cb6852a4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31158,6 +31158,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   return false;
 }
 
+TargetLoweringBase::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+  if (const auto VT = dyn_cast<VectorType>(LI->getType()))
+    if (VT->getElementType()->isFloatingPointTy())
+      return AtomicExpansionKind::CastToInteger;
+  return TargetLowering::shouldCastAtomicLoadInIR(LI);
+}
+
 TargetLoweringBase::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2db25d6dda061a..b4abb92822b70e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1808,6 +1808,8 @@ namespace llvm {
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
+    TargetLoweringBase::AtomicExpansionKind
+    shouldCastAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/X86/atomicvec-float.ll b/llvm/test/CodeGen/X86/atomicvec-float.ll
new file mode 100644
index 00000000000000..248dec1ee6acce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomicvec-float.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s --mtriple=x86_64 | FileCheck %s
+
+define float @load_atomic_float(ptr %src) {
+; CHECK-LABEL: load_atomic_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %ret = load atomic float, ptr %src acquire, align 4
+  ret float %ret
+}
+
+define <1 x float> @load_atomic_vector_float1(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_float1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector_float2(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_float2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load at PLT
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}
diff --git a/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll b/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll
new file mode 100644
index 00000000000000..61a78f8c5cf769
--- /dev/null
+++ b/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s --mtriple=x86_64 --passes=atomic-expand -S -o - | FileCheck %s
+
+define float @load_atomic_float(ptr %src) {
+; CHECK-LABEL: define float @load_atomic_float(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %ret = load atomic float, ptr %src acquire, align 4
+  ret float %ret
+}
+
+define <1 x float> @load_atomic_vector_float1(ptr %src) {
+; CHECK-LABEL: define <1 x float> @load_atomic_vector_float1(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x float>
+; CHECK-NEXT:    ret <1 x float> [[TMP2]]
+;
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector_float2(ptr %src) {
+; CHECK-LABEL: define <2 x float> @load_atomic_vector_float2(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x float>, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 8, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+;
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}