[llvm] [SelectionDAG][X86] Add floating point promotion. (PR #118793)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 04:14:54 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (jofrn)
<details>
<summary>Changes</summary>
When lowering atomic vector types with floating-point elements, instruction selection can fail because this pattern is unsupported. To support it, floats can be cast to an integer type of the same size.
---
Full diff: https://github.com/llvm/llvm-project/pull/118793.diff
8 Files Affected:
- (modified) llvm/docs/LangRef.rst (+4-4)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+16)
- (modified) llvm/lib/IR/Verifier.cpp (+8-6)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+5)
- (modified) llvm/test/Assembler/atomic.ll (+9)
- (added) llvm/test/CodeGen/X86/atomic-scalarization.ll (+40)
- (modified) llvm/test/Verifier/atomics.ll (+8-7)
``````````diff
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 79bdd25c18f1fd..32ba5ebdec6d37 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -10956,8 +10956,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic loads. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
@@ -11097,8 +11097,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic stores. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1703149aca7463..0086405825cd5c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -860,6 +860,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 465128099f4447..bdd71b251f3941 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -451,6 +454,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+
+ SDValue Result = DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+ N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 55de486e90e190..6f847e3b3fc70c 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4255,9 +4255,10 @@ void Verifier::visitLoadInst(LoadInst &LI) {
Check(LI.getOrdering() != AtomicOrdering::Release &&
LI.getOrdering() != AtomicOrdering::AcquireRelease,
"Load cannot have Release ordering", &LI);
- Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
- "atomic load operand must have integer, pointer, or floating point "
- "type!",
+ Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+ ElTy->getScalarType()->isFloatingPointTy(),
+ "atomic load operand must have integer, pointer, floating point, "
+ "or vector type!",
ElTy, &LI);
checkAtomicMemAccessSize(ElTy, &LI);
} else {
@@ -4281,9 +4282,10 @@ void Verifier::visitStoreInst(StoreInst &SI) {
Check(SI.getOrdering() != AtomicOrdering::Acquire &&
SI.getOrdering() != AtomicOrdering::AcquireRelease,
"Store cannot have Acquire ordering", &SI);
- Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
- "atomic store operand must have integer, pointer, or floating point "
- "type!",
+ Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+ ElTy->getScalarType()->isFloatingPointTy(),
+ "atomic store operand must have integer, pointer, floating point, "
+ "or vector type!",
ElTy, &SI);
checkAtomicMemAccessSize(ElTy, &SI);
} else {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9048d1d83f1874..907831af06900a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2589,6 +2589,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f32, Promote);
}
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote);
+ AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll
index a44dcccc16bef1..f1027d5d3fbde4 100644
--- a/llvm/test/Assembler/atomic.ll
+++ b/llvm/test/Assembler/atomic.ll
@@ -52,6 +52,15 @@ define void @f(ptr %x) {
; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
+ ; CHECK : load atomic <1 x i32>, ptr %x unordered, align 4
+ load atomic <1 x i32>, ptr %x unordered, align 4
+ ; CHECK : store atomic <1 x i32> splat (i32 3), ptr %x release, align 4
+ store atomic <1 x i32> <i32 3>, ptr %x release, align 4
+ ; CHECK : load atomic <2 x i32>, ptr %x unordered, align 4
+ load atomic <2 x i32>, ptr %x unordered, align 4
+ ; CHECK : store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4
+ store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4
+
; CHECK: fence syncscope("singlethread") release
fence syncscope("singlethread") release
; CHECK: fence seq_cst
diff --git a/llvm/test/CodeGen/X86/atomic-scalarization.ll b/llvm/test/CodeGen/X86/atomic-scalarization.ll
new file mode 100644
index 00000000000000..7ca6b3729a4723
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-scalarization.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s --mtriple=x86_64 | FileCheck %s
+
+define <1 x i32> @atomic_scalar_i32(ptr %x) {
+; CHECK-LABEL: atomic_scalar_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+ ret <1 x i32> %ret
+}
+
+define <1 x float> @atomic_scalar_float(ptr %x) {
+; CHECK-LABEL: atomic_scalar_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x float>, ptr %x acquire, align 4
+ ret <1 x float> %ret
+}
+
+define <1 x half> @atomic_scalar_half(ptr %x) {
+; CHECK-LABEL: atomic_scalar_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x half>, ptr %x acquire, align 4
+ ret <1 x half> %ret
+}
+
+define <1 x bfloat> @atomic_scalar_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_scalar_bfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x bfloat>, ptr %x acquire, align 4
+ ret <1 x bfloat> %ret
+}
diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll
index f835b98b243456..17bf5a0528d738 100644
--- a/llvm/test/Verifier/atomics.ll
+++ b/llvm/test/Verifier/atomics.ll
@@ -1,14 +1,15 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+; CHECK: atomic store operand must have integer, pointer, floating point, or vector type!
+; CHECK: atomic load operand must have integer, pointer, floating point, or vector type!
-; CHECK: atomic store operand must have integer, pointer, or floating point type!
-; CHECK: atomic load operand must have integer, pointer, or floating point type!
+%ty = type { i32 };
-define void @foo(ptr %P, <1 x i64> %v) {
- store atomic <1 x i64> %v, ptr %P unordered, align 8
+define void @foo(ptr %P, %ty %v) {
+ store atomic %ty %v, ptr %P unordered, align 8
ret void
}
-define <1 x i64> @bar(ptr %P) {
- %v = load atomic <1 x i64>, ptr %P unordered, align 8
- ret <1 x i64> %v
+define %ty @bar(ptr %P) {
+ %v = load atomic %ty, ptr %P unordered, align 8
+ ret %ty %v
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/118793
More information about the llvm-commits
mailing list