[llvm] [AArch64] Spare N2I roundtrip when splatting float comparison (PR #141806)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 07:02:50 PDT 2025
================
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+; Scalar half-precision compare (fcmp oeq) whose i1 result is sign-extended to
+; i16, inserted into lane 0 of a poison <8 x i16>, and bitcast to <8 x half>.
+; There is no splat here: only lane 0 is defined.
+; The expected output widens both operands to single precision (fcvt) and uses
+; a scalar fcmeq, so the mask is produced directly in the FP/SIMD register.
+define <8 x half> @dup_v8i16(half %a, half %b) {
+ ; CHECK-LABEL: dup_v8i16:
+ ; CHECK: // %bb.0: // %entry
+ ; CHECK-NEXT: fcvt s1, h1
+ ; CHECK-NEXT: fcvt s0, h0
+ ; CHECK-NEXT: fcmeq s0, s0, s1
+ ; CHECK-NEXT: ret
+ entry:
+ %0 = fcmp oeq half %a, %b
+ %vcmpd.i = sext i1 %0 to i16
+ %vecinit.i = insertelement <8 x i16> poison, i16 %vcmpd.i, i64 0
+ %1 = bitcast <8 x i16> %vecinit.i to <8 x half>
+ ret <8 x half> %1
+}
+
+; Single-element case: fcmp oeq on floats, sign-extended to i32, placed in the
+; only lane of a <1 x i32>, and bitcast to <1 x float>.
+; The expected output is a lone scalar fcmeq with no further instructions
+; before the return.
+define <1 x float> @dup_v1i32(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmeq s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp oeq float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+; Splat of a float comparison mask: fcmp oge is sign-extended to i32, inserted
+; into lane 0 of a <4 x i32>, bitcast to <4 x float>, and broadcast to all
+; lanes by a shufflevector with an all-zero mask.
+; The expected output is a scalar fcmge followed by a lane-0 dup.
+define <4 x float> @dup_v4i32(float %a, float %b) {
+; CHECK-LABEL: dup_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s0, s1
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp oge float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <4 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <4 x i32> %vecinit.i to <4 x float>
+ %2 = shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> zeroinitializer
+ ret <4 x float> %2
+}
+
+; Same splat pattern as a <4 x float> mask, but the compare takes its operands
+; in the opposite order to the function arguments (fcmp ogt %b, %a).
+; The expected fcmgt keeps that order (s1 compared against s0) before the
+; lane-0 dup.
+define <4 x float> @dup_v4i32_reversed(float %a, float %b) {
+; CHECK-LABEL: dup_v4i32_reversed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp ogt float %b, %a
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <4 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <4 x i32> %vecinit.i to <4 x float>
+ %2 = shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> zeroinitializer
+ ret <4 x float> %2
+}
+
+define <2 x double> @dup_v2i64(double %a, double %b) {
+; CHECK-LABEL: dup_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt d0, d0, d1
+; CHECK-NEXT: dup v0.2d, v0.d[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp ogt double %a, %b
----------------
davemgreen wrote:
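It is probably worth making sure there are tests for all of the FP comparison predicates (fcmp conditions) with at least one type. I think that ignoring CC2 (the second condition code) is making cases like this one incorrect, where an `fcmp ord` is lowered to a single `fcmgt`: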
```
; NOTE(review): counter-example for the patch. The IR splats the mask of an
; `fcmp ord` (ordered, i.e. neither operand is NaN) on doubles across a
; <2 x double>. The CHECK lines below show the output reported as incorrect
; (a lone fcmgt followed by dup), not the desired codegen.
define <2 x double> @dup_v2i64_ord(double %a, double %b) {
; CHECK-LABEL: dup_v2i64_ord:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt d0, d1, d0
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
entry:
%0 = fcmp ord double %a, %b
%vcmpd.i = sext i1 %0 to i64
%vecinit.i = insertelement <2 x i64> poison, i64 %vcmpd.i, i64 0
%1 = bitcast <2 x i64> %vecinit.i to <2 x double>
%2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer
ret <2 x double> %2
}
```
https://github.com/llvm/llvm-project/pull/141806
More information about the llvm-commits
mailing list