[llvm] [Hexagon] Add HVX patterns for vector arithmetic (PR #170704)
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 6 09:05:46 PST 2025
================
@@ -565,12 +565,142 @@ define <32 x i32> @test_2h(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_2i:
; CHECK: q[[Q2I0:[0-3]]] ^= vcmp.gt(v0.uw,v1.uw)
; CHECK: v0 = vmux(q[[Q2I0]],v0,v1)
-define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
%q0 = icmp ugt <32 x i32> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = xor <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x i32> %v0, <32 x i32> %v1
ret <32 x i32> %t1
}
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+; --- Float32
+
+; CHECK-LABEL: test_2j:
+; CHECK: q[[Q2J0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2J0]],v0,v1)
+define <32 x float> @test_2j(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp oeq <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2k:
+; CHECK: q[[Q2K0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2K0]],v1,v0)
+define <32 x float> @test_2k(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp one <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2l:
+; CHECK: v0.sf = vmin(v1.sf,v0.sf)
+define <32 x float> @test_2l(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp olt <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2m:
+; CHECK: q[[Q2M0:[0-3]]] = vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2M0]],v1,v0)
+define <32 x float> @test_2m(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp ole <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2n:
+; CHECK: v0.sf = vmax(v0.sf,v1.sf)
+define <32 x float> @test_2n(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp ogt <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2o:
+; CHECK: q[[Q2O0:[0-3]]] = vcmp.gt(v1.sf,v0.sf)
+; CHECK: v0 = vmux(q[[Q2O0]],v1,v0)
+define <32 x float> @test_2o(<32 x float> %v0, <32 x float> %v1) {
+ %t0 = fcmp oge <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2p:
+; CHECK: r[[R2P0:[0-9]*]] = ##16843009
----------------
kparzysz wrote:
If you want to do a truncation from v32i32 to v32i1, using 0x01010101 is still wrong.
A Q register always has 128 bits. A value of type v32i1 is represented as 32 groups of 4 bits each. Within each group all 4 bits have to be 0 or all have to be 1.
If v2 has 0x00000001 repeated 32 times, using vand with 0x01010101 will produce groups of bits in Q that are 0b0000 or 0b0001. This will cause the final vmux to pick only the lowest _byte_ from v0 instead of the whole float. To get a proper truncation from v32i32 to v32i1, do vandv(v2, vsplatw(0x00000001)) first, then vand(result, -1).
https://github.com/llvm/llvm-project/pull/170704
More information about the llvm-commits
mailing list