[llvm] d7ac412 - [AArch64][SME] Fix definition of uclamp/sclamp instructions. (#77619)

Wed Jan 10 09:07:07 PST 2024

Author: Sander de Smalen
Date: 2024-01-10T17:07:03Z
New Revision: d7ac4123333a5bc042b2eb9e17df8f723f6b56d9

URL: https://github.com/llvm/llvm-project/commit/d7ac4123333a5bc042b2eb9e17df8f723f6b56d9
DIFF: https://github.com/llvm/llvm-project/commit/d7ac4123333a5bc042b2eb9e17df8f723f6b56d9.diff

LOG: [AArch64][SME] Fix definition of uclamp/sclamp instructions. (#77619)

For some reason the arguments were in the wrong order.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/SMEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f05..44d9a8ac7cb677 100644

--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
 }
 
 class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
-    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
         asm, "\t$Zd, $Zn, $Zm", "", []>,
       Sched<[]> {
   bits<5> Zm;

diff  --git a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
index 28ec430aff3d3d..365fd534548495 100644
--- a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
@@ -3,7 +3,7 @@
 
 ; Replace pattern min(max(v1,v2),v3) by clamp
 
-define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
@@ -13,7 +13,7 @@ define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
@@ -23,7 +23,7 @@ define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
@@ -33,7 +33,7 @@ define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: uclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
@@ -43,7 +43,7 @@ define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
@@ -53,7 +53,7 @@ define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
@@ -63,7 +63,7 @@ define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
@@ -73,7 +73,7 @@ define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index cf59036d42dbf5..912d5d853aa8d5 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index 81a34e82d8450e..de1695162c98eb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res