[clang] [llvm] [WIP][RISCV] Support for Zvabd fast-track proposal (PR #124239)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 24 02:30:24 PST 2025


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/124239

From 2ce077b011a9dcac0f9649493a50819971695b73 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 16 Jul 2024 16:08:16 +0800
Subject: [PATCH 1/2] [RISCV][MC] Support Zvabd instructions

Support for these instructions is added:

- Vector Single-Width Signed/Unsigned Integer Absolute Difference
- Vector Widening Signed/Unsigned Integer Absolute Difference and
  Accumulate

Doc: https://bytedance.larkoffice.com/docx/DqaLdNqNao8WgZxgUJkcqIVPn7g
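
For readers skimming the patch, a minimal assembly sketch of the new
mnemonics follows. The semantics comments are paraphrased from the
instruction names; the precise definitions are in the linked doc, and
the operand choices are illustrative only.

  # Single-width absolute difference: vd[i] = |vs2[i] - vs1[i]|
  # (.vx forms use the scalar x[rs1] in place of vs1)
  vabd.vv   v10, v9, v8          # signed interpretation of elements
  vabdu.vx  v10, v9, a0          # unsigned, vector-scalar form
  vabd.vv   v10, v9, v8, v0.t    # masked variant

  # Widening absolute difference and accumulate into a 2*SEW group:
  # vd[i] += |vs2[i] - vs1[i]|; vd must not overlap the source group
  vwabdacc.vv  v10, v9, v8       # signed
  vwabdaccu.vx v10, v9, a0       # unsigned, vector-scalar form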
---
 .../Driver/print-supported-extensions-riscv.c |   1 +
 .../test/Preprocessor/riscv-target-features.c |   9 ++
 llvm/lib/Target/RISCV/RISCVFeatures.td        |   6 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |   1 +
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td      |  11 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td  |  25 +++++
 llvm/test/CodeGen/RISCV/attributes.ll         |   4 +
 llvm/test/MC/RISCV/rvv/zvabd-invalid.s        |  18 +++
 llvm/test/MC/RISCV/rvv/zvabd.s                | 105 ++++++++++++++++++
 .../TargetParser/RISCVISAInfoTest.cpp         |   1 +
 10 files changed, 176 insertions(+), 5 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
 create mode 100644 llvm/test/MC/RISCV/rvv/zvabd-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rvv/zvabd.s

diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c
index ae3a1c29df3976..6c9ee75390fa88 100644
--- a/clang/test/Driver/print-supported-extensions-riscv.c
+++ b/clang/test/Driver/print-supported-extensions-riscv.c
@@ -183,6 +183,7 @@
 // CHECK-NEXT:     zicfilp              1.0       'Zicfilp' (Landing pad)
 // CHECK-NEXT:     zicfiss              1.0       'Zicfiss' (Shadow stack)
 // CHECK-NEXT:     zalasr               0.1       'Zalasr' (Load-Acquire and Store-Release Instructions)
+// CHECK-NEXT:     zvabd                0.2       'Zvabd' (Vector Absolute Difference)
 // CHECK-NEXT:     zvbc32e              0.7       'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)
 // CHECK-NEXT:     zvkgs                0.7       'Zvkgs' (Vector-Scalar GCM instructions for Cryptography)
 // CHECK-NEXT:     sdext                1.0       'Sdext' (External debugger)
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index c2197711352757..2725c283f107d1 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -145,6 +145,7 @@
 // CHECK-NOT: __riscv_zksh {{.*$}}
 // CHECK-NOT: __riscv_zkt {{.*$}}
 // CHECK-NOT: __riscv_zmmul {{.*$}}
+// CHECK-NOT: __riscv_zvabd {{.*$}}
 // CHECK-NOT: __riscv_zvbb {{.*$}}
 // CHECK-NOT: __riscv_zvbc {{.*$}}
 // CHECK-NOT: __riscv_zve32f {{.*$}}
@@ -1504,6 +1505,14 @@
 // RUN:   -o - | FileCheck --check-prefix=CHECK-ZFA-EXT %s
 // CHECK-ZFA-EXT: __riscv_zfa 1000000{{$}}
 
+// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN:   -march=rv32i_zve64x_zvabd0p2 -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-ZVABD-EXT %s
+// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN:   -march=rv64i_zve64x_zvabd0p2 -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-ZVABD-EXT %s
+// CHECK-ZVABD-EXT: __riscv_zvabd  2000{{$}}
+
 // RUN: %clang --target=riscv32 \
 // RUN:   -march=rv32i_zve64x_zvbb1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-ZVBB-EXT %s
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 4119dd77804f1a..0937f378ca3d14 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -646,6 +646,12 @@ def FeatureStdExtV
                      [FeatureStdExtZvl128b, FeatureStdExtZve64d]>,
       RISCVExtensionBitmask<0, 21>;
 
+def FeatureStdExtZvabd
+    : RISCVExperimentalExtension<0, 2, "Vector Absolute Difference">;
+def HasStdExtZvabd : Predicate<"Subtarget->hasStdExtZvabd()">,
+                     AssemblerPredicate<(all_of FeatureStdExtZvabd),
+                                        "'Zvabd' (Vector Absolute Difference)">;
+
 def FeatureStdExtZvfbfmin
     : RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>;
 def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index bb5bb6352c32a5..0e0a05e8fc03b1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2110,6 +2110,7 @@ include "RISCVInstrInfoZk.td"
 // Vector
 include "RISCVInstrInfoV.td"
 include "RISCVInstrInfoZvk.td"
+include "RISCVInstrInfoZvabd.td"
 
 // Compressed
 include "RISCVInstrInfoC.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 24a881dc6810f8..0d06efe6a488af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -575,15 +575,16 @@ multiclass VALU_IV_X<string opcodestr, bits<6> funct6> {
            SchedBinaryMC<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX">;
 }
 
-multiclass VALU_IV_I<string opcodestr, bits<6> funct6> {
-  def I  : VALUVI<funct6, opcodestr # ".vi">,
-           SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">;
+multiclass VALU_IV_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
+  def I : VALUVI<funct6, opcodestr#".vi", optype>,
+          SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">;
 }
 
-multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6>
+multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6,
+                         Operand optype = simm5>
     : VALU_IV_V<opcodestr, funct6>,
       VALU_IV_X<opcodestr, funct6>,
-      VALU_IV_I<opcodestr, funct6>;
+      VALU_IV_I<opcodestr, funct6, optype>;
 
 multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6>
     : VALU_IV_V<opcodestr, funct6>,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
new file mode 100644
index 00000000000000..7a8f79ccfd465b
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
@@ -0,0 +1,25 @@
+//===-- RISCVInstrInfoZvabd.td - 'Zvabd' instructions ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file describes the RISC-V instructions for 'Zvabd' (Vector Absolute
+/// Difference).
+///
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZvabd] in {
+  defm VABD_V : VAALU_MV_V_X<"vabd", 0b010001>;
+  defm VABDU_V : VAALU_MV_V_X<"vabdu", 0b010011>;
+
+  let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+    defm VWABDACC_V : VALU_MV_V_X<"vwabdacc", 0b010101, "v">;
+    defm VWABDACCU_V : VALU_MV_V_X<"vwabdaccu", 0b010110, "v">;
+  } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+} // Predicates = [HasStdExtZvabd]
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index caed0bdfb04984..a36d8c16a318e0 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -121,6 +121,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+zvksh %s -o - | FileCheck --check-prefix=RV32ZVKSH %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV32ZVKT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV32ZVFH %s
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvabd %s -o - | FileCheck --check-prefix=RV32ZVABD %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV32ZICOND %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV32ZIMOP %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV32ZCMOP %s
@@ -270,6 +271,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+zvksh %s -o - | FileCheck --check-prefix=RV64ZVKSH %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV64ZVKT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV64ZVFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvabd %s -o - | FileCheck --check-prefix=RV64ZVABD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV64ZICOND %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV64ZIMOP %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV64ZCMOP %s
@@ -437,6 +439,7 @@
 ; RV32ZVKSH: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0"
 ; RV32ZVKT: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0"
 ; RV32ZVFH: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0"
+; RV32ZVABD: .attribute 5, "rv32i2p1_zicsr2p0_zvabd0p2_zve32x1p0_zvl32b1p0"
 ; RV32ZICOND: .attribute 5, "rv32i2p1_zicond1p0"
 ; RV32ZIMOP: .attribute 5, "rv32i2p1_zimop1p0"
 ; RV32ZCMOP: .attribute 5, "rv32i2p1_zca1p0_zcmop1p0"
@@ -584,6 +587,7 @@
 ; RV64ZVKSH: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0"
 ; RV64ZVKT: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0"
 ; RV64ZVFH: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0"
+; RV64ZVABD: .attribute 5, "rv64i2p1_zicsr2p0_zvabd0p2_zve32x1p0_zvl32b1p0"
 ; RV64ZICOND: .attribute 5, "rv64i2p1_zicond1p0"
 ; RV64ZIMOP: .attribute 5, "rv64i2p1_zimop1p0"
 ; RV64ZCMOP: .attribute 5, "rv64i2p1_zca1p0_zcmop1p0"
diff --git a/llvm/test/MC/RISCV/rvv/zvabd-invalid.s b/llvm/test/MC/RISCV/rvv/zvabd-invalid.s
new file mode 100644
index 00000000000000..da9184364020ab
--- /dev/null
+++ b/llvm/test/MC/RISCV/rvv/zvabd-invalid.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc -triple=riscv64 --mattr=+zve64x --mattr=+experimental-zvabd %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+vwabdacc.vv v9, v9, v8
+# CHECK-ERROR: [[@LINE-1]]:13: error: the destination vector register group cannot overlap the source vector register group
+# CHECK-ERROR-LABEL: vwabdacc.vv v9, v9, v8
+
+vwabdacc.vx v9, v9, a0
+# CHECK-ERROR: [[@LINE-1]]:13: error: the destination vector register group cannot overlap the source vector register group
+# CHECK-ERROR-LABEL: vwabdacc.vx v9, v9, a0
+
+vwabdaccu.vv v9, v9, v8
+# CHECK-ERROR: [[@LINE-1]]:14: error: the destination vector register group cannot overlap the source vector register group
+# CHECK-ERROR-LABEL: vwabdaccu.vv v9, v9, v8
+
+vwabdaccu.vx v9, v9, a0
+# CHECK-ERROR: [[@LINE-1]]:14: error: the destination vector register group cannot overlap the source vector register group
+# CHECK-ERROR-LABEL: vwabdaccu.vx v9, v9, a0
diff --git a/llvm/test/MC/RISCV/rvv/zvabd.s b/llvm/test/MC/RISCV/rvv/zvabd.s
new file mode 100644
index 00000000000000..d765e01c52081b
--- /dev/null
+++ b/llvm/test/MC/RISCV/rvv/zvabd.s
@@ -0,0 +1,105 @@
+# RUN: llvm-mc -triple=riscv32 -show-encoding --mattr=+v --mattr=+experimental-zvabd %s \
+# RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: not llvm-mc -triple=riscv32 -show-encoding %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+v --mattr=+experimental-zvabd %s \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+experimental-zvabd --no-print-imm-hex  - \
+# RUN:        | FileCheck %s --check-prefix=CHECK-INST
+# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+v --mattr=+experimental-zvabd %s \
+# RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+vabd.vv v10, v9, v8
+# CHECK-INST: vabd.vv v10, v9, v8
+# CHECK-ENCODING: [0x57,0x25,0x94,0x46]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 46942557 <unknown>
+
+vabd.vv v10, v9, v8, v0.t
+# CHECK-INST: vabd.vv v10, v9, v8, v0.t
+# CHECK-ENCODING: [0x57,0x25,0x94,0x44]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 44942557 <unknown>
+
+vabd.vx v10, v9, a0
+# CHECK-INST: vabd.vx v10, v9, a0
+# CHECK-ENCODING: [0x57,0x65,0x95,0x46]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 46956557 <unknown>
+
+vabd.vx v10, v9, a0, v0.t
+# CHECK-INST: vabd.vx v10, v9, a0, v0.t
+# CHECK-ENCODING: [0x57,0x65,0x95,0x44]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 44956557 <unknown>
+
+vabdu.vv v10, v9, v8
+# CHECK-INST: vabdu.vv v10, v9, v8
+# CHECK-ENCODING: [0x57,0x25,0x94,0x4e]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 4e942557 <unknown>
+
+vabdu.vv v10, v9, v8, v0.t
+# CHECK-INST: vabdu.vv v10, v9, v8, v0.t
+# CHECK-ENCODING: [0x57,0x25,0x94,0x4c]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 4c942557 <unknown>
+
+vabdu.vx v10, v9, a0
+# CHECK-INST: vabdu.vx v10, v9, a0
+# CHECK-ENCODING: [0x57,0x65,0x95,0x4e]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 4e956557 <unknown>
+
+vabdu.vx v10, v9, a0, v0.t
+# CHECK-INST: vabdu.vx v10, v9, a0, v0.t
+# CHECK-ENCODING: [0x57,0x65,0x95,0x4c]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 4c956557 <unknown>
+
+vwabdacc.vv v10, v9, v8
+# CHECK-INST: vwabdacc.vv v10, v9, v8
+# CHECK-ENCODING: [0x57,0x25,0x94,0x56]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 56942557 <unknown>
+
+vwabdacc.vv v10, v9, v8, v0.t
+# CHECK-INST: vwabdacc.vv v10, v9, v8, v0.t
+# CHECK-ENCODING: [0x57,0x25,0x94,0x54]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 54942557 <unknown>
+
+vwabdacc.vx v10, v9, a0
+# CHECK-INST: vwabdacc.vx v10, v9, a0
+# CHECK-ENCODING: [0x57,0x65,0x95,0x56]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 56956557 <unknown>
+
+vwabdacc.vx v10, v9, a0, v0.t
+# CHECK-INST: vwabdacc.vx v10, v9, a0, v0.t
+# CHECK-ENCODING: [0x57,0x65,0x95,0x54]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 54956557 <unknown>
+
+vwabdaccu.vv v10, v9, v8
+# CHECK-INST: vwabdaccu.vv v10, v9, v8
+# CHECK-ENCODING: [0x57,0x25,0x94,0x5a]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 5a942557 <unknown>
+
+vwabdaccu.vv v10, v9, v8, v0.t
+# CHECK-INST: vwabdaccu.vv v10, v9, v8, v0.t
+# CHECK-ENCODING: [0x57,0x25,0x94,0x58]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 58942557 <unknown>
+
+vwabdaccu.vx v10, v9, a0
+# CHECK-INST: vwabdaccu.vx v10, v9, a0
+# CHECK-ENCODING: [0x57,0x65,0x95,0x5a]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 5a956557 <unknown>
+
+vwabdaccu.vx v10, v9, a0, v0.t
+# CHECK-INST: vwabdaccu.vx v10, v9, a0, v0.t
+# CHECK-ENCODING: [0x57,0x65,0x95,0x58]
+# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}}
+# CHECK-UNKNOWN: 58956557 <unknown>
diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
index 14a60c1857f24f..c279790a80d594 100644
--- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
@@ -1109,6 +1109,7 @@ Experimental extensions
     zicfilp              1.0       This is a long dummy description
     zicfiss              1.0
     zalasr               0.1
+    zvabd                0.2
     zvbc32e              0.7
     zvkgs                0.7
     sdext                1.0

From b2cbf11fec3dd9b5d37b4c49eb159afd14f29fcf Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 24 Jan 2025 15:55:53 +0800
Subject: [PATCH 2/2] [RISCV][CodeGen] Lower abds/abdu to Zvabd instructions

When the Zvabd extension is available, we mark `ISD::ABDS`/`ISD::ABDU` as
legal and lower them directly to Zvabd instructions.
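
As a sketch of the effect (lifted from the updated abd.ll checks), the
signed absolute-difference idiom that previously expanded to a
min/max/subtract sequence now selects a single instruction when Zvabd is
enabled, e.g. for <vscale x 16 x i8> (vsetvli elided in the first
sequence):

  # Base V extension:
  vmin.vv v12, v8, v10
  vmax.vv v8, v8, v10
  vsub.vv v8, v8, v12

  # With +experimental-zvabd:
  vsetvli a0, zero, e8, m2, ta, ma
  vabd.vv v8, v8, v10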
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  17 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |   4 +
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |  10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td  |  22 ++
 llvm/test/CodeGen/RISCV/rvv/abd.ll            | 132 ++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-abd.ll    | 284 ++++++++++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-sad.ll    |  83 +++++
 7 files changed, 544 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 618fb28d3e9f9a..0bfcf21351b465 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -830,7 +831,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                          Legal);
 
-      setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
+      if (Subtarget.hasStdExtZvabd())
+        setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Legal);
+      else
+        setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
 
       // Custom-lower extensions and truncations from/to mask types.
       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
@@ -6400,6 +6404,8 @@ static unsigned getRISCVVLOp(SDValue Op) {
   OP_CASE(SMAX)
   OP_CASE(UMIN)
   OP_CASE(UMAX)
+  OP_CASE(ABDS)
+  OP_CASE(ABDU)
   OP_CASE(STRICT_FADD)
   OP_CASE(STRICT_FSUB)
   OP_CASE(STRICT_FMUL)
@@ -6502,7 +6508,7 @@ static bool hasPassthruOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 129 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6526,7 +6532,7 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 129 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
@@ -7530,6 +7536,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerToScalableOp(Op, DAG);
   case ISD::ABDS:
   case ISD::ABDU: {
+    if (Subtarget.hasStdExtZvabd())
+      return lowerToScalableOp(Op, DAG);
+
     SDLoc dl(Op);
     EVT VT = Op->getValueType(0);
     SDValue LHS = DAG.getFreeze(Op->getOperand(0));
@@ -21020,6 +21029,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VCPOP_VL)
   NODE_NAME_CASE(VFIRST_VL)
+  NODE_NAME_CASE(ABDS_VL)
+  NODE_NAME_CASE(ABDU_VL)
   NODE_NAME_CASE(READ_CSR)
   NODE_NAME_CASE(WRITE_CSR)
   NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 77605a3076a80a..ed2244be25eb74 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -282,6 +282,10 @@ enum NodeType : unsigned {
   UMIN_VL,
   UMAX_VL,
 
+  // Vector Absolute Difference.
+  ABDS_VL,
+  ABDU_VL,
+
   BITREVERSE_VL,
   BSWAP_VL,
   CTLZ_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 268bfe70673a2a..4bf5ba1edea801 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2850,11 +2850,11 @@ multiclass VPseudoVFRDIV_VF_RM {
   }
 }
 
-multiclass VPseudoVALU_VV_VX {
- foreach m = MxList in {
-    defm "" : VPseudoBinaryV_VV<m>,
-            SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
-                        forcePassthruRead=true>;
+multiclass VPseudoVALU_VV_VX<bit Commutable = 0> {
+  foreach m = MxList in {
+    defm "" : VPseudoBinaryV_VV<m, Commutable = Commutable>,
+              SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+                          forcePassthruRead = true>;
     defm "" : VPseudoBinaryV_VX<m>,
             SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX,
                         forcePassthruRead=true>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
index 7a8f79ccfd465b..6adc28f89b456a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td
@@ -23,3 +23,25 @@ let Predicates = [HasStdExtZvabd] in {
     defm VWABDACCU_V : VALU_MV_V_X<"vwabdaccu", 0b010110, "v">;
   } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
 } // Predicates = [HasStdExtZvabd]
+
+//===----------------------------------------------------------------------===//
+// Pseudos
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZvabd] in {
+  defm PseudoVABD : VPseudoVALU_VV_VX<Commutable = 1>;
+  defm PseudoVABDU : VPseudoVALU_VV_VX<Commutable = 1>;
+} // Predicates = [HasStdExtZvabd]
+
+//===----------------------------------------------------------------------===//
+// CodeGen Patterns
+//===----------------------------------------------------------------------===//
+def riscv_abds_vl
+    : SDNode<"RISCVISD::ABDS_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_abdu_vl
+    : SDNode<"RISCVISD::ABDU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+
+defm : VPatBinarySDNode_VV_VX<abds, "PseudoVABD">;
+defm : VPatBinarySDNode_VV_VX<abdu, "PseudoVABDU">;
+
+defm : VPatBinaryVL_VV_VX<riscv_abds_vl, "PseudoVABD">;
+defm : VPatBinaryVL_VV_VX<riscv_abdu_vl, "PseudoVABDU">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 5e610c453e1bac..249a405c3470c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV64
 
 ;
 ; SABD
@@ -14,6 +16,12 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    vmax.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
   %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
   %sub = sub <vscale x 16 x i16> %a.sext, %b.sext
@@ -30,6 +38,14 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_b_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; ZVABD-NEXT:    vmxor.mm v0, v0, v8
+; ZVABD-NEXT:    vmv.v.i v8, 0
+; ZVABD-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
   %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
   %sub = sub <vscale x 16 x i8> %a.sext, %b.sext
@@ -45,6 +61,12 @@ define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    vmax.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
   %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
   %sub = sub <vscale x 8 x i32> %a.sext, %b.sext
@@ -63,6 +85,14 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
   %sub = sub <vscale x 8 x i16> %a.sext, %b.sext
@@ -78,6 +108,12 @@ define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    vmax.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
   %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
   %sub = sub <vscale x 4 x i64> %a.sext, %b.sext
@@ -96,6 +132,14 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
   %sub = sub <vscale x 4 x i32> %a.sext, %b.sext
@@ -123,6 +167,14 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_d_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
   %sub = sub <vscale x 2 x i64> %a.sext, %b.sext
@@ -142,6 +194,12 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
   %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
   %sub = sub <vscale x 16 x i16> %a.zext, %b.zext
@@ -158,6 +216,14 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_b_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; ZVABD-NEXT:    vmxor.mm v0, v0, v8
+; ZVABD-NEXT:    vmv.v.i v8, 0
+; ZVABD-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
   %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
   %sub = sub <vscale x 16 x i8> %a.zext, %b.zext
@@ -173,6 +239,12 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
   %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
   %sub = sub <vscale x 8 x i32> %a.zext, %b.zext
@@ -191,6 +263,14 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
   %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
   %sub = sub <vscale x 8 x i16> %a.zext, %b.zext
@@ -206,6 +286,12 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v10
 ; CHECK-NEXT:    vsub.vv v8, v8, v12
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
   %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
   %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
@@ -224,6 +310,14 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
   %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
   %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
@@ -251,6 +345,14 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_d_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v10, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
   %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
   %sub = sub <vscale x 2 x i64> %a.zext, %b.zext
@@ -269,6 +371,13 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v12
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_non_matching_extension:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf4 v12, v10
+; ZVABD-NEXT:    vabdu.vv v8, v8, v12
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
   %b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>
   %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
@@ -290,6 +399,15 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_non_matching_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v10, v8
+; ZVABD-NEXT:    vabdu.vv v10, v10, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v10
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
   %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
@@ -311,6 +429,18 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
 ; CHECK-NEXT:    vrsub.vi v8, v10, 0
 ; CHECK-NEXT:    vmax.vv v8, v10, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_non_matching_promotion:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf4 v10, v8
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vsext.vf2 v8, v9
+; ZVABD-NEXT:    vwsub.wv v10, v10, v8
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVABD-NEXT:    vrsub.vi v8, v10, 0
+; ZVABD-NEXT:    vmax.vv v8, v10, v8
+; ZVABD-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
   %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
@@ -333,3 +463,5 @@ declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32: {{.*}}
 ; RV64: {{.*}}
+; ZVABD-RV32: {{.*}}
+; ZVABD-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll
index bd1209a17b5345..07bdb805ba46be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV64
 ;
 ; SABD
 ;
@@ -14,6 +16,12 @@ define <8 x i8> @sabd_8b_as_16b(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_8b_as_16b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <8 x i8> %a to <8 x i16>
   %b.sext = sext <8 x i8> %b to <8 x i16>
   %sub = sub <8 x i16> %a.sext, %b.sext
@@ -31,6 +39,12 @@ define <8 x i8> @sabd_8b_as_32b(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_8b_as_32b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <8 x i8> %a to <8 x i32>
   %b.sext = sext <8 x i8> %b to <8 x i32>
   %sub = sub <8 x i32> %a.sext, %b.sext
@@ -48,6 +62,12 @@ define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_16b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <16 x i8> %a to <16 x i16>
   %b.sext = sext <16 x i8> %b to <16 x i16>
   %sub = sub <16 x i16> %a.sext, %b.sext
@@ -65,6 +85,12 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_4h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <4 x i16> %a to <4 x i32>
   %b.sext = sext <4 x i16> %b to <4 x i32>
   %sub = sub <4 x i32> %a.sext, %b.sext
@@ -84,6 +110,14 @@ define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_4h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVABD-NEXT:    vabd.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <4 x i8> %a to <4 x i16>
   %b.sext = sext <4 x i8> %b to <4 x i16>
   %sub = sub <4 x i16> %a.sext, %b.sext
@@ -100,6 +134,12 @@ define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_8h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <8 x i16> %a to <8 x i32>
   %b.sext = sext <8 x i16> %b to <8 x i32>
   %sub = sub <8 x i32> %a.sext, %b.sext
@@ -119,6 +159,14 @@ define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_8h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <8 x i8> %a to <8 x i16>
   %b.sext = sext <8 x i8> %b to <8 x i16>
   %sub = sub <8 x i16> %a.sext, %b.sext
@@ -135,6 +183,12 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_2s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <2 x i32> %a to <2 x i64>
   %b.sext = sext <2 x i32> %b to <2 x i64>
   %sub = sub <2 x i64> %a.sext, %b.sext
@@ -154,6 +208,14 @@ define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_2s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVABD-NEXT:    vabd.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <2 x i16> %a to <2 x i32>
   %b.sext = sext <2 x i16> %b to <2 x i32>
   %sub = sub <2 x i32> %a.sext, %b.sext
@@ -170,6 +232,12 @@ define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_4s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <4 x i32> %a to <4 x i64>
   %b.sext = sext <4 x i32> %b to <4 x i64>
   %sub = sub <4 x i64> %a.sext, %b.sext
@@ -189,6 +257,14 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_4s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <4 x i16> %a to <4 x i32>
   %b.sext = sext <4 x i16> %b to <4 x i32>
   %sub = sub <4 x i32> %a.sext, %b.sext
@@ -204,6 +280,12 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_2d:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <2 x i64> %a to <2 x i128>
   %b.sext = sext <2 x i64> %b to <2 x i128>
   %sub = sub <2 x i128> %a.sext, %b.sext
@@ -223,6 +305,14 @@ define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_2d_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; ZVABD-NEXT:    vabd.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.sext = sext <2 x i32> %a to <2 x i64>
   %b.sext = sext <2 x i32> %b to <2 x i64>
   %sub = sub <2 x i64> %a.sext, %b.sext
@@ -243,6 +333,12 @@ define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_8b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <8 x i8> %a to <8 x i16>
   %b.zext = zext <8 x i8> %b to <8 x i16>
   %sub = sub <8 x i16> %a.zext, %b.zext
@@ -260,6 +356,12 @@ define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_16b:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <16 x i8> %a to <16 x i16>
   %b.zext = zext <16 x i8> %b to <16 x i16>
   %sub = sub <16 x i16> %a.zext, %b.zext
@@ -277,6 +379,12 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_4h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <4 x i16> %a to <4 x i32>
   %b.zext = zext <4 x i16> %b to <4 x i32>
   %sub = sub <4 x i32> %a.zext, %b.zext
@@ -296,6 +404,14 @@ define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_4h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVABD-NEXT:    vabdu.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <4 x i8> %a to <4 x i16>
   %b.zext = zext <4 x i8> %b to <4 x i16>
   %sub = sub <4 x i16> %a.zext, %b.zext
@@ -312,6 +428,12 @@ define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_8h:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <8 x i16> %a to <8 x i32>
   %b.zext = zext <8 x i16> %b to <8 x i32>
   %sub = sub <8 x i32> %a.zext, %b.zext
@@ -331,6 +453,14 @@ define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_8h_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <8 x i8> %a to <8 x i16>
   %b.zext = zext <8 x i8> %b to <8 x i16>
   %sub = sub <8 x i16> %a.zext, %b.zext
@@ -347,6 +477,12 @@ define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_2s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <2 x i32> %a to <2 x i64>
   %b.zext = zext <2 x i32> %b to <2 x i64>
   %sub = sub <2 x i64> %a.zext, %b.zext
@@ -366,6 +502,14 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_2s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVABD-NEXT:    vabdu.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <2 x i16> %a to <2 x i32>
   %b.zext = zext <2 x i16> %b to <2 x i32>
   %sub = sub <2 x i32> %a.zext, %b.zext
@@ -382,6 +526,12 @@ define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_4s:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <4 x i32> %a to <4 x i64>
   %b.zext = zext <4 x i32> %b to <4 x i64>
   %sub = sub <4 x i64> %a.zext, %b.zext
@@ -401,6 +551,14 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_4s_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <4 x i16> %a to <4 x i32>
   %b.zext = zext <4 x i16> %b to <4 x i32>
   %sub = sub <4 x i32> %a.zext, %b.zext
@@ -416,6 +574,12 @@ define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_2d:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <2 x i64> %a to <2 x i128>
   %b.zext = zext <2 x i64> %b to <2 x i128>
   %sub = sub <2 x i128> %a.zext, %b.zext
@@ -435,6 +599,14 @@ define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_2d_promoted_ops:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; ZVABD-NEXT:    vabdu.vv v9, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v8, v9
+; ZVABD-NEXT:    ret
   %a.zext = zext <2 x i32> %a to <2 x i64>
   %b.zext = zext <2 x i32> %b to <2 x i64>
   %sub = sub <2 x i64> %a.zext, %b.zext
@@ -451,6 +623,14 @@ define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_v16i8_nuw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vsub.vv v8, v8, v9
+; ZVABD-NEXT:    vrsub.vi v9, v8, 0
+; ZVABD-NEXT:    vmax.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nuw <16 x i8> %a, %b
   %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
   ret <16 x i8> %abs
@@ -465,6 +645,14 @@ define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_v8i16_nuw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vsub.vv v8, v8, v9
+; ZVABD-NEXT:    vrsub.vi v9, v8, 0
+; ZVABD-NEXT:    vmax.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nuw <8 x i16> %a, %b
   %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
   ret <8 x i16> %abs
@@ -479,6 +667,14 @@ define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_v4i32_nuw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vsub.vv v8, v8, v9
+; ZVABD-NEXT:    vrsub.vi v9, v8, 0
+; ZVABD-NEXT:    vmax.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nuw <4 x i32> %a, %b
   %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
   ret <4 x i32> %abs
@@ -493,6 +689,14 @@ define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: uabd_v2i64_nuw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vsub.vv v8, v8, v9
+; ZVABD-NEXT:    vrsub.vi v9, v8, 0
+; ZVABD-NEXT:    vmax.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nuw <2 x i64> %a, %b
   %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
   ret <2 x i64> %abs
@@ -507,6 +711,12 @@ define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_v16i8_nsw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nsw <16 x i8> %a, %b
   %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
   ret <16 x i8> %abs
@@ -521,6 +731,12 @@ define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_v8i16_nsw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nsw <8 x i16> %a, %b
   %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
   ret <8 x i16> %abs
@@ -535,6 +751,12 @@ define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_v4i32_nsw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nsw <4 x i32> %a, %b
   %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
   ret <4 x i32> %abs
@@ -549,6 +771,12 @@ define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sabd_v2i64_nsw:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %sub = sub nsw <2 x i64> %a, %b
   %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
   ret <2 x i64> %abs
@@ -563,6 +791,12 @@ define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: smaxmin_v16i8:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
   %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
   %sub = sub <16 x i8> %a, %b
@@ -578,6 +812,12 @@ define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: smaxmin_v8i16:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
   %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
   %sub = sub <8 x i16> %a, %b
@@ -593,6 +833,12 @@ define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: smaxmin_v4i32:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
   %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
   %sub = sub <4 x i32> %a, %b
@@ -608,6 +854,12 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: smaxmin_v2i64:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vabd.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
   %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
   %sub = sub <2 x i64> %a, %b
@@ -623,6 +875,12 @@ define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: umaxmin_v16i8:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
   %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
   %sub = sub <16 x i8> %a, %b
@@ -638,6 +896,12 @@ define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: umaxmin_v8i16:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
   %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
   %sub = sub <8 x i16> %a, %b
@@ -653,6 +917,12 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: umaxmin_v4i32:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
   %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
   %sub = sub <4 x i32> %a, %b
@@ -668,6 +938,12 @@ define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: umaxmin_v2i64:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
   %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
   %sub = sub <2 x i64> %a, %b
@@ -683,6 +959,12 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v9
 ; CHECK-NEXT:    vsub.vv v8, v8, v10
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: umaxmin_v16i8_com1:
+; ZVABD:       # %bb.0:
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    ret
   %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
   %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
   %sub = sub <16 x i8> %a, %b
@@ -725,3 +1007,5 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32: {{.*}}
 ; RV64: {{.*}}
+; ZVABD-RV32: {{.*}}
+; ZVABD-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
index 8da605d35270de..62ec0543949a0d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+experimental-zvabd | FileCheck %s --check-prefix=ZVABD
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+experimental-zvabd | FileCheck %s --check-prefix=ZVABD
 
 define signext i16 @sad_4x8_as_i16(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: sad_4x8_as_i16:
@@ -16,6 +18,18 @@ define signext i16 @sad_4x8_as_i16(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sad_4x8_as_i16:
+; ZVABD:       # %bb.0: # %entry
+; ZVABD-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVABD-NEXT:    vmv.s.x v9, zero
+; ZVABD-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; ZVABD-NEXT:    vwredsumu.vs v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVABD-NEXT:    vmv.x.s a0, v8
+; ZVABD-NEXT:    ret
 entry:
   %1 = zext <4 x i8> %a to <4 x i16>
   %3 = zext <4 x i8> %b to <4 x i16>
@@ -38,6 +52,17 @@ define signext i32 @sad_4x8_as_i32(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-NEXT:    vredsum.vs v8, v9, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sad_4x8_as_i32:
+; ZVABD:       # %bb.0: # %entry
+; ZVABD-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVABD-NEXT:    vzext.vf4 v9, v8
+; ZVABD-NEXT:    vmv.s.x v8, zero
+; ZVABD-NEXT:    vredsum.vs v8, v9, v8
+; ZVABD-NEXT:    vmv.x.s a0, v8
+; ZVABD-NEXT:    ret
 entry:
   %1 = zext <4 x i8> %a to <4 x i32>
   %3 = zext <4 x i8> %b to <4 x i32>
@@ -61,6 +86,18 @@ define signext i16 @sad_16x8_as_i16(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sad_16x8_as_i16:
+; ZVABD:       # %bb.0: # %entry
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vmv.s.x v9, zero
+; ZVABD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; ZVABD-NEXT:    vwredsumu.vs v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vmv.x.s a0, v8
+; ZVABD-NEXT:    ret
 entry:
   %1 = zext <16 x i8> %a to <16 x i16>
   %3 = zext <16 x i8> %b to <16 x i16>
@@ -83,6 +120,17 @@ define signext i32 @sad_16x8_as_i32(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-NEXT:    vredsum.vs v8, v12, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sad_16x8_as_i32:
+; ZVABD:       # %bb.0: # %entry
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVABD-NEXT:    vzext.vf4 v12, v8
+; ZVABD-NEXT:    vmv.s.x v8, zero
+; ZVABD-NEXT:    vredsum.vs v8, v12, v8
+; ZVABD-NEXT:    vmv.x.s a0, v8
+; ZVABD-NEXT:    ret
 entry:
   %1 = zext <16 x i8> %a to <16 x i32>
   %3 = zext <16 x i8> %b to <16 x i32>
@@ -135,6 +183,41 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea
 ; CHECK-NEXT:    vredsum.vs v8, v20, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
+;
+; ZVABD-LABEL: sad_2block_16xi8_as_i32:
+; ZVABD:       # %bb.0: # %entry
+; ZVABD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; ZVABD-NEXT:    vle8.v v8, (a0)
+; ZVABD-NEXT:    vle8.v v9, (a1)
+; ZVABD-NEXT:    add a0, a0, a2
+; ZVABD-NEXT:    add a1, a1, a3
+; ZVABD-NEXT:    vle8.v v10, (a0)
+; ZVABD-NEXT:    vle8.v v11, (a1)
+; ZVABD-NEXT:    add a0, a0, a2
+; ZVABD-NEXT:    add a1, a1, a3
+; ZVABD-NEXT:    vle8.v v12, (a0)
+; ZVABD-NEXT:    vle8.v v13, (a1)
+; ZVABD-NEXT:    add a0, a0, a2
+; ZVABD-NEXT:    add a1, a1, a3
+; ZVABD-NEXT:    vabdu.vv v8, v8, v9
+; ZVABD-NEXT:    vle8.v v9, (a0)
+; ZVABD-NEXT:    vabdu.vv v10, v10, v11
+; ZVABD-NEXT:    vle8.v v11, (a1)
+; ZVABD-NEXT:    vwaddu.vv v14, v10, v8
+; ZVABD-NEXT:    vabdu.vv v8, v12, v13
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vzext.vf2 v12, v8
+; ZVABD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; ZVABD-NEXT:    vabdu.vv v8, v9, v11
+; ZVABD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVABD-NEXT:    vwaddu.vv v16, v12, v14
+; ZVABD-NEXT:    vzext.vf2 v10, v8
+; ZVABD-NEXT:    vwaddu.wv v16, v16, v10
+; ZVABD-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVABD-NEXT:    vmv.s.x v8, zero
+; ZVABD-NEXT:    vredsum.vs v8, v16, v8
+; ZVABD-NEXT:    vmv.x.s a0, v8
+; ZVABD-NEXT:    ret
 entry:
   %idx.ext8 = sext i32 %strideb to i64
   %idx.ext = sext i32 %stridea to i64


