[clang] [llvm] [HLSL][DirectX] Add `transpose` HLSL intrinsic and DXIL lowering of `llvm.matrix.transpose` (PR #186263)

Deric C. via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 13 10:33:25 PDT 2026


================
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Verify that llvm.matrix.transpose is expanded to shufflevector for DXIL.
+
+declare <6 x float> @llvm.matrix.transpose.v6f32(<6 x float>, i32, i32)
+declare <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32>, i32, i32)
+declare <16 x float> @llvm.matrix.transpose.v16f32(<16 x float>, i32, i32)
+declare <4 x float> @llvm.matrix.transpose.v4f32(<4 x float>, i32, i32)
+declare <4 x half> @llvm.matrix.transpose.v4f16(<4 x half>, i32, i32)
+
+; 2x3 float -> 3x2 float
+define <6 x float> @test_transpose_float2x3(<6 x float> %m) {
+; CHECK-LABEL: define <6 x float> @test_transpose_float2x3(
+; CHECK-SAME: <6 x float> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <6 x float> [[M]], <6 x float> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:    ret <6 x float> [[TMP12]]
+;
+  %r = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> %m, i32 2, i32 3)
+  ret <6 x float> %r
+}
+
+; 4x3 int -> 3x4 int
+define <12 x i32> @test_transpose_int4x3(<12 x i32> %m) {
+; CHECK-LABEL: define <12 x i32> @test_transpose_int4x3(
+; CHECK-SAME: <12 x i32> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <12 x i32> [[M]], <12 x i32> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:    ret <12 x i32> [[TMP24]]
+;
+  %r = call <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32> %m, i32 4, i32 3)
+  ret <12 x i32> %r
+}
+
+; 4x4 float -> 4x4 float
+define <16 x float> @test_transpose_float4x4(<16 x float> %m) {
+; CHECK-LABEL: define <16 x float> @test_transpose_float4x4(
+; CHECK-SAME: <16 x float> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x float> [[M]], <16 x float> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
----------------
Icohedron wrote:

`shufflevector` is a supported instruction in DXIL, so it is up to the driver compiler to lower it to its own IR and ISA.

https://github.com/llvm/llvm-project/pull/186263


More information about the cfe-commits mailing list