[llvm] [MIPS][MSA] Invert operand order of `ILVOD` when lowering `VECTOR_SHUFFLE` (PR #123555)

Sun Jan 19 23:38:44 PST 2025

https://github.com/Cyanoxygen created https://github.com/llvm/llvm-project/pull/123555

This PR fixes the operand order of `ILVOD.df` when lowering `VECTOR_SHUFFLE`: the result was `<y[1], x[1]>`, while it should be `<x[1], y[1]>`.

* This PR is split from #123040.

Background
------------

Our Rust toolchain has a long-standing, unresolved bug: `sha2::compress256` produces a wrong result if MSA is enabled.

Recently @liushuyu presented me with a minimal IR that can reproduce the problem, and here's the fix.

Testing
-------

I have tested the patch by:

- Bootstrapping a Rust toolchain with the patch applied,
- Building and running the minimal PoC that uses `sha2::compress256`, and
- Compiling and running a simple program that utilizes vectors (addition and shuffle).

These tests look good to me.

| ![telegram-cloud-photo-size-1-4918091511221038641-y](https://github.com/user-attachments/assets/60f22d9a-ea1b-4a93-9e1c-1489566d599f) |
| :----: |
| The wrong result produced by `compress256`, compiled by the toolchain without the fix |
| ![telegram-cloud-photo-size-1-4918091511221038640-y](https://github.com/user-attachments/assets/6cf952b4-4bad-4a2e-a145-f355271979d5) |
| The test program no longer fails with the fix applied |


>From f09a8c39623c49161832034a4d5ebc8a25473f89 Mon Sep 17 00:00:00 2001
From: Xinhui Yang <cyan at cyano.uk>
Date: Wed, 15 Jan 2025 10:57:33 +0800
Subject: [PATCH] [MIPS][MSA] Invert operand order of ILVOD when lowering
 VECTOR_SHUFFLE

The result was <y[1], x[1]> while it should be <x[1], y[1]>.

This affects the result of vector shuffles:

    %result = shufflevector <4 x i32> %x, <4 x i32> %y,
              <4 x i32> <i32 1, i32 5, i32 3, i32 7>

The mask above matches the pattern wanted by the ILVOD lowering.
The result is <%y[1], %x[1], %y[3], %x[3]>, while it should be
<%x[1], %y[1], %x[3], %y[3]>.

* tests/CodeGen/Mips: Update MSA shuffle tests according to the change.
---
 llvm/lib/Target/Mips/MipsSEISelLowering.cpp |  2 +-
 llvm/test/CodeGen/Mips/msa/shuffle.ll       | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 818b1683bb867e..1d1b0f9c6ae2a9 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -2736,7 +2736,7 @@ static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
   else
     return SDValue();
 
-  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
+  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Ws, Wt);
 }
 
 // Lower VECTOR_SHUFFLE into ILVR (if possible).
diff --git a/llvm/test/CodeGen/Mips/msa/shuffle.ll b/llvm/test/CodeGen/Mips/msa/shuffle.ll
index e93bb7cdd11283..37eefd695c2eda 100644
--- a/llvm/test/CodeGen/Mips/msa/shuffle.ll
+++ b/llvm/test/CodeGen/Mips/msa/shuffle.ll
@@ -533,7 +533,7 @@ define void @ilvod_v16i8_0(ptr %c, ptr %a, ptr %b) nounwind {
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <16 x i8> %3, ptr %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
 
@@ -548,7 +548,7 @@ define void @ilvod_v8i16_0(ptr %c, ptr %a, ptr %b) nounwind {
   %2 = load <8 x i16>, ptr %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <8 x i16> %3, ptr %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
 
@@ -563,7 +563,7 @@ define void @ilvod_v4i32_0(ptr %c, ptr %a, ptr %b) nounwind {
   %2 = load <4 x i32>, ptr %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <4 x i32> %3, ptr %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -578,7 +578,7 @@ define void @ilvod_v2i64_0(ptr %c, ptr %a, ptr %b) nounwind {
   %2 = load <2 x i64>, ptr %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
-  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <2 x i64> %3, ptr %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
 
@@ -934,7 +934,7 @@ define void @ilvl_v2i64_0(ptr %c, ptr %a, ptr %b) nounwind {
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   ; ilvl.d and ilvod.d are equivalent for v2i64
-  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <2 x i64> %3, ptr %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
 
@@ -1290,7 +1290,7 @@ define void @pckod_v2i64_0(ptr %c, ptr %a, ptr %b) nounwind {
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   ; pckod.d and ilvod.d are equivalent for v2i64
-  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   store <2 x i64> %3, ptr %c
   ; CHECK-DAG: st.d [[R3]], 0($4)