[llvm] [SelectionDAG] Fix a false assumption that there will always be a valid integer type corresponding to a vector type (PR #96658)

Tue Jun 25 11:08:15 PDT 2024

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/96658

>From e1d2c1c48e92f13cd5e3ce20322fe5b953de5370 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 25 Jun 2024 14:08:02 -0400
Subject: [PATCH] [SelectionDAG] Fix a false assumption that there will always
 be a valid integer type corresponding to a vector type

`SelectionDAG::getBitcastedAnyExtOrTrunc` assumes that there is always a valid
integer type with the same bit width as the operand's type, which is not always
true for vector types. For example, `<3 x i8>` is 24 bits wide and has no
corresponding integer type.

Fixes SWDEV-464698.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 ++++++++++-
 .../AMDGPU/no-corresponding-integer-type.ll   | 32 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8463e94d7f933..757e403050162 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1468,13 +1468,30 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
 }
 
 SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
-                                                 EVT VT) {
+                                                EVT VT) {
   assert(!VT.isVector());
   auto Type = Op.getValueType();
   SDValue DestOp;
   if (Type == VT)
     return Op;
   auto Size = Op.getValueSizeInBits();
+  auto IntTy = MVT::getIntegerVT(Size);
+
+  if (!IntTy.isValid()) {
+    // We assume integers of "weird" size have already been legalized here.
+    assert(Type.isVector());
+    unsigned NumElements = Type.getVectorNumElements();
+    unsigned ExtSize = VT.getScalarSizeInBits();
+    EVT ElementType = Type.getVectorElementType();
+    unsigned ExtNumElements = ExtSize / ElementType.getScalarSizeInBits();
+    assert(NumElements < ExtNumElements);
+    MVT ExtType = MVT::getVectorVT(ElementType.getSimpleVT(), ExtNumElements);
+    SDValue ExtVec = getUNDEF(ExtType);
+    DestOp = getNode(ISD::INSERT_SUBVECTOR, DL, ExtType, ExtVec, Op,
+                     getVectorIdxConstant(0, DL));
+    return getBitcast(VT, DestOp);
+  }
+
   DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
   if (DestOp.getValueType() == VT)
     return DestOp;
diff --git a/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll b/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll
new file mode 100644
index 0000000000000..5201f188afd5f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck %s
+
+define void @no_corresponding_integer_type(i8 %arg, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: no_corresponding_integer_type:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v3, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, v1
+; CHECK-NEXT:    global_load_ushort v1, v[2:3], off
+; CHECK-NEXT:    global_load_ubyte v4, v[2:3], off offset:2
+; CHECK-NEXT:    s_mov_b32 s0, 0xc0c0400
+; CHECK-NEXT:    s_mov_b32 s1, 0xc0c0000
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
+; CHECK-NEXT:    v_perm_b32 v1, v0, v1, s0
+; CHECK-NEXT:    v_perm_b32 v0, v0, v0, s1
+; CHECK-NEXT:    v_dot4_u32_u8 v0, v0, v1, 1
+; CHECK-NEXT:    s_nop 2
+; CHECK-NEXT:    global_store_byte v[2:3], v0, off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %load = load <3 x i8>, ptr addrspace(1) %ptr, align 1
+  %elt0 = extractelement <3 x i8> %load, i64 0
+  %mul0 = mul i8 %elt0, %arg
+  %or = or i8 %mul0, 1
+  %mul1 = mul i8 %arg, %arg
+  %add = add i8 %mul1, %or
+  store i8 %add, ptr addrspace(1) %ptr, align 1
+  ret void
+}