[llvm] 4aafebc - SelectionDAG: allow FP extensions when folding extract/insert.

Tim Northover via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 28 04:08:42 PDT 2022


Author: Tim Northover
Date: 2022-06-28T12:08:35+01:00
New Revision: 4aafebce520c706ad0e7cf7a0b05bbe1f1b434ef

URL: https://github.com/llvm/llvm-project/commit/4aafebce520c706ad0e7cf7a0b05bbe1f1b434ef
DIFF: https://github.com/llvm/llvm-project/commit/4aafebce520c706ad0e7cf7a0b05bbe1f1b434ef.diff

LOG: SelectionDAG: allow FP extensions when folding extract/insert.

Previously, this fold always went through getSExtOrTrunc, so for half -> float it tried to sign-extend a floating-point value and hit an assertion in getNode.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/ARM/arm-half-promote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 213874fb49337..bc1011b69c9df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6149,6 +6149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
         if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
           if (VT == N1.getOperand(1).getValueType())
             return N1.getOperand(1);
+          if (VT.isFloatingPoint()) {
+            assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits());
+            return getFPExtendOrRound(N1.getOperand(1), DL, VT);
+          }
           return getSExtOrTrunc(N1.getOperand(1), DL, VT);
         }
         return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);

diff  --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll
index f3c9a9e081ba8..d6a8a9b9538f1 100644
--- a/llvm/test/CodeGen/ARM/arm-half-promote.ll
+++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll
@@ -79,3 +79,16 @@ define fastcc { <8 x half>, <8 x half> } @f3() {
   ret { <8 x half>, <8 x half> } zeroinitializer
 }
 
+define void @extract_insert(ptr %dst) optnone noinline {
+; CHECK-LABEL: extract_insert:
+; CHECK: vmov.i32 d0, #0x0
+; CHECK: vcvtb.f16.f32 s0, s0
+; CHECK: vmov r1, s0
+; CHECK: strh r1, [r0]
+  %splat.splatinsert = insertelement <1 x half> zeroinitializer, half 0xH0000, i32 0
+  br label %next
+
+next:
+  store <1 x half> %splat.splatinsert, ptr %dst
+  ret void
+}


        


More information about the llvm-commits mailing list