[llvm] r360043 - [PowerPC] Fix erroneous condition for converting uint-to-fp vector conversion

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 06:35:50 PDT 2019


Author: nemanjai
Date: Mon May  6 06:35:49 2019
New Revision: 360043

URL: http://llvm.org/viewvc/llvm-project?rev=360043&view=rev
Log:
[PowerPC] Fix erroneous condition for converting uint-to-fp vector conversion

A condition for exiting the legalization of a v4i32 conversion to v2f64 through
extract/convert/build erroneously checks that the extracts have type i32.
This is not adequate, as extracts of smaller elements (e.g. i16) are legalized
to i32 as well.
Furthermore, an early exit is missing after that type check, which means that
we only check that both extracts are from the same vector when the type check
itself fails (i.e. when an extract is not of type i32).
As a result, both cases in the included test case fail - the first gets a
select error and the second generates incorrect code.

The culprit commit is r274535.

Added:
    llvm/trunk/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=360043&r1=360042&r2=360043&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon May  6 06:35:49 2019
@@ -12476,9 +12476,8 @@ SDValue PPCTargetLowering::DAGCombineBui
   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
   if (!Ext1Op || !Ext2Op)
     return SDValue();
-  if (Ext1.getValueType() != MVT::i32 ||
-      Ext2.getValueType() != MVT::i32)
-  if (Ext1.getOperand(0) != Ext2.getOperand(0))
+  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
+      Ext1.getOperand(0) != Ext2.getOperand(0))
     return SDValue();
 
   int FirstElem = Ext1Op->getZExtValue();

Added: llvm/trunk/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll?rev=360043&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll Mon May  6 06:35:49 2019
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
+; RUN:   -check-prefix=P9BE
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
+; RUN:   -check-prefix=P9LE
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
+; RUN:   -check-prefix=P8BE
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
+; RUN:   -check-prefix=P8LE
+define dso_local <2 x double> @test1(<8 x i16> %a) {
+; P9BE-LABEL: test1:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    vextuhlx r3, r3, v2
+; P9BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P9BE-NEXT:    mtvsrwz f0, r3
+; P9BE-NEXT:    li r3, 2
+; P9BE-NEXT:    vextuhlx r3, r3, v2
+; P9BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P9BE-NEXT:    mtvsrwz f1, r3
+; P9BE-NEXT:    xscvuxddp f0, f0
+; P9BE-NEXT:    xscvuxddp f1, f1
+; P9BE-NEXT:    xxmrghd v2, vs0, vs1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test1:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    li r3, 0
+; P9LE-NEXT:    vextuhrx r3, r3, v2
+; P9LE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P9LE-NEXT:    mtvsrwz f0, r3
+; P9LE-NEXT:    li r3, 2
+; P9LE-NEXT:    vextuhrx r3, r3, v2
+; P9LE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P9LE-NEXT:    mtvsrwz f1, r3
+; P9LE-NEXT:    xscvuxddp f0, f0
+; P9LE-NEXT:    xscvuxddp f1, f1
+; P9LE-NEXT:    xxmrghd v2, vs1, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test1:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    rldicl r4, r3, 16, 48
+; P8BE-NEXT:    rldicl r3, r3, 32, 48
+; P8BE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; P8BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P8BE-NEXT:    mtvsrwz f0, r4
+; P8BE-NEXT:    mtvsrwz f1, r3
+; P8BE-NEXT:    xscvuxddp f0, f0
+; P8BE-NEXT:    xscvuxddp f1, f1
+; P8BE-NEXT:    xxmrghd v2, vs0, vs1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test1:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxswapd vs0, v2
+; P8LE-NEXT:    mfvsrd r3, f0
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    rldicl r3, r3, 48, 48
+; P8LE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; P8LE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; P8LE-NEXT:    mtvsrwz f0, r4
+; P8LE-NEXT:    mtvsrwz f1, r3
+; P8LE-NEXT:    xscvuxddp f0, f0
+; P8LE-NEXT:    xscvuxddp f1, f1
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = uitofp i16 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 1
+  %conv2 = uitofp i16 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
+; P9BE-LABEL: test2:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxextractuw f0, v2, 0
+; P9BE-NEXT:    xxextractuw f1, v3, 4
+; P9BE-NEXT:    xscvuxddp f0, f0
+; P9BE-NEXT:    xscvuxddp f1, f1
+; P9BE-NEXT:    xxmrghd v2, vs0, vs1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: test2:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxextractuw f0, v2, 12
+; P9LE-NEXT:    xxextractuw f1, v3, 8
+; P9LE-NEXT:    xscvuxddp f0, f0
+; P9LE-NEXT:    xscvuxddp f1, f1
+; P9LE-NEXT:    xxmrghd v2, vs1, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: test2:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; P8BE-NEXT:    mfvsrwz r4, v3
+; P8BE-NEXT:    mtvsrwz f1, r4
+; P8BE-NEXT:    mfvsrwz r3, f0
+; P8BE-NEXT:    xscvuxddp f1, f1
+; P8BE-NEXT:    mtvsrwz f0, r3
+; P8BE-NEXT:    xscvuxddp f0, f0
+; P8BE-NEXT:    xxmrghd v2, vs0, vs1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: test2:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxswapd vs0, v2
+; P8LE-NEXT:    xxsldwi vs1, v3, v3, 1
+; P8LE-NEXT:    mfvsrwz r3, f0
+; P8LE-NEXT:    mfvsrwz r4, f1
+; P8LE-NEXT:    mtvsrwz f0, r3
+; P8LE-NEXT:    mtvsrwz f1, r4
+; P8LE-NEXT:    xscvuxddp f0, f0
+; P8LE-NEXT:    xscvuxddp f1, f1
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = uitofp i32 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %b, i32 1
+  %conv2 = uitofp i32 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}




More information about the llvm-commits mailing list