[llvm] [SPARC] Use op-then-neg instructions when we have VIS3 (PR #138603)

Mon May 5 15:32:51 PDT 2025

https://github.com/koachan created https://github.com/llvm/llvm-project/pull/138603

This is for manual re-merging of PR #135717 since that PR was merged to the wrong base branch.

>From af1ab680ab0b30cbdeed06f76c9879ecdb33e94b Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Tue, 15 Apr 2025 07:46:10 +0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5

[skip ci]
---
 llvm/lib/Target/Sparc/SparcISelLowering.cpp   |  34 +++-
 llvm/lib/Target/Sparc/SparcISelLowering.h     |   3 +
 llvm/lib/Target/Sparc/SparcInstr64Bit.td      |   2 +
 llvm/lib/Target/Sparc/SparcInstrVIS.td        |  56 +++++-
 llvm/test/CodeGen/SPARC/2011-01-11-CC.ll      | 118 ++++++++++++
 llvm/test/CodeGen/SPARC/bitcast.ll            | 139 ++++++++++++++
 llvm/test/CodeGen/SPARC/ctlz.ll               | 171 ++++++++++++++++++
 llvm/test/CodeGen/SPARC/float-constants.ll    | 115 ++++++++++++
 llvm/test/CodeGen/SPARC/multiply-extension.ll |  59 ++++++
 .../SPARC/smulo-128-legalisation-lowering.ll  |  44 +++++
 .../SPARC/umulo-128-legalisation-lowering.ll  |  33 ++++
 11 files changed, 758 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPARC/bitcast.ll
 create mode 100644 llvm/test/CodeGen/SPARC/ctlz.ll
 create mode 100644 llvm/test/CodeGen/SPARC/multiply-extension.ll

diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 85b8750d40f46..98fcaba86fee0 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1704,8 +1704,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
 
-  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
-  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32,
+                     Subtarget->isVIS3() ? Legal : Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32,
+                     Subtarget->isVIS3() ? Legal : Expand);
 
   // Sparc has no select or setcc: expand to SELECT_CC.
   setOperationAction(ISD::SELECT, MVT::i32, Expand);
@@ -1737,9 +1739,16 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SUBC, MVT::i32, Legal);
   setOperationAction(ISD::SUBE, MVT::i32, Legal);
 
+  if (Subtarget->isVIS3()) {
+    setOperationAction(ISD::ADDC, MVT::i64, Legal);
+    setOperationAction(ISD::ADDE, MVT::i64, Legal);
+  }
+
   if (Subtarget->is64Bit()) {
-    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
-    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+    setOperationAction(ISD::BITCAST, MVT::f64,
+                       Subtarget->isVIS3() ? Legal : Expand);
+    setOperationAction(ISD::BITCAST, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
     setOperationAction(ISD::SELECT, MVT::i64, Expand);
     setOperationAction(ISD::SETCC, MVT::i64, Expand);
     setOperationAction(ISD::BR_CC, MVT::i64, Custom);
@@ -1748,7 +1757,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTPOP, MVT::i64,
                        Subtarget->usePopc() ? Legal : Expand);
     setOperationAction(ISD::CTTZ , MVT::i64, Expand);
-    setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+    setOperationAction(ISD::CTLZ, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
     setOperationAction(ISD::ROTL , MVT::i64, Expand);
     setOperationAction(ISD::ROTR , MVT::i64, Expand);
@@ -1810,7 +1820,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FREM , MVT::f32, Expand);
   setOperationAction(ISD::FMA  , MVT::f32, Expand);
   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
-  setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ, MVT::i32, Subtarget->isVIS3() ? Legal : Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
   setOperationAction(ISD::ROTR , MVT::i32, Expand);
   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1849,8 +1859,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   if (Subtarget->is64Bit()) {
     setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
     setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-    setOperationAction(ISD::MULHU,     MVT::i64, Expand);
-    setOperationAction(ISD::MULHS,     MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
+    setOperationAction(ISD::MULHS, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
 
     setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
     setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
@@ -3560,6 +3572,12 @@ bool SparcTargetLowering::useLoadStackGuardNode(const Module &M) const {
   return true;
 }
 
+bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                       bool ForCodeSize) const {
+  return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
+         Imm.isZero(); // Only f32/f64 zeros are legal, and only with VIS.
+}
+
 // Override to disable global variable loading on Linux.
 void SparcTargetLowering::insertSSPDeclarations(Module &M) const {
   if (!Subtarget->isTargetLinux())
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 1bee5f4cfe84d..c09e465f5d05e 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -207,6 +207,9 @@ namespace llvm {
       return VT != MVT::f128;
     }
 
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
+
     bool shouldInsertFencesForAtomic(const Instruction *I) const override {
       // FIXME: We insert fences for each atomics and generate
       // sub-optimal code for PSO/TSO. (Approximately nobody uses any
diff --git a/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index 56fab2f26a19e..000612534e89d 100644
--- a/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -157,9 +157,11 @@ def : Pat<(and i64:$lhs, (not i64:$rhs)), (ANDNrr $lhs, $rhs)>;
 def : Pat<(or  i64:$lhs, (not i64:$rhs)), (ORNrr  $lhs, $rhs)>;
 def : Pat<(not (xor i64:$lhs, i64:$rhs)), (XNORrr $lhs, $rhs)>;
 
+def : Pat<(addc i64:$lhs, i64:$rhs), (ADDCCrr $lhs, $rhs)>, Requires<[HasVIS3]>;
 def : Pat<(add i64:$lhs, i64:$rhs), (ADDrr $lhs, $rhs)>;
 def : Pat<(sub i64:$lhs, i64:$rhs), (SUBrr $lhs, $rhs)>;
 
+def : Pat<(addc i64:$lhs, (i64 simm13:$rhs)), (ADDCCri $lhs, imm:$rhs)>, Requires<[HasVIS3]>;
 def : Pat<(add i64:$lhs, (i64 simm13:$rhs)), (ADDri $lhs, imm:$rhs)>;
 def : Pat<(sub i64:$lhs, (i64 simm13:$rhs)), (SUBri $lhs, imm:$rhs)>;
 
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 8ce8f37f34040..b806f0c413899 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -45,10 +45,10 @@ class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
         !strconcat(OpcStr, " $rs2, $rd")>;
 
 // For VIS Instructions with only rd operand.
-let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
+let rs1 = 0, rs2 = 0 in
 class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
        : VISInstFormat<opfval,
-        (outs RC:$rd), (ins RC:$f),
+        (outs RC:$rd), (ins),
         !strconcat(OpcStr, " $rd")>;
 
 // VIS 1 Instructions
@@ -259,14 +259,14 @@ def LZCNT     : VISInstFormat<0b000010111, (outs I64Regs:$rd),
                    (ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
 
 let rs1 = 0 in {
-def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstosw $rs2, $rd">;
-def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstouw $rs2, $rd">;
+def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstosw $rs2, $rd">;
+def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstouw $rs2, $rd">;
 def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
                    (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
-def MOVWTOS  :  VISInstFormat<0b100011001, (outs DFPRegs:$rd),
-                   (ins I64Regs:$rs2), "movwtos $rs2, $rd">;
+def MOVWTOS  :  VISInstFormat<0b100011001, (outs FPRegs:$rd),
+                   (ins IntRegs:$rs2), "movwtos $rs2, $rd">;
 def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd),
                    (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
 }
@@ -277,3 +277,43 @@ def UMULXHI  : VISInst<0b000010110, "umulxhi", I64Regs>;
 def XMULX    : VISInst<0b100010101, "xmulx",   I64Regs>;
 def XMULXHI  : VISInst<0b100010110, "xmulxhi", I64Regs>;
 } // Predicates = [IsVIS3]
+
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>;
+def fpnegimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>;
+
+// VIS instruction patterns.
+let Predicates = [HasVIS] in {
+// Zero immediate.
+def : Pat<(f64 fpimm0), (FZERO)>;
+def : Pat<(f32 fpimm0), (FZEROS)>;
+def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
+def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
+} // Predicates = [HasVIS]
+
+// VIS3 instruction patterns.
+let Predicates = [HasVIS3] in {
+def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
+
+def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
+// Signed "MULXHI".
+// Based on the formula presented in OSA2011 §7.140, but with bitops to select
+// the values to be added.
+def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
+      (SUBrr (UMULXHI $lhs, $rhs),
+             (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
+                    (ANDrr (SRAXri $rhs, 63), $lhs)))>;
+
+def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
+// 32-bit LZCNT.
+// The zero extension will leave us with 32 extra leading zeros,
+// so we need to compensate for it.
+def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
+
+def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
+def : Pat<(i64 (zext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
+def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
+def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
+def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
+def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
+} // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
index 1560bc687b7dd..e05c47bfee766 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=sparc %s -o - | FileCheck %s -check-prefix=V8
 ; RUN: llc -mtriple=sparc -mattr=v9 %s -o - | FileCheck %s -check-prefix=V9
 ; RUN: llc -mtriple=sparc64-unknown-linux %s -o - | FileCheck %s -check-prefix=SPARC64
+; RUN: llc -mtriple=sparc64-unknown-linux -mattr=vis3 %s -o - | FileCheck %s -check-prefix=SPARC64-VIS3
 
 define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind {
 ; V8-LABEL: test_addx:
@@ -60,6 +61,15 @@ define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind {
 ; SPARC64-NEXT:    movgu %xcc, 1, %o3
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    srl %o3, 0, %o0
+;
+; SPARC64-VIS3-LABEL: test_addx:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    mov %g0, %o3
+; SPARC64-VIS3-NEXT:    add %o0, %o1, %o0
+; SPARC64-VIS3-NEXT:    cmp %o0, %o2
+; SPARC64-VIS3-NEXT:    movgu %xcc, 1, %o3
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    srl %o3, 0, %o0
 entry:
   %0 = add i64 %a, %b
   %1 = icmp ugt i64 %0, %c
@@ -92,6 +102,13 @@ define i32 @test_select_int_icc(i32 %a, i32 %b, i32 %c) nounwind {
 ; SPARC64-NEXT:    move %icc, %o1, %o2
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    mov %o2, %o0
+;
+; SPARC64-VIS3-LABEL: test_select_int_icc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    cmp %o0, 0
+; SPARC64-VIS3-NEXT:    move %icc, %o1, %o2
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    mov %o2, %o0
 entry:
   %0 = icmp eq i32 %a, 0
   %1 = select i1 %0, i32 %b, i32 %c
@@ -133,6 +150,13 @@ define float @test_select_fp_icc(i32 %a, float %f1, float %f2) nounwind {
 ; SPARC64-NEXT:    cmp %o0, 0
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    fmovse %icc, %f3, %f0
+;
+; SPARC64-VIS3-LABEL: test_select_fp_icc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    fmovs %f5, %f0
+; SPARC64-VIS3-NEXT:    cmp %o0, 0
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    fmovse %icc, %f3, %f0
 entry:
   %0 = icmp eq i32 %a, 0
   %1 = select i1 %0, float %f1, float %f2
@@ -182,6 +206,13 @@ define double @test_select_dfp_icc(i32 %a, double %f1, double %f2) nounwind {
 ; SPARC64-NEXT:    cmp %o0, 0
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    fmovde %icc, %f2, %f0
+;
+; SPARC64-VIS3-LABEL: test_select_dfp_icc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    fmovd %f4, %f0
+; SPARC64-VIS3-NEXT:    cmp %o0, 0
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    fmovde %icc, %f2, %f0
 entry:
   %0 = icmp eq i32 %a, 0
   %1 = select i1 %0, double %f1, double %f2
@@ -229,6 +260,17 @@ define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind {
 ; SPARC64-NEXT:    fcmps %fcc0, %f1, %f0
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    movne %fcc0, %o1, %o0
+;
+; SPARC64-VIS3-LABEL: test_select_int_fcc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    sethi %h44(.LCPI4_0), %o0
+; SPARC64-VIS3-NEXT:    add %o0, %m44(.LCPI4_0), %o0
+; SPARC64-VIS3-NEXT:    sllx %o0, 12, %o0
+; SPARC64-VIS3-NEXT:    ld [%o0+%l44(.LCPI4_0)], %f0
+; SPARC64-VIS3-NEXT:    mov %o2, %o0
+; SPARC64-VIS3-NEXT:    fcmps %fcc0, %f1, %f0
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    movne %fcc0, %o1, %o0
 entry:
   %0 = fcmp une float %f, 0.000000e+00
   %a.b = select i1 %0, i32 %a, i32 %b
@@ -284,6 +326,17 @@ define float @test_select_fp_fcc(float %f, float %f1, float %f2) nounwind {
 ; SPARC64-NEXT:    fcmps %fcc0, %f1, %f2
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    fmovsne %fcc0, %f3, %f0
+;
+; SPARC64-VIS3-LABEL: test_select_fp_fcc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    sethi %h44(.LCPI5_0), %o0
+; SPARC64-VIS3-NEXT:    add %o0, %m44(.LCPI5_0), %o0
+; SPARC64-VIS3-NEXT:    sllx %o0, 12, %o0
+; SPARC64-VIS3-NEXT:    ld [%o0+%l44(.LCPI5_0)], %f2
+; SPARC64-VIS3-NEXT:    fmovs %f5, %f0
+; SPARC64-VIS3-NEXT:    fcmps %fcc0, %f1, %f2
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    fmovsne %fcc0, %f3, %f0
 entry:
   %0 = fcmp une float %f, 0.000000e+00
   %1 = select i1 %0, float %f1, float %f2
@@ -352,6 +405,18 @@ define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind {
 ; SPARC64-NEXT:    fmovd %f4, %f0
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    nop
+;
+; SPARC64-VIS3-LABEL: test_select_dfp_fcc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    sethi %h44(.LCPI6_0), %o0
+; SPARC64-VIS3-NEXT:    add %o0, %m44(.LCPI6_0), %o0
+; SPARC64-VIS3-NEXT:    sllx %o0, 12, %o0
+; SPARC64-VIS3-NEXT:    ldd [%o0+%l44(.LCPI6_0)], %f6
+; SPARC64-VIS3-NEXT:    fcmpd %fcc0, %f0, %f6
+; SPARC64-VIS3-NEXT:    fmovdne %fcc0, %f2, %f4
+; SPARC64-VIS3-NEXT:    fmovd %f4, %f0
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    nop
 entry:
   %0 = fcmp une double %f, 0.000000e+00
   %1 = select i1 %0, double %f1, double %f2
@@ -453,6 +518,31 @@ define i32 @test_float_cc(double %a, double %b, i32 %c, i32 %d) nounwind {
 ; SPARC64-NEXT:  ! %bb.4: ! %exit.0
 ; SPARC64-NEXT:    retl
 ; SPARC64-NEXT:    mov %g0, %o0
+;
+; SPARC64-VIS3-LABEL: test_float_cc:
+; SPARC64-VIS3:       ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    sethi %h44(.LCPI7_0), %o0
+; SPARC64-VIS3-NEXT:    add %o0, %m44(.LCPI7_0), %o0
+; SPARC64-VIS3-NEXT:    sllx %o0, 12, %o0
+; SPARC64-VIS3-NEXT:    ldd [%o0+%l44(.LCPI7_0)], %f4
+; SPARC64-VIS3-NEXT:    fcmpd %fcc0, %f0, %f4
+; SPARC64-VIS3-NEXT:    fbuge %fcc0, .LBB7_3
+; SPARC64-VIS3-NEXT:    nop
+; SPARC64-VIS3-NEXT:  ! %bb.1: ! %loop.2
+; SPARC64-VIS3-NEXT:    fcmpd %fcc0, %f2, %f4
+; SPARC64-VIS3-NEXT:    fbule %fcc0, .LBB7_3
+; SPARC64-VIS3-NEXT:    nop
+; SPARC64-VIS3-NEXT:  ! %bb.2: ! %exit.1
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    mov 1, %o0
+; SPARC64-VIS3-NEXT:  .LBB7_3: ! %loop
+; SPARC64-VIS3-NEXT:    ! =>This Inner Loop Header: Depth=1
+; SPARC64-VIS3-NEXT:    cmp %o2, 10
+; SPARC64-VIS3-NEXT:    be %icc, .LBB7_3
+; SPARC64-VIS3-NEXT:    nop
+; SPARC64-VIS3-NEXT:  ! %bb.4: ! %exit.0
+; SPARC64-VIS3-NEXT:    retl
+; SPARC64-VIS3-NEXT:    mov %g0, %o0
 entry:
    %0 = fcmp uge double %a, 0.000000e+00
    br i1 %0, label %loop, label %loop.2
@@ -558,6 +648,34 @@ define void @test_adde_sube(ptr %a, ptr %b, ptr %sum, ptr %diff) nounwind {
 ; SPARC64-NEXT:    stx %i0, [%i3]
 ; SPARC64-NEXT:    ret
 ; SPARC64-NEXT:    restore
+;
+; SPARC64-VIS3-LABEL: test_adde_sube:
+; SPARC64-VIS3:         .register %g2, #scratch
+; SPARC64-VIS3-NEXT:  ! %bb.0: ! %entry
+; SPARC64-VIS3-NEXT:    save %sp, -128, %sp
+; SPARC64-VIS3-NEXT:    ldx [%i0+8], %i4
+; SPARC64-VIS3-NEXT:    ldx [%i0], %i5
+; SPARC64-VIS3-NEXT:    ldx [%i1+8], %g2
+; SPARC64-VIS3-NEXT:    ldx [%i1], %i1
+; SPARC64-VIS3-NEXT:    addcc %i4, %g2, %g2
+; SPARC64-VIS3-NEXT:    addxccc %i5, %i1, %i1
+; SPARC64-VIS3-NEXT:    stx %i1, [%i2]
+; SPARC64-VIS3-NEXT:    stx %g2, [%i2+8]
+; SPARC64-VIS3-NEXT:    !APP
+; SPARC64-VIS3-NEXT:    !NO_APP
+; SPARC64-VIS3-NEXT:    ldx [%i0+8], %i1
+; SPARC64-VIS3-NEXT:    mov %g0, %i2
+; SPARC64-VIS3-NEXT:    ldx [%i0], %i0
+; SPARC64-VIS3-NEXT:    cmp %i4, %i1
+; SPARC64-VIS3-NEXT:    movcs %xcc, 1, %i2
+; SPARC64-VIS3-NEXT:    srl %i2, 0, %i2
+; SPARC64-VIS3-NEXT:    sub %i5, %i0, %i0
+; SPARC64-VIS3-NEXT:    sub %i0, %i2, %i0
+; SPARC64-VIS3-NEXT:    sub %i4, %i1, %i1
+; SPARC64-VIS3-NEXT:    stx %i1, [%i3+8]
+; SPARC64-VIS3-NEXT:    stx %i0, [%i3]
+; SPARC64-VIS3-NEXT:    ret
+; SPARC64-VIS3-NEXT:    restore
 entry:
    %0 = bitcast ptr %a to ptr
    %1 = bitcast ptr %b to ptr
diff --git a/llvm/test/CodeGen/SPARC/bitcast.ll b/llvm/test/CodeGen/SPARC/bitcast.ll
new file mode 100644
index 0000000000000..d5fb994a0f0fe
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/bitcast.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @stow(float %0) nounwind {
+; V9-LABEL: stow:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stow:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstouw %f1, %o0
+  %2 = bitcast float %0 to i32
+  ret i32 %2
+}
+
+define zeroext i32 @stouw(float %0) nounwind {
+; V9-LABEL: stouw:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stouw:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstouw %f1, %o0
+  %2 = bitcast float %0 to i32
+  ret i32 %2
+}
+
+define signext i32 @stosw(float %0) nounwind {
+; V9-LABEL: stosw:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ldsw [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stosw:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstosw %f1, %o0
+  %2 = bitcast float %0 to i32
+  ret i32 %2
+}
+
+define float @wtos(i32 %0) nounwind {
+; V9-LABEL: wtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: wtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %2 = bitcast i32 %0 to float
+  ret float %2
+}
+
+define float @uwtos(i32 zeroext %0) nounwind {
+; V9-LABEL: uwtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: uwtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %2 = bitcast i32 %0 to float
+  ret float %2
+}
+
+define float @swtos(i32 signext %0) nounwind {
+; V9-LABEL: swtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: swtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %2 = bitcast i32 %0 to float
+  ret float %2
+}
+
+define i64 @dtox(double %0) nounwind {
+; V9-LABEL: dtox:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    std %f0, [%sp+2183]
+; V9-NEXT:    ldx [%sp+2183], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: dtox:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movdtox %f0, %o0
+  %2 = bitcast double %0 to i64
+  ret i64 %2
+}
+
+define double @xtod(i64 %0) nounwind {
+; V9-LABEL: xtod:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    stx %o0, [%sp+2183]
+; V9-NEXT:    ldd [%sp+2183], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: xtod:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movxtod %o0, %f0
+  %2 = bitcast i64 %0 to double
+  ret double %2
+}
diff --git a/llvm/test/CodeGen/SPARC/ctlz.ll b/llvm/test/CodeGen/SPARC/ctlz.ll
new file mode 100644
index 0000000000000..3b2fc0dbfd4a3
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/ctlz.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @f(i32 %x) nounwind {
+; V9-LABEL: f:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srl %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srl %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srl %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 8, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 16, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 24, %o1
+; V9-NEXT:    cmp %o0, 0
+; V9-NEXT:    move %icc, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: f:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srl %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srl %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    srl %o1, 0, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    cmp %o0, 0
+; POPC-NEXT:    move %icc, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: f:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    srl %o0, 0, %o1
+; VIS3-NEXT:    lzcnt %o1, %o1
+; VIS3-NEXT:    add %o1, -32, %o1
+; VIS3-NEXT:    cmp %o0, 0
+; VIS3-NEXT:    move %icc, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry:
+  %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  %3 = trunc i32 %2 to i8
+  %conv = zext i8 %3 to i32
+  ret i32 %conv
+}
+
+define i64 @g(i64 %x) nounwind {
+; V9-LABEL: g:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srlx %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srlx %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 32, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srlx %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    sllx %o3, 32, %o4
+; V9-NEXT:    or %o4, %o3, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srlx %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sethi 16448, %o2
+; V9-NEXT:    or %o2, 257, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    mulx %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 56, %o1
+; V9-NEXT:    movrz %o0, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: g:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srlx %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srlx %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 32, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    movrz %o0, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: g:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    lzcnt %o0, %o1
+; VIS3-NEXT:    movrz %o0, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry:
+  %0 = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %1 = icmp eq i64 %x, 0
+  %2 = select i1 %1, i64 0, i64 %0
+  %3 = trunc i64 %2 to i32
+  %conv = zext i32 %3 to i64
+  ret i64 %conv
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.ctlz.i32(i32, i1 immarg) #0
+declare i64 @llvm.ctlz.i64(i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/SPARC/float-constants.ll b/llvm/test/CodeGen/SPARC/float-constants.ll
index b04ec68ed3d7e..440c75bfca9f9 100644
--- a/llvm/test/CodeGen/SPARC/float-constants.ll
+++ b/llvm/test/CodeGen/SPARC/float-constants.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=sparc | FileCheck %s
 ; RUN: llc < %s -mtriple=sparcel | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis | FileCheck %s --check-prefix=CHECK-VIS
 
 ;; Bitcast should not do a runtime conversion, but rather emit a
 ;; constant into integer registers directly.
@@ -17,6 +18,12 @@ define <2 x i32> @bitcast() nounwind {
 ; CHECK-LE-NEXT:    sethi 1049856, %o1
 ; CHECK-LE-NEXT:    retl
 ; CHECK-LE-NEXT:    mov %g0, %o0
+;
+; CHECK-VIS-LABEL: bitcast:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    sethi 1049856, %o0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    mov %g0, %o1
   %1 = bitcast double 5.0 to <2 x i32>
   ret <2 x i32> %1
 }
@@ -43,6 +50,17 @@ define void @test_call() nounwind {
 ; CHECK-LE-NEXT:    mov %g0, %o0
 ; CHECK-LE-NEXT:    ret
 ; CHECK-LE-NEXT:    restore
+;
+; CHECK-VIS-LABEL: test_call:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    save %sp, -176, %sp
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI1_0), %i0
+; CHECK-VIS-NEXT:    add %i0, %m44(.LCPI1_0), %i0
+; CHECK-VIS-NEXT:    sllx %i0, 12, %i0
+; CHECK-VIS-NEXT:    call a
+; CHECK-VIS-NEXT:    ldd [%i0+%l44(.LCPI1_0)], %f0
+; CHECK-VIS-NEXT:    ret
+; CHECK-VIS-NEXT:    restore
   call void @a(double 5.0)
   ret void
 }
@@ -75,6 +93,103 @@ define double @test_intrins_call() nounwind {
 ; CHECK-LE-NEXT:    mov %o1, %o3
 ; CHECK-LE-NEXT:    ret
 ; CHECK-LE-NEXT:    restore
+;
+; CHECK-VIS-LABEL: test_intrins_call:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    save %sp, -176, %sp
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI2_0), %i0
+; CHECK-VIS-NEXT:    add %i0, %m44(.LCPI2_0), %i0
+; CHECK-VIS-NEXT:    sllx %i0, 12, %i0
+; CHECK-VIS-NEXT:    ldd [%i0+%l44(.LCPI2_0)], %f0
+; CHECK-VIS-NEXT:    fmovd %f0, %f2
+; CHECK-VIS-NEXT:    call pow
+; CHECK-VIS-NEXT:    nop
+; CHECK-VIS-NEXT:    ret
+; CHECK-VIS-NEXT:    restore
   %1 = call double @llvm.pow.f64(double 2.0, double 2.0)
   ret double %1
 }
+
+;; When we have VIS, f32/f64 zero constant should be materialized from fzero/fzeros.
+
+define double @pos_zero_double() nounwind {
+; CHECK-LABEL: pos_zero_double:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI3_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ldd [%o0+%lo(.LCPI3_0)], %f0
+;
+; CHECK-LE-LABEL: pos_zero_double:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI3_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ldd [%o0+%lo(.LCPI3_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_zero_double:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    fzero %f0
+  ret double +0.0
+}
+
+define double @neg_zero_double() nounwind {
+; CHECK-LABEL: neg_zero_double:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI4_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ldd [%o0+%lo(.LCPI4_0)], %f0
+;
+; CHECK-LE-LABEL: neg_zero_double:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI4_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ldd [%o0+%lo(.LCPI4_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_zero_double:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    fzero %f0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    fnegd %f0, %f0
+  ret double -0.0
+}
+
+define float @pos_zero_float() nounwind {
+; CHECK-LABEL: pos_zero_float:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI5_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ld [%o0+%lo(.LCPI5_0)], %f0
+;
+; CHECK-LE-LABEL: pos_zero_float:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI5_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ld [%o0+%lo(.LCPI5_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_zero_float:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    fzeros %f0
+  ret float +0.0
+}
+
+define float @neg_zero_float() nounwind {
+; CHECK-LABEL: neg_zero_float:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI6_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ld [%o0+%lo(.LCPI6_0)], %f0
+;
+; CHECK-LE-LABEL: neg_zero_float:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI6_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ld [%o0+%lo(.LCPI6_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_zero_float:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    fzeros %f0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    fnegs %f0, %f0
+  ret float -0.0
+}
diff --git a/llvm/test/CodeGen/SPARC/multiply-extension.ll b/llvm/test/CodeGen/SPARC/multiply-extension.ll
new file mode 100644
index 0000000000000..4d752ff101ca2
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/multiply-extension.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis3 | FileCheck %s -check-prefix=VIS3
+
+define i128 @signed_multiply_extend(i64 %0, i64 %1) nounwind {
+; V9-LABEL: signed_multiply_extend:
+; V9:       ! %bb.0:
+; V9-NEXT:    save %sp, -176, %sp
+; V9-NEXT:    srax %i0, 63, %o2
+; V9-NEXT:    srax %i1, 63, %o0
+; V9-NEXT:    mov %i1, %o1
+; V9-NEXT:    call __multi3
+; V9-NEXT:    mov %i0, %o3
+; V9-NEXT:    mov %o0, %i0
+; V9-NEXT:    ret
+; V9-NEXT:    restore %g0, %o1, %o1
+;
+; VIS3-LABEL: signed_multiply_extend:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    srax %o0, 63, %o2
+; VIS3-NEXT:    and %o2, %o1, %o2
+; VIS3-NEXT:    srax %o1, 63, %o3
+; VIS3-NEXT:    and %o3, %o0, %o3
+; VIS3-NEXT:    add %o3, %o2, %o2
+; VIS3-NEXT:    umulxhi %o1, %o0, %o3
+; VIS3-NEXT:    sub %o3, %o2, %o2
+; VIS3-NEXT:    mulx %o1, %o0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o2, %o0
+  %3 = sext i64 %0 to i128
+  %4 = sext i64 %1 to i128
+  %5 = mul nsw i128 %4, %3
+  ret i128 %5
+}
+
+define i128 @unsigned_multiply_extend(i64 %0, i64 %1) nounwind {
+; V9-LABEL: unsigned_multiply_extend:
+; V9:       ! %bb.0:
+; V9-NEXT:    save %sp, -176, %sp
+; V9-NEXT:    mov %g0, %o0
+; V9-NEXT:    mov %i1, %o1
+; V9-NEXT:    mov %g0, %o2
+; V9-NEXT:    call __multi3
+; V9-NEXT:    mov %i0, %o3
+; V9-NEXT:    mov %o0, %i0
+; V9-NEXT:    ret
+; V9-NEXT:    restore %g0, %o1, %o1
+;
+; VIS3-LABEL: unsigned_multiply_extend:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    umulxhi %o1, %o0, %o2
+; VIS3-NEXT:    mulx %o1, %o0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o2, %o0
+  %3 = zext i64 %0 to i128
+  %4 = zext i64 %1 to i128
+  %5 = mul nuw i128 %4, %3
+  ret i128 %5
+}
diff --git a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
index 07e4c408a3ff0..1e5ab7922de08 100644
--- a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC
 ; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64
+; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu -mattr=vis3 | FileCheck %s --check-prefixes=SPARC64-VIS3
 
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
 ; SPARC-LABEL: muloti_test:
@@ -213,6 +214,49 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
 ; SPARC64-NEXT:    srl %i3, 0, %i2
 ; SPARC64-NEXT:    ret
 ; SPARC64-NEXT:    restore
+;
+; SPARC64-VIS3-LABEL: muloti_test:
+; SPARC64-VIS3:         .register %g2, #scratch
+; SPARC64-VIS3-NEXT:    .register %g3, #scratch
+; SPARC64-VIS3-NEXT:  ! %bb.0: ! %start
+; SPARC64-VIS3-NEXT:    save %sp, -128, %sp
+; SPARC64-VIS3-NEXT:    mov %g0, %i5
+; SPARC64-VIS3-NEXT:    umulxhi %i0, %i3, %i4
+; SPARC64-VIS3-NEXT:    srax %i0, 63, %g2
+; SPARC64-VIS3-NEXT:    mulx %g2, %i3, %g3
+; SPARC64-VIS3-NEXT:    add %i4, %g3, %i4
+; SPARC64-VIS3-NEXT:    umulxhi %i1, %i3, %g3
+; SPARC64-VIS3-NEXT:    mulx %i0, %i3, %g4
+; SPARC64-VIS3-NEXT:    addcc %g4, %g3, %g3
+; SPARC64-VIS3-NEXT:    addxccc %i4, %g0, %g4
+; SPARC64-VIS3-NEXT:    umulxhi %i1, %i2, %i4
+; SPARC64-VIS3-NEXT:    srax %i2, 63, %g5
+; SPARC64-VIS3-NEXT:    mulx %i1, %g5, %l0
+; SPARC64-VIS3-NEXT:    add %i4, %l0, %l0
+; SPARC64-VIS3-NEXT:    mulx %i1, %i2, %i4
+; SPARC64-VIS3-NEXT:    addcc %i4, %g3, %i4
+; SPARC64-VIS3-NEXT:    addxccc %l0, %g0, %g3
+; SPARC64-VIS3-NEXT:    srax %g3, 63, %l0
+; SPARC64-VIS3-NEXT:    addcc %g4, %g3, %g3
+; SPARC64-VIS3-NEXT:    srax %g4, 63, %g4
+; SPARC64-VIS3-NEXT:    addxccc %g4, %l0, %g4
+; SPARC64-VIS3-NEXT:    and %g5, %i0, %g5
+; SPARC64-VIS3-NEXT:    and %g2, %i2, %g2
+; SPARC64-VIS3-NEXT:    add %g2, %g5, %g2
+; SPARC64-VIS3-NEXT:    umulxhi %i0, %i2, %g5
+; SPARC64-VIS3-NEXT:    sub %g5, %g2, %g2
+; SPARC64-VIS3-NEXT:    mulx %i0, %i2, %i0
+; SPARC64-VIS3-NEXT:    addcc %i0, %g3, %i0
+; SPARC64-VIS3-NEXT:    addxccc %g2, %g4, %i2
+; SPARC64-VIS3-NEXT:    srax %i4, 63, %g2
+; SPARC64-VIS3-NEXT:    xor %i2, %g2, %i2
+; SPARC64-VIS3-NEXT:    xor %i0, %g2, %i0
+; SPARC64-VIS3-NEXT:    or %i0, %i2, %i0
+; SPARC64-VIS3-NEXT:    movrnz %i0, 1, %i5
+; SPARC64-VIS3-NEXT:    mulx %i1, %i3, %i1
+; SPARC64-VIS3-NEXT:    srl %i5, 0, %i2
+; SPARC64-VIS3-NEXT:    ret
+; SPARC64-VIS3-NEXT:    restore %g0, %i4, %o0
 start:
   %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %l, i128 %r)
   %1 = extractvalue { i128, i1 } %0, 0
diff --git a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
index 01383a00c2619..3bdb45ae51a93 100644
--- a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC
 ; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64
+; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu -mattr=vis3 | FileCheck %s --check-prefixes=SPARC64-VIS3
 
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
 ; SPARC-LABEL: muloti_test:
@@ -199,6 +200,38 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
 ; SPARC64-NEXT:    mov %i1, %i0
 ; SPARC64-NEXT:    ret
 ; SPARC64-NEXT:    restore %g0, %o1, %o1
+;
+; SPARC64-VIS3-LABEL: muloti_test:
+; SPARC64-VIS3:         .register %g2, #scratch
+; SPARC64-VIS3-NEXT:    .register %g3, #scratch
+; SPARC64-VIS3-NEXT:  ! %bb.0: ! %start
+; SPARC64-VIS3-NEXT:    save %sp, -128, %sp
+; SPARC64-VIS3-NEXT:    mov %g0, %i5
+; SPARC64-VIS3-NEXT:    mov %g0, %g2
+; SPARC64-VIS3-NEXT:    mov %g0, %g3
+; SPARC64-VIS3-NEXT:    mov %g0, %g4
+; SPARC64-VIS3-NEXT:    mov %g0, %g5
+; SPARC64-VIS3-NEXT:    mulx %i2, %i1, %i4
+; SPARC64-VIS3-NEXT:    mulx %i0, %i3, %l0
+; SPARC64-VIS3-NEXT:    add %l0, %i4, %i4
+; SPARC64-VIS3-NEXT:    umulxhi %i1, %i3, %l0
+; SPARC64-VIS3-NEXT:    add %l0, %i4, %i4
+; SPARC64-VIS3-NEXT:    cmp %i4, %l0
+; SPARC64-VIS3-NEXT:    movrnz %i2, 1, %g2
+; SPARC64-VIS3-NEXT:    movrnz %i0, 1, %g3
+; SPARC64-VIS3-NEXT:    and %g3, %g2, %g2
+; SPARC64-VIS3-NEXT:    umulxhi %i0, %i3, %i0
+; SPARC64-VIS3-NEXT:    movrnz %i0, 1, %g4
+; SPARC64-VIS3-NEXT:    movcs %xcc, 1, %i5
+; SPARC64-VIS3-NEXT:    or %g2, %g4, %i0
+; SPARC64-VIS3-NEXT:    umulxhi %i2, %i1, %i2
+; SPARC64-VIS3-NEXT:    movrnz %i2, 1, %g5
+; SPARC64-VIS3-NEXT:    or %i0, %g5, %i0
+; SPARC64-VIS3-NEXT:    or %i0, %i5, %i0
+; SPARC64-VIS3-NEXT:    mulx %i1, %i3, %i1
+; SPARC64-VIS3-NEXT:    srl %i0, 0, %i2
+; SPARC64-VIS3-NEXT:    ret
+; SPARC64-VIS3-NEXT:    restore %g0, %i4, %o0
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r)
   %1 = extractvalue { i128, i1 } %0, 0