[llvm] r261559 - [AMDGPU][llvm-mc] Support for 32-bit inline literals

Mon Feb 22 11:17:57 PST 2016

Author: tstellar
Date: Mon Feb 22 13:17:56 2016
New Revision: 261559

URL: http://llvm.org/viewvc/llvm-project?rev=261559&view=rev
Log:
[AMDGPU][llvm-mc] Support for 32-bit inline literals

Patch by: Artem Tamazov

Summary:
Note: Support for 64-bit inline literals TBD
Added: Support for abs/neg modifiers on literals (incomplete; parsing TBD).
Added: Some TODO comments.
Reworked/clarity: rename isInlineImm() to isInlinableImm()
Reworked/robustness: disallow BitsToFloat() with undefined value in isInlinableImm()
Reworked/reuse: isSSrc32/64(), isVSrc32/64()
Tests added.

Reviewers: tstellarAMD, arsenm

Subscribers: vpykhtin, nhaustov, SamWot, arsenm

Projects: #llvm-amdgpu-spb

Differential Revision: http://reviews.llvm.org/D17204

Modified:
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/test/MC/AMDGPU/vop3-errs.s
    llvm/trunk/test/MC/AMDGPU/vop3.s

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=261559&r1=261558&r2=261559&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Mon Feb 22 13:17:56 2016
@@ -79,6 +79,7 @@ public:
     bool IsFPImm;
     ImmTy Type;
     int64_t Val;
+    int Modifiers;
   };
 
   struct RegOp {
@@ -109,15 +110,20 @@ public:
   }
 
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
-    if (isReg())
+    if (isRegKind())
       addRegOperands(Inst, N);
     else
       addImmOperands(Inst, N);
   }
 
-  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
-    Inst.addOperand(MCOperand::createImm(Reg.Modifiers));
-    addRegOperands(Inst, N);
+  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
+    if (isRegKind()) {
+      Inst.addOperand(MCOperand::createImm(Reg.Modifiers));
+      addRegOperands(Inst, N);
+    } else {
+      Inst.addOperand(MCOperand::createImm(Imm.Modifiers));
+      addImmOperands(Inst, N);
+    }
   }
 
   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
@@ -143,12 +149,18 @@ public:
     return Kind == Immediate;
   }
 
-  bool isInlineImm() const {
-    float F = BitsToFloat(Imm.Val);
-    // TODO: Add 0.5pi for VI
-    return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+  bool isInlinableImm() const {
+    if (!isImm() || Imm.Type != AMDGPUOperand::ImmTyNone /* Only plain
+      immediates are inlinable (e.g. "clamp" attribute is not) */ )
+      return false;
+    // TODO: We should avoid using host float here. It would be better to
+    // check the float bit values, which is what a few other places do.
+    // We've had bot failures before due to weird NaN support on mips hosts.
+    const float F = BitsToFloat(Imm.Val);
+    // TODO: Add 1/(2*pi) for VI
+    return (Imm.Val <= 64 && Imm.Val >= -16) ||
            (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
-           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
+           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
   }
 
   bool isDSOffset0() const {
@@ -178,8 +190,8 @@ public:
     return Kind == Register && Reg.Modifiers == 0;
   }
 
-  bool isRegWithInputMods() const {
-    return Kind == Register;
+  bool isRegOrImmWithInputMods() const {
+    return Kind == Register || isInlinableImm();
   }
 
   bool isClamp() const {
@@ -195,13 +207,16 @@ public:
   }
 
   void setModifiers(unsigned Mods) {
-    assert(isReg());
-    Reg.Modifiers = Mods;
+    assert(isReg() || (isImm() && Imm.Modifiers == 0));
+    if (isReg())
+      Reg.Modifiers = Mods;
+    else
+      Imm.Modifiers = Mods;
   }
 
   bool hasModifiers() const {
-    assert(isRegKind());
-    return Reg.Modifiers != 0;
+    assert(isRegKind() || isImm());
+    return isRegKind() ? Reg.Modifiers != 0 : Imm.Modifiers != 0;
   }
 
   unsigned getReg() const override {
@@ -217,36 +232,42 @@ public:
   }
 
   bool isSCSrc32() const {
-    return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+    return isInlinableImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
   }
 
-  bool isSSrc32() const {
-    return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+  bool isSCSrc64() const {
+    return isInlinableImm() || (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
   }
 
-  bool isSSrc64() const {
-    return isImm() || isInlineImm() ||
-           (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+  bool isSSrc32() const {
+    return isImm() || isSCSrc32();
   }
 
-  bool isSCSrc64() const {
-    return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+  bool isSSrc64() const {
+    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
+    // See isVSrc64().
+    return isImm() || isSCSrc64();
   }
 
   bool isVCSrc32() const {
-    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+    return isInlinableImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
   }
 
   bool isVCSrc64() const {
-    return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+    return isInlinableImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
   }
 
   bool isVSrc32() const {
-    return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+    return isImm() || isVCSrc32();
   }
 
   bool isVSrc64() const {
-    return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+    // TODO: Check if the 64-bit value (coming from assembly source) can be
+    // narrowed to 32 bits (in the instruction stream). That requires knowledge
+    // of instruction type (unsigned/signed, floating or "untyped"/B64),
+    // see [AMD GCN3 ISA 6.3.1].
+    // TODO: How are 64-bit values formed from 32-bit literals in _B64 insns?
+    return isImm() || isVCSrc64();
   }
 
   bool isMem() const override {
@@ -275,7 +296,10 @@ public:
       OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
       break;
     case Immediate:
-      OS << getImm();
+      if (Imm.Type != AMDGPUOperand::ImmTyNone)
+        OS << getImm();
+      else 
+        OS << '<' << getImm() << " mods: " << Imm.Modifiers << '>';
       break;
     case Token:
       OS << '\'' << getToken() << '\'';
@@ -293,6 +317,7 @@ public:
     Op->Imm.Val = Val;
     Op->Imm.IsFPImm = IsFPImm;
     Op->Imm.Type = Type;
+    Op->Imm.Modifiers = 0;
     Op->StartLoc = Loc;
     Op->EndLoc = Loc;
     return Op;
@@ -1099,6 +1124,8 @@ static bool operandsHaveModifiers(const
     const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
     if (Op.isRegKind() && Op.hasModifiers())
       return true;
+    if (Op.isImm() && Op.hasModifiers())
+      return true;
     if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
                        Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
       return true;
@@ -1843,7 +1870,7 @@ AMDGPUAsmParser::parseVOP3OptionalOps(Op
       // previous register operands have modifiers
       for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
         AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
-        if (Op.isReg())
+        if ((Op.isReg() || Op.isImm()) && !Op.hasModifiers())
           Op.setModifiers(0);
       }
     }
@@ -1892,14 +1919,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &In
   unsigned ClampIdx = 0, OModIdx = 0;
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (Op.isRegWithInputMods()) {
-      Op.addRegWithInputModsOperands(Inst, 2);
+    if (Op.isRegOrImmWithInputMods()) {
+      Op.addRegOrImmWithInputModsOperands(Inst, 2);
     } else if (Op.isClamp()) {
       ClampIdx = I;
     } else if (Op.isOMod()) {
       OModIdx = I;
-    } else if (Op.isImm()) {
-      Op.addImmOperands(Inst, 1);
     } else {
       assert(false);
     }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=261559&r1=261558&r2=261559&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Feb 22 13:17:56 2016
@@ -1073,7 +1073,7 @@ def InputMods : OperandWithDefaultOps <i
 }
 
 def InputModsMatchClass : AsmOperandClass {
-  let Name = "RegWithInputMods";
+  let Name = "RegOrImmWithInputMods";
 }
 
 def InputModsNoDefault : Operand <i32> {

Modified: llvm/trunk/test/MC/AMDGPU/vop3-errs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3-errs.s?rev=261559&r1=261558&r2=261559&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3-errs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3-errs.s Mon Feb 22 13:17:56 2016
@@ -3,3 +3,6 @@
 
 v_add_f32_e64 v0, v1
 // CHECK: error: too few operands for instruction
+
+v_div_scale_f32  v24, vcc, v22, 1.1, v22
+// CHECK: error: invalid operand for instruction

Modified: llvm/trunk/test/MC/AMDGPU/vop3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3.s?rev=261559&r1=261558&r2=261559&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3.s Mon Feb 22 13:17:56 2016
@@ -289,3 +289,19 @@ v_div_scale_f32  v24, vcc, s[10:11], v22
 v_div_scale_f32  v24, s[10:11], v22, v22, v20
 // SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04]
 // VI:   v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04]
+
+v_div_scale_f32  v24, vcc, v22, 1.0, v22
+// SICI: v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0xe5,0x59,0x04]
+// VI:   v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0xe5,0x59,0x04]
+
+v_div_scale_f32  v24, vcc, v22, v22, -2.0
+// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03]
+// VI:   v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd6,0x03]
+
+v_div_scale_f32 v24, vcc, v22, v22, 0xc0000000
+// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03]
+// VI:   v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd6,0x03]
+
+v_mad_f32 v9, 0.5, v5, -v8
+// SICI: v_mad_f32 v9, 0.5, v5, -v8      ; encoding: [0x09,0x00,0x82,0xd2,0xf0,0x0a,0x22,0x84]
+// VI:   v_mad_f32 v9, 0.5, v5, -v8      ; encoding: [0x09,0x00,0xc1,0xd1,0xf0,0x0a,0x22,0x84]