[vmkit-commits] [vmkit] r83412 - in /vmkit/trunk/lib/N3/VMCore: UTF8.cpp UTF8.h

Gael Thomas gael.thomas at lip6.fr
Tue Oct 6 13:37:43 PDT 2009


Author: gthomas
Date: Tue Oct  6 15:37:42 2009
New Revision: 83412

URL: http://llvm.org/viewvc/llvm-project?rev=83412&view=rev
Log:
Add UTF8.cpp/UTF8.h.


Added:
    vmkit/trunk/lib/N3/VMCore/UTF8.cpp
    vmkit/trunk/lib/N3/VMCore/UTF8.h

Added: vmkit/trunk/lib/N3/VMCore/UTF8.cpp
URL: http://llvm.org/viewvc/llvm-project/vmkit/trunk/lib/N3/VMCore/UTF8.cpp?rev=83412&view=auto

==============================================================================
--- vmkit/trunk/lib/N3/VMCore/UTF8.cpp (added)
+++ vmkit/trunk/lib/N3/VMCore/UTF8.cpp Tue Oct  6 15:37:42 2009
@@ -0,0 +1,175 @@
+#include "UTF8.h"
+#include "VMThread.h"
+#include "VMClass.h"
+#include "VMArray.h"
+#include "N3.h"
+#include "MSCorlib.h"
+
+using namespace n3;
+
+// AT(name, elmt) expands to the definitions of name::at() and name::setAt(),
+// the bounds-checked element accessors of the array class `name` whose
+// element type is `elmt`.  An offset outside [0, size) is handed to
+// vm->indexOutOfBounds() before the element is accessed.  The lower bound is
+// tested explicitly because `offset` is signed: a negative value would
+// otherwise satisfy the `offset >= size` test and index before `elements`.
+#define AT(name, elmt)                                                      \
+  elmt name::at(sint32 offset) const {                                      \
+    if (offset < 0 || offset >= size)                                       \
+      VMThread::get()->vm->indexOutOfBounds(this, offset);                  \
+    return elements[offset];                                                \
+  }                                                                         \
+  void name::setAt(sint32 offset, elmt value) {                             \
+    if (offset < 0 || offset >= size)                                       \
+      VMThread::get()->vm->indexOutOfBounds(this, offset);                  \
+    elements[offset] = value;                                               \
+  }
+
+// INITIALISE(name) expands to the definition of name::initialise(): it
+// forwards the class descriptor to VMObject::initialise(), records the
+// element count in `size` and stores 0 in the first n elements.
+#define INITIALISE(name)                                                    \
+  void name::initialise(VMCommonClass* atype, sint32 n) {                   \
+    VMObject::initialise(atype);                                            \
+    size = n;                                                               \
+    int idx = 0;                                                            \
+    while (idx < n) {                                                       \
+      elements[idx] = 0;                                                    \
+      ++idx;                                                                \
+    }                                                                       \
+  }
+
+// Define UTF8::at(), UTF8::setAt() and UTF8::initialise() for uint16 elements.
+AT(UTF8, uint16)
+INITIALISE(UTF8)
+
+// The generator macros are not used past this point.
+#undef AT
+#undef INITIALISE
+
+
+// Allocates a UTF8 with room for n uint16 elements and initialises it with
+// the array class atype.  A negative n is passed to
+// negativeArraySizeException(); an n above VMArray::MaxArraySize is passed to
+// outOfMemoryError().
+UTF8* UTF8::acons(sint32 n, VMClassArray* atype) {
+  if (n < 0) {
+    VMThread::get()->vm->negativeArraySizeException(n);
+  } else if (n > VMArray::MaxArraySize) {
+    VMThread::get()->vm->outOfMemoryError(n);
+  }
+
+  // Object header, then the length field, then the n elements.
+  uint32 bytes = sizeof(VMObject) + sizeof(sint32) + n * sizeof(uint16);
+  UTF8* array = (UTF8*)gc::operator new(bytes, UTF8::VT);
+  array->initialise(atype, n);
+  return array;
+}
+
+// Writes every element to buf, one writeChar() call per element, after
+// narrowing the uint16 value to char.
+void UTF8::print(mvm::PrintBuffer* buf) const {
+  const uint16* cursor = elements;
+  for (sint32 remaining = size; remaining > 0; --remaining, ++cursor) {
+    buf->writeChar((char)*cursor);
+  }
+}
+
+// Builds, through readerConstruct(), a UTF8 from the elements in
+// [start, end) of this string.  Each element is read with at(), so the
+// bounds check of at() applies to every index that is fetched.
+// NOTE(review): the declaration in UTF8.h names the third parameter `len`,
+// but it is used here as an exclusive end offset -- confirm with callers.
+const UTF8* UTF8::extract(N3 *vm, uint32 start, uint32 end) const {
+  const uint32 count = end - start;
+  // Temporary stack copy handed to readerConstruct().
+  uint16* scratch = (uint16*)alloca(sizeof(uint16) * count);
+
+  uint32 src = start;
+  for (uint32 dst = 0; dst < count; ++dst, ++src) {
+    scratch[dst] = at(src);
+  }
+
+  return readerConstruct(vm, scratch, count);
+}
+
+// Converts a C string to a UTF8 by delegating to vm->asciizToUTF8().
+const UTF8* UTF8::asciizConstruct(N3* vm, const char* asciiz) {
+  const UTF8* result = vm->asciizToUTF8(asciiz);
+  return result;
+}
+
+// Converts the n uint16 values at buf to a UTF8 by delegating to
+// vm->bufToUTF8().
+const UTF8* UTF8::readerConstruct(N3* vm, uint16* buf, uint32 n) {
+  const UTF8* result = vm->bufToUTF8(buf, n);
+  return result;
+}
+
+// Copies the elements into a mvm::NativeString allocated with size + 1
+// entries, stores a terminating 0 after them and returns its cString().
+char* UTF8::UTF8ToAsciiz() const {
+  const sint32 length = size;
+  mvm::NativeString* native = mvm::NativeString::alloc(length + 1);
+  sint32 pos = 0;
+  while (pos < length) {
+    native->setAt(pos, elements[pos]);
+    ++pos;
+  }
+  native->setAt(length, 0);
+  return native->cString();
+}
+
+
+
+// Hashes the first `size` chars of asciiz into a 16-bit value: bits 0-7 are
+// the low byte of the XOR of the chars, bits 8-15 the low byte of their sum.
+static uint32 asciizHasher(const char* asciiz, sint32 size) {
+  uint32 sum = 0;
+  uint32 mix = 0;
+  sint32 pos = 0;
+  while (pos < size) {
+    const char c = asciiz[pos++];
+    sum += c;
+    mix ^= c;
+  }
+  const uint32 low = mix & 255;
+  const uint32 high = (sum & 255) << 8;
+  return low + high;
+}
+
+// Hashes the first `size` uint16 values of buf into a 16-bit value: bits 0-7
+// are the low byte of the XOR of the values, bits 8-15 the low byte of their
+// sum.
+static uint32 readerHasher(const uint16* buf, sint32 size) {
+  uint32 sum = 0;
+  uint32 mix = 0;
+  sint32 pos = 0;
+  while (pos < size) {
+    const uint16 c = buf[pos++];
+    sum += c;
+    mix ^= c;
+  }
+  const uint32 low = mix & 255;
+  const uint32 high = (sum & 255) << 8;
+  return low + high;
+}
+
+// Returns true when val has exactly `size` elements and each one equals the
+// corresponding char of asciiz.
+static bool asciizEqual(const UTF8* val, const char* asciiz, sint32 size) {
+  if (val->size != size) return false;
+
+  for (sint32 i = 0; i < size; i++) {
+    // Convert the char to uint16 before comparing, the same conversion that
+    // happens when lookupOrCreateAsciiz() stores it through setAt().  Without
+    // the cast both operands are promoted to int, so where char is signed a
+    // byte >= 0x80 (negative) can never equal the stored element (positive)
+    // and such a string would never be found again.
+    if ((uint16)asciiz[i] != val->at(i)) return false;
+  }
+  return true;
+}
+
+// Returns true when val has exactly `size` elements and they are bytewise
+// identical to the first `size` uint16 values of buf.
+static bool readerEqual(const UTF8* val, const uint16* buf, sint32 size) {
+  if (val->size != size) {
+    return false;
+  }
+  return memcmp(val->elements, buf, size * sizeof(uint16)) == 0;
+}
+
+
+// Returns the UTF8 registered in this map whose elements match the chars of
+// the NUL-terminated string asciiz; when there is none, a new UTF8 is built
+// from asciiz, inserted under its hash and returned.  The search and the
+// insertion both run between lock->lock() and lock->unlock().
+const UTF8* UTF8Map::lookupOrCreateAsciiz(const char* asciiz) {
+  sint32 length = strlen(asciiz);
+  uint32 hash = asciizHasher(asciiz, length);
+  const UTF8* found = 0;
+  lock->lock();
+
+  // Every entry stored under this hash; different strings may share it.
+  std::pair<UTF8Map::iterator, UTF8Map::iterator> bucket =
+    map.equal_range(hash);
+
+  for (UTF8Map::iterator it = bucket.first; it != bucket.second; it++) {
+    if (!asciizEqual(it->second, asciiz, length)) continue;
+    found = it->second;
+    break;
+  }
+
+  if (!found) {
+    UTF8* fresh = UTF8::acons(length, MSCorlib::arrayChar);
+    for (sint32 pos = 0; pos < length; pos++)
+      fresh->setAt(pos, asciiz[pos]);
+    found = fresh;
+    map.insert(std::make_pair(hash, found));
+  }
+
+  lock->unlock();
+  return found;
+}
+
+// Returns the UTF8 registered in this map whose elements match the len
+// uint16 values at buf; when there is none, a new UTF8 is built from a copy
+// of buf, inserted under its hash and returned.  The search and the
+// insertion both run between lock->lock() and lock->unlock().
+const UTF8* UTF8Map::lookupOrCreateReader(const uint16* buf, uint32 len) {
+  sint32 length = (sint32)len;
+  uint32 hash = readerHasher(buf, length);
+  const UTF8* found = 0;
+  lock->lock();
+
+  // Every entry stored under this hash; different strings may share it.
+  std::pair<UTF8Map::iterator, UTF8Map::iterator> bucket =
+    map.equal_range(hash);
+
+  for (UTF8Map::iterator it = bucket.first; it != bucket.second; it++) {
+    if (!readerEqual(it->second, buf, length)) continue;
+    found = it->second;
+    break;
+  }
+
+  if (!found) {
+    UTF8* fresh = UTF8::acons(length, MSCorlib::arrayChar);
+    memcpy(fresh->elements, buf, len * sizeof(uint16));
+    found = fresh;
+    map.insert(std::make_pair(hash, found));
+  }
+
+  lock->unlock();
+  return found;
+}
+
+

Added: vmkit/trunk/lib/N3/VMCore/UTF8.h
URL: http://llvm.org/viewvc/llvm-project/vmkit/trunk/lib/N3/VMCore/UTF8.h?rev=83412&view=auto

==============================================================================
--- vmkit/trunk/lib/N3/VMCore/UTF8.h (added)
+++ vmkit/trunk/lib/N3/VMCore/UTF8.h Tue Oct  6 15:37:42 2009
@@ -0,0 +1,109 @@
+#ifndef _N3_UTF8_
+#define _N3_UTF8_
+
+#include "VMObject.h"
+#include "mvm/PrintBuffer.h"
+
+namespace mvm {
+	class VirtualTable;
+}
+
+namespace n3 {
+	class VMClassArray;
+	class N3;
+
+// VM object holding a sequence of uint16 elements together with its length.
+// Instances are created with acons(), which sizes the allocation for the
+// requested number of elements.
+class UTF8 : public VMObject {
+public:
+  static VirtualTable* VT;
+  // Number of elements stored in `elements`.
+  sint32 size;
+  // Declared with one entry; acons() allocates room for `size` entries.
+  uint16 elements[1];
+
+  static const llvm::Type* llvmType;
+  // Allocates and initialises a UTF8 of n elements with array class cl.
+  static UTF8* acons(sint32 n, VMClassArray* cl);
+  // Sets the class and the length, and zeroes the first n elements.
+  void initialise(VMCommonClass* atype, sint32 n);
+  
+  // Element accessors; they call vm->indexOutOfBounds() on a bad offset.
+  unsigned short int at(sint32) const;
+  void setAt(sint32, uint16);
+  
+  // Writes the elements to buf, each one narrowed to char.
+  virtual void print(mvm::PrintBuffer* buf) const;
+
+  // Returns the elements as a 0-terminated char string.
+  char* UTF8ToAsciiz() const;
+  // Thin wrappers around vm->asciizToUTF8() and vm->bufToUTF8().
+  static const UTF8* asciizConstruct(N3 *vm, const char* asciiz);
+  static const UTF8* readerConstruct(N3 *vm, uint16* buf, uint32 n);
+
+  // NOTE(review): the definition in UTF8.cpp treats the third argument as an
+  // exclusive end offset, not as a length -- confirm which one is intended.
+  const UTF8* extract(N3 *vm, uint32 start, uint32 len) const;
+};
+
+// Table of UTF8 strings indexed by the hash of their contents.  The lookup
+// functions return the entry already stored for a given content, or create
+// and store one, while holding `lock`.
+class UTF8Map : public mvm::PermanentObject {
+public:
+  // Exact type of the `map` member.  `iterator` is derived from it so that
+  // it always matches what map.begin(), map.end() and map.equal_range()
+  // return; it used to name the iterator of a different multimap
+  // specialization (const key, default allocator), which is only
+  // interchangeable on some standard library implementations.
+  typedef std::multimap<uint32, const UTF8*, std::less<uint32>,
+                        gc_allocator<std::pair<const uint32, const UTF8*> > >
+    map_type;
+  typedef map_type::iterator iterator;
+
+  mvm::Lock* lock;
+  // hash of the contents -> string; several strings may share one hash.
+  map_type map;
+
+  // Find-or-create from a NUL-terminated C string.
+  const UTF8* lookupOrCreateAsciiz(const char* asciiz);
+  // Find-or-create from `size` uint16 values at buf.
+  const UTF8* lookupOrCreateReader(const uint16* buf, uint32 size);
+
+  UTF8Map() {
+    lock = new mvm::LockNormal();
+  }
+
+  // Applies MARK_AND_TRACE to every string stored in the map.
+  virtual void TRACER {
+    //lock->MARK_AND_TRACE;
+    for (iterator i = map.begin(), e = map.end(); i!= e; ++i) {
+      i->second->MARK_AND_TRACE;
+    }
+  }
+
+  virtual void print(mvm::PrintBuffer* buf) const {
+    buf->write("UTF8 Hashtable<>");
+  }
+};
+
+
+// Growable buffer of uint16 elements used to concatenate pieces of UTF8
+// strings before turning the result into a UTF8 through a UTF8Map.
+// NOTE(review): the class owns `buf` but does not disable copying; a copied
+// builder would delete[] the same buffer twice.
+class UTF8Builder {
+	uint16 *buf;   // heap storage holding `size` elements
+	uint32  cur;   // number of elements appended so far
+	uint32  size;  // capacity of buf, in elements
+
+public:
+	// Creates an empty builder with a capacity of max(size, 4) elements.
+	UTF8Builder(size_t size) {
+		size = (size < 4) ? 4 : size;
+		this->buf = new uint16[size];
+		this->cur = 0;  // previously left uninitialised
+		this->size = size;
+	}
+
+	// Appends `length` elements of utf8 starting at index `start` and returns
+	// this builder so calls can be chained.  With the default length, every
+	// element from `start` to the end of utf8 is appended.
+	UTF8Builder *append(const UTF8 *utf8, uint32 start=0, uint32 length=0xffffffff) {
+		if(length == 0xffffffff) {
+			// The default must account for `start`, otherwise the copy below
+			// would read `start` elements past the end of utf8.
+			uint32 total = utf8->size;
+			length = (start < total) ? total - start : 0;
+		}
+		uint32 req = cur + length;
+
+		if(req > size) {
+			// Double the capacity until the request fits.
+			uint32 newSize = size<<1;
+			while(newSize < req)
+				newSize <<= 1;
+			uint16 *newBuf = new uint16[newSize];
+			memcpy(newBuf, buf, cur * sizeof(uint16));
+			delete []buf;
+			buf = newBuf;
+			size = newSize;
+		}
+
+		// `elements` decays to a pointer to its first element; taking its
+		// address instead would do arithmetic on a pointer-to-array.
+		memcpy(buf + cur, utf8->elements + start, length * sizeof(uint16));
+		cur = req;
+
+		return this;
+	}
+
+	// Looks up or creates, in map, the UTF8 made of the elements appended so
+	// far (`cur` elements, not the whole capacity).
+	const UTF8 *toUTF8(UTF8Map *map) {
+		return map->lookupOrCreateReader(buf, cur);
+	}
+
+	~UTF8Builder() {
+		delete [] buf;
+	}
+};
+
+}
+
+#endif





More information about the vmkit-commits mailing list