[vmkit-commits] [vmkit] r83412 - in /vmkit/trunk/lib/N3/VMCore: UTF8.cpp UTF8.h
Gael Thomas
gael.thomas at lip6.fr
Tue Oct 6 13:37:43 PDT 2009
Author: gthomas
Date: Tue Oct 6 15:37:42 2009
New Revision: 83412
URL: http://llvm.org/viewvc/llvm-project?rev=83412&view=rev
Log:
Add UTF8.cpp/UTF8.h.
Added:
vmkit/trunk/lib/N3/VMCore/UTF8.cpp
vmkit/trunk/lib/N3/VMCore/UTF8.h
Added: vmkit/trunk/lib/N3/VMCore/UTF8.cpp
URL: http://llvm.org/viewvc/llvm-project/vmkit/trunk/lib/N3/VMCore/UTF8.cpp?rev=83412&view=auto
==============================================================================
--- vmkit/trunk/lib/N3/VMCore/UTF8.cpp (added)
+++ vmkit/trunk/lib/N3/VMCore/UTF8.cpp Tue Oct 6 15:37:42 2009
@@ -0,0 +1,175 @@
+#include "UTF8.h"
+#include "VMThread.h"
+#include "VMClass.h"
+#include "VMArray.h"
+#include "N3.h"
+#include "MSCorlib.h"
+
+using namespace n3;
+
+// Generates the bounds-checked element accessors name::at and name::setAt.
+// An invalid offset is reported through the current thread's VM
+// (indexOutOfBounds) before the elements array is touched. The offset is a
+// signed value, so negative offsets are rejected as well as offsets at or
+// past size.
+// NOTE(review): assumes indexOutOfBounds does not return normally - confirm.
+#define AT(name, elmt) \
+  elmt name::at(sint32 offset) const { \
+    if (offset < 0 || offset >= size) \
+      VMThread::get()->vm->indexOutOfBounds(this, offset); \
+    return elements[offset]; \
+  } \
+  void name::setAt(sint32 offset, elmt value) { \
+    if (offset < 0 || offset >= size) \
+      VMThread::get()->vm->indexOutOfBounds(this, offset); \
+    elements[offset] = value; \
+  }
+
+// Generates name::initialise, which delegates to VMObject::initialise with
+// the given type, records n as the element count and zeroes the first n
+// elements.
+#define INITIALISE(name) \
+  void name::initialise(VMCommonClass* atype, sint32 n) { \
+    VMObject::initialise(atype); \
+    size = n; \
+    int idx = 0; \
+    while (idx < n) { \
+      elements[idx] = 0; \
+      ++idx; \
+    } \
+  }
+
+AT(UTF8, uint16)  // defines UTF8::at and UTF8::setAt over uint16 elements
+INITIALISE(UTF8)  // defines UTF8::initialise
+
+#undef AT  // the generator macros are not needed past this point
+#undef INITIALISE
+
+
+// Allocates a UTF8 with room for n uint16 elements and initialises it with
+// the array type atype. A negative n is reported through
+// negativeArraySizeException and an n above VMArray::MaxArraySize through
+// outOfMemoryError, both on the current thread's VM.
+UTF8* UTF8::acons(sint32 n, VMClassArray* atype) {
+  if (n < 0) {
+    VMThread::get()->vm->negativeArraySizeException(n);
+  } else if (n > VMArray::MaxArraySize) {
+    VMThread::get()->vm->outOfMemoryError(n);
+  }
+
+  // Object header, then the element count, then the n elements themselves.
+  const uint32 bytes = sizeof(VMObject) + sizeof(sint32) + n * sizeof(uint16);
+  UTF8* const fresh = static_cast<UTF8*>(gc::operator new(bytes, UTF8::VT));
+  fresh->initialise(atype, n);
+  return fresh;
+}
+
+// Writes every element to buf as one char; the uint16 value is narrowed by
+// the cast.
+void UTF8::print(mvm::PrintBuffer* buf) const {
+  int idx = 0;
+  while (idx < size) {
+    buf->writeChar(static_cast<char>(elements[idx]));
+    ++idx;
+  }
+}
+
+// Returns the UTF8 built by readerConstruct from the elements in
+// [start, end).
+//
+// The range is validated before the temporary stack buffer is sized. Without
+// that, end < start makes end - start wrap around to a huge length, and an
+// end far past the string requests an oversized alloca before at() gets a
+// chance to complain. Invalid ranges are reported through indexOutOfBounds,
+// the same way at() reports them; an empty range (start == end) is accepted
+// whatever its position.
+// NOTE(review): assumes indexOutOfBounds does not return normally - confirm.
+const UTF8* UTF8::extract(N3 *vm, uint32 start, uint32 end) const {
+  const uint32 limit = size < 0 ? 0 : (uint32)size;
+  if (end < start) {
+    VMThread::get()->vm->indexOutOfBounds(this, (sint32)end);
+  } else if (end > start && end > limit) {
+    // Report the first offset of the range that lies outside the string.
+    const uint32 firstBad = start < limit ? limit : start;
+    VMThread::get()->vm->indexOutOfBounds(this, (sint32)firstBad);
+  }
+
+  uint32 len = end - start;
+  uint16* buf = (uint16*)alloca(sizeof(uint16) * len);
+
+  for (uint32 i = 0; i < len; i++) {
+    buf[i] = at(i + start);
+  }
+
+  return readerConstruct(vm, buf, len);
+}
+
+// Builds a UTF8 from the C string asciiz by delegating to vm->asciizToUTF8
+// and returning its result.
+const UTF8* UTF8::asciizConstruct(N3* vm, const char* asciiz) {
+  const UTF8* const result = vm->asciizToUTF8(asciiz);
+  return result;
+}
+
+// Builds a UTF8 from the n uint16 values at buf by delegating to
+// vm->bufToUTF8 and returning its result.
+const UTF8* UTF8::readerConstruct(N3* vm, uint16* buf, uint32 n) {
+  const UTF8* const result = vm->bufToUTF8(buf, n);
+  return result;
+}
+
+// Copies the elements into a newly allocated mvm::NativeString of size + 1
+// slots, stores a terminating 0 in the last slot and returns the string's
+// cString().
+char* UTF8::UTF8ToAsciiz() const {
+  mvm::NativeString* const out = mvm::NativeString::alloc(size + 1);
+  sint32 idx = 0;
+  while (idx < size) {
+    out->setAt(idx, elements[idx]);
+    ++idx;
+  }
+  out->setAt(size, 0);
+  return out->cString();
+}
+
+
+
+// Hashes the first size bytes of asciiz into a 16-bit value: the low byte is
+// the XOR of the bytes and the high byte is their sum modulo 256.
+static uint32 asciizHasher(const char* asciiz, sint32 size) {
+  uint32 sum = 0;
+  uint32 mix = 0;
+  sint32 pos = 0;
+  while (pos < size) {
+    const char ch = asciiz[pos];
+    sum += ch;
+    mix ^= ch;
+    ++pos;
+  }
+  return ((sum & 255) << 8) + (mix & 255);
+}
+
+// Hashes the first size uint16 values of buf into a 16-bit value: the low
+// byte comes from the XOR of the values and the high byte from their sum,
+// each reduced to its low 8 bits.
+static uint32 readerHasher(const uint16* buf, sint32 size) {
+  uint32 sum = 0;
+  uint32 mix = 0;
+  for (sint32 left = size; left > 0; --left, ++buf) {
+    const uint16 unit = *buf;
+    sum += unit;
+    mix ^= unit;
+  }
+  return ((sum & 255) << 8) + (mix & 255);
+}
+
+// Tells whether val holds exactly the size characters found at asciiz.
+//
+// Each byte is converted to uint16 before the comparison, which is the same
+// conversion UTF8::setAt applies when lookupOrCreateAsciiz stores the string.
+// Comparing the plain char instead sign-extends bytes >= 0x80 wherever char
+// is signed, so such a string would never compare equal to its own stored
+// copy and would be inserted again on every lookup.
+static bool asciizEqual(const UTF8* val, const char* asciiz, sint32 size) {
+  if (val->size != size) return false;
+
+  for (sint32 i = 0; i < size; i++) {
+    if ((uint16)asciiz[i] != val->at(i)) return false;
+  }
+  return true;
+}
+
+// Tells whether val has exactly size elements and whether those elements are
+// bytewise identical to the uint16 values at buf.
+static bool readerEqual(const UTF8* val, const uint16* buf, sint32 size) {
+  const sint32 count = val->size;
+  if (count != size)
+    return false;
+  return memcmp(val->elements, buf, count * sizeof(uint16)) == 0;
+}
+
+
+// Returns the UTF8 stored in the map for the C string asciiz. When no entry
+// with the same hash and the same contents exists, a new UTF8 is allocated,
+// filled from asciiz and inserted. The search and the insertion both run
+// between lock->lock() and lock->unlock().
+const UTF8* UTF8Map::lookupOrCreateAsciiz(const char* asciiz) {
+  const sint32 length = strlen(asciiz);
+  const uint32 hash = asciizHasher(asciiz, length);
+  const UTF8* found = 0;
+
+  lock->lock();
+
+  // Entries sharing the hash are told apart by comparing their contents.
+  std::pair<UTF8Map::iterator, UTF8Map::iterator> bucket = map.equal_range(hash);
+  UTF8Map::iterator it = bucket.first;
+  while (it != bucket.second) {
+    if (asciizEqual(it->second, asciiz, length)) {
+      found = it->second;
+      break;
+    }
+    it++;
+  }
+
+  if (found == 0) {
+    UTF8* fresh = (UTF8 *)UTF8::acons(length, MSCorlib::arrayChar);
+    sint32 pos = 0;
+    while (pos < length) {
+      fresh->setAt(pos, asciiz[pos]);
+      ++pos;
+    }
+    found = (const UTF8*)fresh;
+    map.insert(std::make_pair(hash, found));
+  }
+
+  lock->unlock();
+  return found;
+}
+
+// Returns the UTF8 stored in the map for the len uint16 values at buf. When
+// no entry with the same hash and the same contents exists, a new UTF8 is
+// allocated, filled with a copy of buf and inserted. The search and the
+// insertion both run between lock->lock() and lock->unlock().
+const UTF8* UTF8Map::lookupOrCreateReader(const uint16* buf, uint32 len) {
+  const sint32 length = (sint32)len;
+  const uint32 hash = readerHasher(buf, length);
+  const UTF8* found = 0;
+
+  lock->lock();
+
+  // Entries sharing the hash are told apart by comparing their contents.
+  std::pair<UTF8Map::iterator, UTF8Map::iterator> bucket = map.equal_range(hash);
+  UTF8Map::iterator it = bucket.first;
+  while (it != bucket.second) {
+    if (readerEqual(it->second, buf, length)) {
+      found = it->second;
+      break;
+    }
+    it++;
+  }
+
+  if (found == 0) {
+    UTF8* fresh = UTF8::acons(length, MSCorlib::arrayChar);
+    memcpy(fresh->elements, buf, len * sizeof(uint16));
+    found = (const UTF8*)fresh;
+    map.insert(std::make_pair(hash, found));
+  }
+
+  lock->unlock();
+  return found;
+}
+
+
Added: vmkit/trunk/lib/N3/VMCore/UTF8.h
URL: http://llvm.org/viewvc/llvm-project/vmkit/trunk/lib/N3/VMCore/UTF8.h?rev=83412&view=auto
==============================================================================
--- vmkit/trunk/lib/N3/VMCore/UTF8.h (added)
+++ vmkit/trunk/lib/N3/VMCore/UTF8.h Tue Oct 6 15:37:42 2009
@@ -0,0 +1,109 @@
+#ifndef _N3_UTF8_
+#define _N3_UTF8_
+
+#include "VMObject.h"
+#include "mvm/PrintBuffer.h"
+
+namespace mvm {
+ class VirtualTable;
+}
+
+namespace n3 {
+ class VMClassArray;
+ class N3;
+
+// VM object holding a sequence of uint16 elements. Instances are created by
+// acons, which sizes the allocation for the requested number of elements.
+class UTF8 : public VMObject {
+public:
+  static VirtualTable* VT;
+  sint32 size;         // number of elements
+  uint16 elements[1];  // first element; acons allocates room for all `size`
+
+  static const llvm::Type* llvmType;
+
+  // Allocates a UTF8 with n elements and initialises it with type cl.
+  static UTF8* acons(sint32 n, VMClassArray* cl);
+  // Initialises the object part, sets size to n and zeroes the n elements.
+  void initialise(VMCommonClass* atype, sint32 n);
+
+  // Bounds-checked element read and write.
+  uint16 at(sint32 offset) const;
+  void setAt(sint32 offset, uint16 value);
+
+  // Writes every element to buf as one char.
+  virtual void print(mvm::PrintBuffer* buf) const;
+
+  // Returns the elements as a 0-terminated char string.
+  char* UTF8ToAsciiz() const;
+  // Build a UTF8 through vm from a C string, or from n uint16 values.
+  static const UTF8* asciizConstruct(N3 *vm, const char* asciiz);
+  static const UTF8* readerConstruct(N3 *vm, uint16* buf, uint32 n);
+
+  // Returns the UTF8 made of the elements in [start, end). The third
+  // argument is an exclusive end offset, not a length: the definition
+  // computes the length as end - start.
+  const UTF8* extract(N3 *vm, uint32 start, uint32 end) const;
+};
+
+// Table of UTF8 values keyed by a 32-bit hash. Several values may share one
+// key, which is why a multimap is used; lookupOrCreateAsciiz and
+// lookupOrCreateReader search it and insert missing values while holding
+// lock.
+class UTF8Map : public mvm::PermanentObject {
+public:
+  // Single definition of the container type, so that `iterator` is exactly
+  // the iterator type of `map`. Taking it from a multimap with a different
+  // key constness and the default allocator only works when the library's
+  // iterators do not depend on the allocator.
+  typedef std::multimap<uint32, const UTF8*, std::less<uint32>,
+                        gc_allocator<std::pair<const uint32, const UTF8*> > > map_type;
+  typedef map_type::iterator iterator;
+
+  mvm::Lock* lock;
+  map_type map;
+
+  // Return the stored UTF8 equal to the given characters, inserting it first
+  // when it is missing.
+  const UTF8* lookupOrCreateAsciiz(const char* asciiz);
+  const UTF8* lookupOrCreateReader(const uint16* buf, uint32 size);
+
+  UTF8Map() {
+    lock = new mvm::LockNormal();
+  }
+
+  // Marks every UTF8 referenced by the table.
+  virtual void TRACER {
+    //lock->MARK_AND_TRACE;
+    for (iterator i = map.begin(), e = map.end(); i!= e; ++i) {
+      i->second->MARK_AND_TRACE;
+    }
+  }
+
+  virtual void print(mvm::PrintBuffer* buf) const {
+    buf->write("UTF8 Hashtable<>");
+  }
+};
+
+
+// Growable buffer of uint16 elements used to concatenate pieces of UTF8
+// values before handing the result to a UTF8Map.
+class UTF8Builder {
+  uint16 *buf;   // heap buffer owned by the builder
+  uint32 cur;    // number of elements appended so far
+  uint32 size;   // capacity of buf, in elements
+
+  // The builder owns buf, so a copy would free the same buffer twice.
+  // Declared but not defined to forbid copying.
+  UTF8Builder(const UTF8Builder&);
+  UTF8Builder& operator=(const UTF8Builder&);
+
+public:
+  // Creates an empty builder with room for `capacity` elements, 4 at least.
+  UTF8Builder(size_t capacity) : buf(0), cur(0), size(0) {
+    if (capacity < 4)
+      capacity = 4;
+    buf = new uint16[capacity];
+    size = capacity;
+  }
+
+  // Appends `length` elements of utf8 starting at offset `start` and returns
+  // this builder so calls can be chained. When length is left at its default
+  // (0xffffffff), everything from start to the end of utf8 is appended.
+  // An explicit length is not checked against utf8->size; that is left to
+  // the caller.
+  UTF8Builder *append(const UTF8 *utf8, uint32 start=0, uint32 length=0xffffffff) {
+    if (length == 0xffffffff) {
+      const uint32 total = (uint32)utf8->size;
+      length = start < total ? total - start : 0;
+    }
+    const uint32 req = cur + length;
+
+    if (req > size) {
+      // Double the capacity until the request fits.
+      uint32 newSize = size;
+      while (newSize < req) {
+        if (newSize > 0x7fffffff) {
+          // One more doubling would overflow; take the exact size instead.
+          newSize = req;
+          break;
+        }
+        newSize <<= 1;
+      }
+      uint16 *newBuf = new uint16[newSize];
+      memcpy(newBuf, buf, cur * sizeof(uint16));
+      delete [] buf;
+      buf = newBuf;
+      size = newSize;
+    }
+
+    memcpy(buf + cur, utf8->elements + start, length * sizeof(uint16));
+    cur = req;
+
+    return this;
+  }
+
+  // Looks up, or creates, the UTF8 holding the elements appended so far.
+  // Only the cur appended elements are passed on, not the whole capacity.
+  const UTF8 *toUTF8(UTF8Map *map) {
+    return map->lookupOrCreateReader(buf, cur);
+  }
+
+  ~UTF8Builder() {
+    delete [] buf;
+  }
+};
+
+}
+
+#endif
More information about the vmkit-commits
mailing list