[llvm-commits] [test-suite] r46573 [2/3] - in /test-suite/trunk/MultiSource/Applications: ./ ClamAV/ ClamAV/dbdir/ ClamAV/inputs/ ClamAV/inputs/rtf-test/ ClamAV/scripts/

Evan Cheng evan.cheng at apple.com
Wed Jan 30 13:17:12 PST 2008


Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,219 @@
+/*
+ *  Copyright (C) 2004 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ * Change History:
+ * $Log: binhex.c,v $
+ * Revision 1.23  2007/02/12 20:46:08  njh
+ * Various tidy
+ *
+ * Revision 1.22  2006/07/31 09:19:52  njh
+ * Use MAP_PRIVATE
+ *
+ * Revision 1.21  2006/07/01 16:17:35  njh
+ * Added destroy flag
+ *
+ * Revision 1.20  2006/07/01 03:47:50  njh
+ * Don't loop if binhex runs out of memory
+ *
+ * Revision 1.19  2006/05/19 11:02:12  njh
+ * Just include mbox.h
+ *
+ * Revision 1.18  2006/04/09 19:59:27  kojm
+ * update GPL headers with new address for FSF
+ *
+ * Revision 1.17  2005/11/06 14:03:26  nigelhorne
+ * Ensure NAME_MAX isn't redefined on BeOS
+ *
+ * Revision 1.16  2005/05/14 16:13:25  nigelhorne
+ * Ensure munmap is the right size
+ *
+ * Revision 1.15  2005/05/13 19:30:34  nigelhorne
+ * Clean cli_realloc call
+ *
+ * Revision 1.14  2005/03/10 08:51:30  nigelhorne
+ * Tidy
+ *
+ * Revision 1.13  2005/01/19 05:29:41  nigelhorne
+ * tidy
+ *
+ * Revision 1.12  2004/12/27 14:17:14  nigelhorne
+ * Fix segfault if write to temporary file fails
+ *
+ * Revision 1.11  2004/12/17 12:03:38  nigelhorne
+ * Tidy up for machines without MMAP
+ *
+ * Revision 1.10  2004/12/16 15:29:51  nigelhorne
+ * Tidy
+ *
+ * Revision 1.9  2004/11/28 22:06:39  nigelhorne
+ * Tidy space only headers code
+ *
+ * Revision 1.8  2004/11/28 21:05:50  nigelhorne
+ * Handle headers with only spaces
+ *
+ * Revision 1.7  2004/11/23 09:05:26  nigelhorne
+ * Fix crash in base64 encoded binhex files
+ *
+ * Revision 1.6  2004/11/22 15:16:53  nigelhorne
+ * Use cli_realloc instead of many cli_mallocs
+ *
+ * Revision 1.5  2004/11/18 20:11:34  nigelhorne
+ * Fix segfault
+ *
+ * Revision 1.4  2004/11/18 19:30:29  kojm
+ * add support for Mac's HQX file format
+ *
+ * Revision 1.3  2004/11/18 18:24:45  nigelhorne
+ * Added binhex.h
+ *
+ * Revision 1.2  2004/11/18 18:09:06  nigelhorne
+ * First draft of binhex.c
+ *
+ */
+static	char	const	rcsid[] = "$Id: binhex.c,v 1.23 2007/02/12 20:46:08 njh Exp $";
+
+#include "clamav.h"
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef	CL_DEBUG
+#define	NDEBUG	/* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef	_REENTRANT
+#define	_REENTRANT	/* for Solaris 2.8 */
+#endif
+#endif
+
+#ifdef	HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include <stdio.h>
+#include <memory.h>
+#include <sys/stat.h>
+#include "others.h"
+
+#include "mbox.h"
+#include "binhex.h"
+
+/*
+ * Decode the BinHex file open on descriptor desc, saving the result in a
+ * file created in directory dir.
+ *
+ * The file is mmap()ed, split into lines (terminated by "\n", "\r\n" or a
+ * lone "\r") which are added to a message, and that message is then decoded
+ * with messageToFileblob().
+ *
+ * Returns:
+ *	CL_CLEAN	decoded, or the file is empty, or mmap() is unavailable
+ *	CL_EOPEN	fstat() failed
+ *	CL_EMEM		the message couldn't be created or the file mapped
+ *	CL_EFORMAT	binhexBegin() found no binhex line
+ *	CL_EIO		the message couldn't be decoded to a file
+ */
+int
+cli_binhex(const char *dir, int desc)
+{
+#ifndef HAVE_MMAP
+	cli_warnmsg("File not decoded - binhex decoding needs mmap() (for now)\n");
+	return CL_CLEAN;
+#else
+	struct stat statb;
+	char *buf, *start, *line;
+	size_t size, bytesleft;
+	message *m;
+	fileblob *fb;
+
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = (size_t)statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+	m = messageCreate();
+	if(m == NULL)
+		return CL_EMEM;
+
+	start = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(buf == MAP_FAILED) {
+		messageDestroy(m);
+		return CL_EMEM;
+	}
+
+	cli_dbgmsg("mmap'ed binhex file\n");
+
+	bytesleft = size;
+	line = NULL;
+
+	/*
+	 * Copy the mapping into the message one line at a time. If a line
+	 * can't be stored the loop stops and whatever has been collected so
+	 * far is still handed to the decoder
+	 */
+	while(bytesleft > 0) {
+		size_t length = 0;
+		char *newline;
+
+		/* Find the end of the line, never looking beyond the mapping */
+		while((length < bytesleft) && (buf[length] != '\n') && (buf[length] != '\r'))
+			length++;
+
+		newline = cli_realloc(line, length + 1);
+		if(newline == NULL)
+			break;
+
+		line = newline;
+
+		memcpy(line, buf, length);
+		line[length] = '\0';
+
+		if(messageAddStr(m, line) < 0)
+			break;
+
+		buf += length;
+		bytesleft -= length;
+
+		if(bytesleft > 0) {
+			/*
+			 * Skip the terminator. Only swallow the byte after
+			 * a '\r' when it really is the '\n' of a "\r\n"
+			 * pair, otherwise a file using bare '\r' line
+			 * endings would lose the first character of every
+			 * following line
+			 */
+			const char eol = *buf++;
+
+			bytesleft--;
+			if((eol == '\r') && (bytesleft > 0) && (*buf == '\n')) {
+				buf++;
+				bytesleft--;
+			}
+		}
+	}
+	munmap(start, size);
+
+	free(line);
+
+	if(binhexBegin(m) == NULL) {
+		messageDestroy(m);
+		cli_errmsg("No binhex line found\n");
+		return CL_EFORMAT;
+	}
+
+	/* similar to binhexMessage */
+	messageSetEncoding(m, "x-binhex");
+
+	fb = messageToFileblob(m, dir, 1);
+	if(fb) {
+		cli_dbgmsg("Binhex file decoded to %s\n", fileblobGetFilename(fb));
+		fileblobDestroy(fb);
+	} else
+		cli_errmsg("Couldn't decode binhex file to %s\n", dir);
+	messageDestroy(m);
+
+	if(fb)
+		return CL_CLEAN;	/* a lie - but it gets things going */
+	return CL_EIO;	/* probably CL_EMEM, but we can't tell at this layer */
+#endif
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,728 @@
+/*
+ *  Copyright (C) 2002 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+static	char	const	rcsid[] = "$Id: blob.c,v 1.64 2007/02/12 22:25:14 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef	C_WINDOWS
+#include "stdafx.h"
+#include <io.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#ifdef	HAVE_SYS_PARAM_H
+#include <sys/param.h>	/* for NAME_MAX */
+#endif
+
+#ifdef	C_DARWIN
+#include <sys/types.h>
+#endif
+
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "others.h"
+#include "mbox.h"
+#include "matcher.h"
+#include "scanners.h"
+
+#ifndef	CL_DEBUG
+#define	NDEBUG	/* map CLAMAV debug onto standard */
+#endif
+
+#ifndef	O_BINARY
+#define	O_BINARY	0
+#endif
+
+#include <assert.h>
+
+#if	defined(C_MINGW) || defined(C_WINDOWS)
+#include <windows.h>
+#endif
+
+#define	MAX_SCAN_SIZE	(20*1024)	/*
+				 * The performance benefit of scanning
+				 * early disappears on medium and
+				 * large sized files. Parenthesised so
+				 * that the macro expands safely inside
+				 * any expression
+				 */
+
+static	const	char	*blobGetFilename(const blob *b);
+
+/*
+ * Allocate a zero-filled blob with cli_calloc() and return it (the result of
+ * cli_calloc() is returned as is, so it may be NULL). Debug builds also tag
+ * the blob with BLOBCLASS and log the creation.
+ */
+blob *
+blobCreate(void)
+{
+	blob *newblob = (blob *)cli_calloc(1, sizeof(blob));
+
+#ifdef	CL_DEBUG
+	if(newblob != NULL)
+		newblob->magic = BLOBCLASS;
+	cli_dbgmsg("blobCreate\n");
+#endif
+	return newblob;
+}
+
+/*
+ * Free a blob together with its name and data buffers. b must not be NULL.
+ */
+void
+blobDestroy(blob *b)
+{
+	/* Check the pointer before the debug message dereferences it */
+	assert(b != NULL);
+
+#ifdef	CL_DEBUG
+	cli_dbgmsg("blobDestroy %d\n", b->magic);
+#else
+	cli_dbgmsg("blobDestroy\n");
+#endif
+
+	assert(b->magic == BLOBCLASS);
+
+	free(b->name);
+	free(b->data);
+#ifdef	CL_DEBUG
+	b->magic = INVALIDCLASS;	/* catch use after destroy */
+#endif
+	free(b);
+}
+
+/*
+ * Destroy the first n blobs held in blobList, working from the last entry
+ * back to the first. Destroyed slots are reset to NULL, empty slots are
+ * skipped.
+ */
+void
+blobArrayDestroy(blob *blobList[], int n)
+{
+	int i;
+
+	assert(blobList != NULL);
+
+	for(i = n - 1; i >= 0; i--) {
+		cli_dbgmsg("blobArrayDestroy: %d\n", i);
+		if(blobList[i] == NULL)
+			continue;
+		blobDestroy(blobList[i]);
+		blobList[i] = NULL;
+	}
+}
+
+/*
+ * Replace the blob's name with a sanitised copy of filename. Any previous
+ * name is freed first. dir is not used. If the copy can't be allocated the
+ * blob is left without a name.
+ */
+/*ARGSUSED*/
+void
+blobSetFilename(blob *b, const char *dir, const char *filename)
+{
+	char *copy;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+	assert(filename != NULL);
+
+	cli_dbgmsg("blobSetFilename: %s\n", filename);
+
+	if(b->name != NULL)
+		free(b->name);
+
+	copy = cli_strdup(filename);
+	if(copy != NULL)
+		sanitiseName(copy);
+
+	b->name = copy;
+}
+
+/*
+ * Return the name stored in the blob (NULL if none has been set). The string
+ * still belongs to the blob.
+ */
+static const char *
+blobGetFilename(const blob *b)
+{
+	const char *stored;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	stored = b->name;
+	return stored;
+}
+
+/*
+ * Append len bytes from data to the blob, enlarging its buffer if needed.
+ * Adding to a closed blob reopens it (with a warning).
+ *
+ * Returns 0 on success (including len == 0) and <0 if memory can't be
+ * allocated, in which case the blob is left unchanged
+ */
+int
+blobAddData(blob *b, const unsigned char *data, size_t len)
+{
+#ifdef	HAVE_GETPAGESIZE
+	static int pagesize;
+	int growth;
+#endif
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+	assert(data != NULL);
+
+	if(len == 0)
+		return 0;
+
+	if(b->isClosed) {
+		/*
+		 * Should be cli_dbgmsg, but I want to see them for now,
+		 * and cli_dbgmsg doesn't support debug levels
+		 */
+		cli_warnmsg("Reopening closed blob\n");
+		b->isClosed = 0;
+	}
+	/*
+	 * The payoff here is between reducing the number of calls to
+	 * malloc/realloc and not overallocating memory. A lot of machines
+	 * are more tight with memory than one may imagine which is why
+	 * we don't just allocate a *huge* amount and be done with it. Closing
+	 * the blob helps because that reclaims memory. If you know the maximum
+	 * size of a blob before you start adding data, use blobGrow() that's
+	 * the most optimum
+	 */
+#ifdef	HAVE_GETPAGESIZE
+	if(pagesize == 0) {
+		pagesize = getpagesize();
+		if(pagesize == 0)
+			pagesize = 4096;
+	}
+	/* Grow in whole pages, always by more than len bytes */
+	growth = pagesize;
+	if(len >= (size_t)pagesize)
+		growth = ((len / pagesize) + 1) * pagesize;
+
+	/*cli_dbgmsg("blobGrow: b->size %lu, b->len %lu, len %lu, growth = %u\n",
+		b->size, b->len, len, growth);*/
+
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc(growth);
+		if(b->data == NULL)
+			return -1;	/* size stays 0 to match data */
+		b->size = growth;
+	} else if(b->size < b->len + (off_t)len) {
+		unsigned char *p = cli_realloc(b->data, b->size + growth);
+
+		if(p == NULL)
+			return -1;
+
+		b->size += growth;
+		b->data = p;
+	}
+#else
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc((off_t)len * 4);
+		if(b->data == NULL)
+			return -1;	/* size stays 0 to match data */
+		b->size = (off_t)len * 4;
+	} else if(b->size < b->len + (off_t)len) {
+		unsigned char *p = cli_realloc(b->data, b->size + (len * 4));
+
+		if(p == NULL)
+			return -1;
+
+		b->size += (off_t)len * 4;
+		b->data = p;
+	}
+#endif
+
+	memcpy(&b->data[b->len], data, len);
+	b->len += (off_t)len;
+
+	return 0;
+}
+
+/*
+ * Return the blob's data buffer, or NULL when the blob holds no data. The
+ * buffer still belongs to the blob.
+ */
+unsigned char *
+blobGetData(const blob *b)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	return (b->len == 0) ? NULL : b->data;
+}
+
+/*
+ * Return the number of bytes of data held in the blob (b->len), which may
+ * be less than the size of the buffer allocated for it.
+ */
+size_t
+blobGetDataSize(const blob *b)
+{
+	size_t used;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	used = b->len;
+	return used;
+}
+
+/*
+ * Mark the blob as closed, i.e. nothing more is expected to be added to it,
+ * and give back unused buffer space when that saves at least 64 bytes.
+ * Closing an already closed blob only produces a warning. If shrinking the
+ * buffer fails the blob is left open and untouched.
+ */
+void
+blobClose(blob *b)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	if(b->isClosed) {
+		cli_warnmsg("Attempt to close a previously closed blob\n");
+		return;
+	}
+
+	if((b->size - b->len) < 64) {
+		/* Too little slack to be worth reclaiming */
+		b->isClosed = 1;
+		return;
+	}
+
+	if(b->len != 0) {
+		unsigned char *shrunk = cli_realloc(b->data, b->len);
+
+		if(shrunk == NULL)
+			return;
+
+		cli_dbgmsg("blobClose: recovered %lu bytes from %lu\n",
+			(unsigned long)(b->size - b->len),
+			(unsigned long)b->size);
+		b->size = b->len;
+		b->data = shrunk;
+	} else {
+		/* Not likely: a buffer was allocated but never used */
+		free(b->data);
+		b->data = NULL;
+		cli_dbgmsg("blobClose: recovered all %lu bytes\n",
+			(unsigned long)b->size);
+		b->size = 0;
+	}
+
+	b->isClosed = 1;
+}
+
+/*
+ * Compare the data held in two blobs.
+ * Returns 0 if they are the same blob, are both empty, or hold identical
+ * bytes; 1 if their data sizes differ; otherwise the result of memcmp()
+ */
+int
+blobcmp(const blob *b1, const blob *b2)
+{
+	size_t len1, len2;
+
+	assert(b1 != NULL);
+	assert(b2 != NULL);
+
+	if(b1 == b2)
+		return 0;
+
+	len1 = blobGetDataSize(b1);
+	len2 = blobGetDataSize(b2);
+
+	if(len1 != len2)
+		return 1;
+
+	/* Same size from here on, so one empty blob means two empty blobs */
+	if(len1 == 0)
+		return 0;
+
+	return memcmp(blobGetData(b1), blobGetData(b2), len1);
+}
+
+/*
+ * Enlarge the blob's buffer by len bytes without adding any data. Growing a
+ * closed blob reopens it (with a warning).
+ *
+ * Returns a clamav return code: CL_SUCCESS if the buffer was enlarged (or
+ * len is 0), CL_EMEM if the memory couldn't be allocated, in which case the
+ * existing buffer and its recorded size are left as they were
+ */
+int
+blobGrow(blob *b, size_t len)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	if(len == 0)
+		return CL_SUCCESS;
+
+	if(b->isClosed) {
+		/*
+		 * Should be cli_dbgmsg, but I want to see them for now,
+		 * and cli_dbgmsg doesn't support debug levels
+		 */
+		cli_warnmsg("Growing closed blob\n");
+		b->isClosed = 0;
+	}
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc(len);
+		if(b->data == NULL)
+			return CL_EMEM;
+		b->size = (off_t)len;
+	} else {
+		unsigned char *ptr = cli_realloc(b->data, b->size + len);
+
+		/*
+		 * The old buffer is still valid (and non-NULL) when the
+		 * realloc fails, so the failure must be reported here
+		 * rather than deduced from b->data
+		 */
+		if(ptr == NULL)
+			return CL_EMEM;
+
+		b->size += (off_t)len;
+		b->data = ptr;
+	}
+
+	return CL_SUCCESS;
+}
+
+/*
+ * Allocate a zero-filled fileblob with cli_calloc() and return it (the
+ * result of cli_calloc() is returned as is, so it may be NULL). Debug builds
+ * also tag the embedded blob with BLOBCLASS and log the creation.
+ */
+fileblob *
+fileblobCreate(void)
+{
+#ifdef	CL_DEBUG
+	fileblob *fb = (fileblob *)cli_calloc(1, sizeof(fileblob));
+	if(fb)
+		fb->b.magic = BLOBCLASS;
+	/* Name this function, not blobCreate, so the two can be told apart */
+	cli_dbgmsg("fileblobCreate\n");
+	return fb;
+#else
+	return (fileblob *)cli_calloc(1, sizeof(fileblob));
+#endif
+}
+
+/*
+ * Scan the fileblob with fileblobScan() and then destroy it.
+ * Returns CL_VIRUS if the scan reported a virus, CL_CLEAN otherwise.
+ * The file is removed when the scan gave a definite answer (CL_VIRUS or
+ * CL_BREAK). When cli_leavetemps_flag is set nothing is scanned or removed
+ * here, the caller must scan
+ */
+int
+fileblobScanAndDestroy(fileblob *fb)
+{
+	int rc;
+
+	if(cli_leavetemps_flag) {
+		/* Can't remove the file, the caller must scan */
+		fileblobDestroy(fb);
+		return CL_CLEAN;
+	}
+
+	rc = fileblobScan(fb);
+
+	if((rc == CL_VIRUS) || (rc == CL_BREAK)) {
+		fileblobDestructiveDestroy(fb);
+		return (rc == CL_VIRUS) ? CL_VIRUS : CL_CLEAN;
+	}
+
+	fileblobDestroy(fb);
+	return CL_CLEAN;
+}
+
+/*
+ * Destroy the fileblob, and remove the file associated with it.
+ *
+ * The stream is closed here whenever it is open, even if the file's full
+ * name was never recorded (so that the file can't be removed): the name is
+ * cleared below, after which fileblobDestroy() would no longer close it
+ */
+void
+fileblobDestructiveDestroy(fileblob *fb)
+{
+	if(fb->fp) {
+		fclose(fb->fp);
+		fb->fp = NULL;
+		if(fb->fullname) {
+			cli_dbgmsg("fileblobDestructiveDestroy: %s\n", fb->fullname);
+			if(unlink(fb->fullname) < 0)
+				cli_warnmsg("fileblobDestructiveDestroy: Can't delete file %s\n", fb->fullname);
+			free(fb->fullname);
+			fb->fullname = NULL;
+		}
+	}
+	if(fb->b.name) {
+		free(fb->b.name);
+		fb->b.name = NULL;
+	}
+	fileblobDestroy(fb);
+}
+
+/*
+ * Destroy the fileblob, and remove the file associated with it if that file is
+ * empty.
+ *
+ * If the fileblob still holds data in memory, i.e. it was never written to a
+ * file, that data is dropped and an error is logged. The name is released on
+ * every path, including when it was set but no file could be created for it
+ */
+void
+fileblobDestroy(fileblob *fb)
+{
+	assert(fb != NULL);
+	assert(fb->b.magic == BLOBCLASS);
+
+	if(fb->b.name && fb->fp) {
+		fclose(fb->fp);
+		if(fb->fullname) {
+			cli_dbgmsg("fileblobDestroy: %s\n", fb->fullname);
+			if(!fb->isNotEmpty) {
+				cli_dbgmsg("fileblobDestroy: not saving empty file\n");
+				if(unlink(fb->fullname) < 0)
+					cli_warnmsg("fileblobDestroy: Can't delete empty file %s\n", fb->fullname);
+			}
+		}
+
+		assert(fb->b.data == NULL);
+	} else if(fb->b.data) {
+		free(fb->b.data);
+		if(fb->b.name)
+			cli_errmsg("fileblobDestroy: %s not saved: report to http://bugs.clamav.net\n",
+				(fb->fullname) ? fb->fullname : fb->b.name);
+		else
+			cli_errmsg("fileblobDestroy: file not saved (%lu bytes): report to http://bugs.clamav.net\n",
+				(unsigned long)fb->b.len);
+	}
+
+	/* The messages above are done with the names, free(NULL) is harmless */
+	free(fb->b.name);
+	free(fb->fullname);
+#ifdef	CL_DEBUG
+	fb->b.magic = INVALIDCLASS;
+#endif
+	free(fb);
+}
+
+/*
+ * Name the fileblob and create, in directory dir, the temporary file that
+ * its data will be written to. Does nothing if the fileblob already has a
+ * name.
+ *
+ * The file name is built from the sanitised filename plus a unique suffix.
+ * Data already buffered in memory is written to the new file and the buffer
+ * released. Failures are logged; afterwards fb->fp is NULL if no file could
+ * be opened.
+ */
+void
+fileblobSetFilename(fileblob *fb, const char *dir, const char *filename)
+{
+	int fd;
+	char fullname[NAME_MAX + 1];
+
+	if(fb->b.name)
+		return;
+
+	assert(filename != NULL);
+	assert(dir != NULL);
+
+	blobSetFilename(&fb->b, dir, filename);
+
+	/*
+	 * Reload the filename, it may be different from the one we've
+	 * asked for, e.g. '/'s taken out
+	 */
+	filename = blobGetFilename(&fb->b);
+
+	assert(filename != NULL);
+	if(filename == NULL)	/* the name couldn't be copied */
+		return;
+
+#ifdef	C_QNX6
+	/*
+	 * QNX6 support from mikep at kaluga.org to fix bug where mkstemp
+	 * can return ETOOLONG even when the file name isn't too long
+	 */
+	snprintf(fullname, sizeof(fullname), "%s/clamavtmpXXXXXXXXXXXXX", dir);
+#else
+	{
+		/*
+		 * Room left for the name once dir, the separator, "XXXXXX"
+		 * and the terminator are accounted for. It must never go
+		 * negative: a negative precision is taken as "no limit"
+		 * and would let the name overrun fullname
+		 */
+		const size_t dirlen = strlen(dir);
+		const int maxname = (dirlen + 9 < sizeof(fullname)) ?
+			(int)(sizeof(fullname) - 9 - dirlen) : 0;
+
+#ifdef	C_WINDOWS
+		sprintf_s(fullname, sizeof(fullname) - 1, "%s\\%.*sXXXXXX", dir,
+			maxname, filename);
+#else
+		/*
+		 * Bounded, so an over-long dir is truncated instead of
+		 * overflowing; creating the file then fails and is
+		 * reported below
+		 */
+		snprintf(fullname, sizeof(fullname), "%s/%.*sXXXXXX", dir,
+			maxname, filename);
+#endif
+	}
+#endif
+
+#if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) || defined(C_QNX6)
+	cli_dbgmsg("fileblobSetFilename: mkstemp(%s)\n", fullname);
+	fd = mkstemp(fullname);
+	if((fd < 0) && (errno == EINVAL)) {
+		/*
+		 * This happens with some Linux flavours when (mis)handling
+		 * filenames with foreign characters
+		 */
+		snprintf(fullname, sizeof(fullname), "%s/clamavtmpXXXXXXXXXXXXX", dir);
+		cli_dbgmsg("fileblobSetFilename: retry as mkstemp(%s)\n", fullname);
+		fd = mkstemp(fullname);
+	}
+#elif	defined(C_WINDOWS)
+	cli_dbgmsg("fileblobSetFilename: _mktemp_s(%s)\n", fullname);
+	if(_mktemp_s(fullname, strlen(fullname) + 1) != 0) {
+		char *name;
+
+		/* _mktemp_s only allows 26 files */
+		cli_dbgmsg("fileblobSetFilename: _mktemp_s(%s) failed: %s\n", fullname, strerror(errno));
+		name = cli_gentemp(dir);
+		if(name == NULL)
+			return;
+		fd = open(name, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+		if(fd >= 0) {
+			strncpy(fullname, name, sizeof(fullname) - 1);
+			/* strncpy doesn't terminate a truncated copy */
+			fullname[sizeof(fullname) - 1] = '\0';
+		}
+		free(name);
+	} else
+		fd = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#else
+	cli_dbgmsg("fileblobSetFilename: mktemp(%s)\n", fullname);
+	(void)mktemp(fullname);
+	fd = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#endif
+
+	if(fd < 0) {
+		cli_errmsg("Can't create temporary file %s: %s\n", fullname, strerror(errno));
+		cli_dbgmsg("%lu %lu\n", (unsigned long)sizeof(fullname),
+			(unsigned long)strlen(fullname));
+		return;
+	}
+
+	cli_dbgmsg("Creating %s\n", fullname);
+
+	fb->fp = fdopen(fd, "wb");
+
+	if(fb->fp == NULL) {
+		cli_errmsg("Can't create file %s: %s\n", fullname, strerror(errno));
+		cli_dbgmsg("%lu %lu\n", (unsigned long)sizeof(fullname),
+			(unsigned long)strlen(fullname));
+		close(fd);
+
+		return;
+	}
+	/* Move anything buffered before the file existed into the file */
+	if(fb->b.data)
+		if(fileblobAddData(fb, fb->b.data, fb->b.len) == 0) {
+			free(fb->b.data);
+			fb->b.data = NULL;
+			fb->b.len = fb->b.size = 0;
+			fb->isNotEmpty = 1;
+		}
+
+	/*
+	 * If this strdup fails, then if the file is empty it won't be removed
+	 * until later. Since this is only a trivial issue, there is no need
+	 * to error if it fails to allocate
+	 */
+	fb->fullname = cli_strdup(fullname);
+}
+
+/*
+ * Add len bytes of data to the fileblob.
+ *
+ * Before a file has been opened the data is buffered in the embedded blob.
+ * Afterwards it is written to the file and, when a scan context has been
+ * set and the scan limits haven't been reached, the buffer is scanned on
+ * its way through. Once something has been found nothing more is written.
+ *
+ * Returns 0 on success (also when the write is skipped because the
+ * fileblob is already known to be infected), <0 on failure
+ */
+int
+fileblobAddData(fileblob *fb, const unsigned char *data, size_t len)
+{
+	if(len == 0)
+		return 0;
+
+	assert(data != NULL);
+
+	if(fb->fp == NULL)	/* no file yet, keep the data in memory */
+		return blobAddData(&(fb->b), data, len);
+
+#if	defined(MAX_SCAN_SIZE) && (MAX_SCAN_SIZE > 0)
+	{
+		const cli_ctx *ctx = fb->ctx;
+
+		if(fb->isInfected)	/* pretend all was written */
+			return 0;
+
+		if(ctx != NULL) {
+			int skip_scan = (fb->bytes_scanned > MAX_SCAN_SIZE);
+
+			if(ctx->limits && (fb->bytes_scanned >= ctx->limits->maxfilesize))
+				skip_scan = 1;
+
+			if(!skip_scan) {
+				if(ctx->scanned)
+					*ctx->scanned += (unsigned long)len / CL_COUNT_PRECISION;
+				fb->bytes_scanned += (unsigned long)len;
+
+				if((len > 5) && (cli_scanbuff(data, (unsigned int)len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS)) {
+					cli_dbgmsg("fileblobAddData: found %s\n", *ctx->virname);
+					fb->isInfected = 1;
+				}
+			}
+		}
+	}
+#endif
+
+	if(fwrite(data, len, 1, fb->fp) != 1) {
+		cli_errmsg("fileblobAddData: Can't write %lu bytes to temporary file %s: %s\n",
+			(unsigned long)len, fb->b.name, strerror(errno));
+		return -1;
+	}
+
+	fb->isNotEmpty = 1;
+	return 0;
+}
+
+/*
+ * Return the name of the fileblob, as held by its embedded blob.
+ */
+const char *
+fileblobGetFilename(const fileblob *fb)
+{
+	const blob *inner = &(fb->b);
+
+	return blobGetFilename(inner);
+}
+
+/*
+ * Record the scan context to use for this fileblob. The pointer is stored,
+ * not copied.
+ */
+void
+fileblobSetCTX(fileblob *fb, cli_ctx *ctx)
+{
+	(*fb).ctx = ctx;
+}
+
+/*
+ * Performs a full scan on the fileblob, returning ClamAV status:
+ *	CL_BREAK means clean
+ *	CL_CLEAN means unknown
+ *	CL_VIRUS means infected
+ * CL_ENULLARG is returned if the fileblob has no file name yet
+ */
+int
+fileblobScan(const fileblob *fb)
+{
+	if(fb->isInfected)
+		return CL_VIRUS;
+
+	if(fb->fullname == NULL) {
+		/* shouldn't happen, scan called before fileblobSetFilename */
+		cli_warnmsg("fileblobScan, fullname == NULL\n");
+		return CL_ENULLARG;	/* there is no CL_UNKNOWN */
+	}
+
+	if(fb->ctx == NULL) {
+		/* fileblobSetCTX hasn't been called */
+		cli_dbgmsg("fileblobScan, ctx == NULL\n");
+		return CL_CLEAN;	/* there is no CL_UNKNOWN */
+	}
+
+#ifdef	C_WINDOWS
+	/*
+	 * FIXME: On Windows, cli_readn gives "bad file descriptor" when called
+	 * by cli_check_mydoom_log from the call to cli_magic_scandesc
+	 * which implies that the file descriptor is getting closed somewhere,
+	 * but I can't see where.
+	 * One possible fix would be to duplicate cli_scanfile here.
+	 * For now ensure that the file is saved and scanned by the caller
+	 */
+	return CL_CLEAN;	/* there is no CL_UNKNOWN :-( */
+#else	/*!C_WINDOWS*/
+	{
+		int scanfd, status;
+
+		/* Scan a duplicate so that fb->fp's own descriptor stays open */
+		fflush(fb->fp);
+		scanfd = dup(fileno(fb->fp));
+		if(scanfd == -1) {
+			cli_warnmsg("%s: dup failed\n", fb->fullname);
+			return CL_CLEAN;
+		}
+
+		/* cli_scanfile is static :-( so scan by descriptor */
+		status = cli_magic_scandesc(scanfd, fb->ctx);
+		close(scanfd);
+
+		if(status != CL_VIRUS) {
+			cli_dbgmsg("%s is clean\n", fb->fullname);
+			return CL_BREAK;
+		}
+
+		cli_dbgmsg("%s is infected\n", fb->fullname);
+		return CL_VIRUS;
+	}
+#endif	/*C_WINDOWS*/
+}
+
+/*
+ * Doesn't perform a full scan, just reports the fileblob's isInfected flag,
+ * i.e. whether something suspicious has been seen in the data added so far
+ */
+int
+fileblobInfected(const fileblob *fb)
+{
+	const int seen = fb->isInfected;
+
+	return seen;
+}
+
+/*
+ * Replace, in place, every character of name that isn't wanted in a file
+ * name with '_'. Different operating systems allow different characters in
+ * their filenames, so the set depends on the platform; by default only '/'
+ * is replaced. On Darwin the top bit of every character is cleared first.
+ * FIXME: What does QNX want? There is no #ifdef C_QNX, but if there were
+ * it may be best to treat it like MSDOS
+ */
+void
+sanitiseName(char *name)
+{
+	char *p;
+
+	for(p = name; *p != '\0'; p++) {
+#ifdef	C_DARWIN
+		*p &= '\177';
+#endif
+		/* Also check for tab - "Heinz Martin" <Martin at hemag.ch> */
+#if	defined(MSDOS) || defined(C_OS2)
+		/*
+		 * Don't take it from this that ClamAV supports DOS, it doesn't
+		 * I don't know if spaces are legal in OS/2.
+		 */
+		if(strchr("%/*?<>|\\\"+=,;:\t ~", *p) != NULL)
+#elif	defined(C_CYGWIN) || defined(C_WINDOWS)
+		if(strchr("%/*?<>|\\\"+=,;:\t~", *p) != NULL)
+#else
+		if(*p == '/')
+#endif
+			*p = '_';
+	}
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,711 @@
+/*
+ *  Copyright (C) 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This code is based on the work of Stuart Caie and the official
+ *  specification.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#include "cltypes.h"
+#include "others.h"
+#include "mspack.h"
+#include "cab.h"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define EC16(x) le16_to_host(x)
+
+#ifndef O_BINARY
+#define O_BINARY        0
+#endif
+
+/* hard limits */
+#define CAB_FOLDER_LIMIT    5000
+#define CAB_FILE_LIMIT	    5000
+
+/* Cabinet format data structures */
+
+struct cab_hdr {	/* fixed part of the cabinet header, read straight from the file by cab_open */
+    uint32_t	signature;	/* file signature, 0x4643534d after conversion from little endian */
+    uint32_t	res1;		/* reserved */
+    uint32_t	cbCabinet;	/* size of cabinet file, compared by cab_open with the real file size */
+    uint32_t	res2;		/* reserved */
+    uint32_t	coffFiles;	/* offset of the first file entry */
+    uint32_t	res3;		/* reserved */
+    uint8_t	versionMinor;   /* file format version, minor (cab_open expects 3) */
+    uint8_t	versionMajor;	/* file format version, major (cab_open expects 1) */
+    uint16_t	cFolders;	/* number of folder entries, little endian */
+    uint16_t	cFiles;		/* number of file entries, little endian */
+    uint16_t	flags;		/* option flags: 0x0001 preceding cabinet, 0x0002 next cabinet, 0x0004 struct cab_hdr_opt follows */
+    uint16_t	setID;		/* multiple cabs related */
+    uint16_t	iCabinet;	/* multiple cabs related */
+};
+
+struct cab_hdr_opt {	/* optional header, read by cab_open only when flag 0x0004 is set */
+    uint16_t	cbCFHeader;	/* size of reserved header area, skipped by cab_open */
+    uint8_t	cbCFFolder;	/* size of reserved folder area, skipped after each folder header */
+    uint8_t	cbCFData;	/* size of reserved block area */
+};
+
+struct cab_folder_hdr	/* one per folder, read straight from the file by cab_open */
+{
+    uint32_t	coffCabStart;	/* offset of the first data block, relative to the start of the cabinet */
+    uint16_t	cCFData;	/* number of data blocks */
+    uint16_t	typeCompress;	/* compression type, cab_open treats a low nibble above 3 as suspicious */
+};
+
+struct cab_file_hdr	/* one per file, read straight from the file by cab_open */
+{
+    uint32_t	cbFile;		    /* uncompressed size of the file */
+    uint32_t	uoffFolderStart;    /* uncompressed offset of file in folder */
+    uint16_t	iFolder;	    /* folder index */
+    uint16_t	date;		    /* date stamp */
+    uint16_t	time;		    /* time stamp */
+    uint16_t	attribs;	    /* attribute flags */
+};
+
+struct cab_block_hdr	/* header of a single data block */
+{
+    uint32_t	csum;	    /* data block checksum */
+    uint16_t	cbData;	    /* number of compressed bytes in the block */
+    uint16_t	cbUncomp;   /* number of uncompressed bytes in the block */
+};
+
+/*
+ * Read a NUL terminated string of at most 255 characters from the current
+ * position of fd and leave the file positioned just after the terminator.
+ *
+ * Returns a copy of the string, to be freed by the caller, with *ret set to
+ * CL_SUCCESS. On failure returns NULL with *ret set to CL_EIO (lseek
+ * failed), CL_EFORMAT (no terminator in the bytes read) or CL_EMEM.
+ */
+static char *cab_readstr(int fd, int *ret)
+{
+	int nread;
+	char buff[256], *copy;
+	const char *nul = NULL;
+	off_t start;
+
+
+    start = lseek(fd, 0, SEEK_CUR);
+    if(start == -1) {
+	*ret = CL_EIO;
+	return NULL;
+    }
+
+    /* A failed or empty read leaves nul unset and is treated as "not found" */
+    nread = read(fd, buff, sizeof(buff));
+    if(nread > 0)
+	nul = memchr(buff, '\0', (size_t) nread);
+
+    if(nul == NULL) {
+	*ret = CL_EFORMAT;
+	return NULL;
+    }
+
+    /* Step back from the end of the read to just past the terminator */
+    if(lseek(fd, (off_t) (start + (nul - buff) + 1), SEEK_SET) == -1) {
+	*ret = CL_EIO;
+	return NULL;
+    }
+
+    copy = cli_strdup(buff);
+    if(copy == NULL) {
+	*ret = CL_EMEM;
+	return NULL;
+    }
+
+    *ret = CL_SUCCESS;
+    return copy;
+}
+
+/*
+ * Check a name taken from a cabinet header.
+ * Returns 1 if it contains a non-ASCII character or one of the characters
+ * in the disallowed set below, 0 otherwise (including for an empty name).
+ */
+static int cab_chkname(const char *name)
+{
+	const char *p;
+
+
+    for(p = name; *p != '\0'; p++) {
+	if(!isascii(*p) || strchr("%/*?|\\\"+=<>;:\t ", *p)) {
+	    cli_dbgmsg("cab_chkname: File name contains disallowed characters\n");
+	    return 1;
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * Free the folder and file lists of a cabinet, including the file names.
+ * The cab_archive structure itself is not freed, both of its list heads
+ * are left NULL.
+ */
+void cab_free(struct cab_archive *cab)
+{
+	struct cab_folder *folder, *nextfolder;
+	struct cab_file *file, *nextfile;
+
+
+    for(folder = cab->folders; folder != NULL; folder = nextfolder) {
+	nextfolder = folder->next;	/* fetch before the node goes away */
+	free(folder);
+    }
+    cab->folders = NULL;
+
+    for(file = cab->files; file != NULL; file = nextfile) {
+	nextfile = file->next;
+	free(file->name);
+	free(file);
+    }
+    cab->files = NULL;
+}
+
+int cab_open(int fd, off_t offset, struct cab_archive *cab)
+{
+	unsigned int i, bscore = 0, badname = 0;
+	struct cab_file *file, *lfile = NULL;
+	struct cab_folder *folder, *lfolder = NULL;
+	struct cab_hdr hdr;
+	struct cab_hdr_opt hdr_opt;
+	struct cab_folder_hdr folder_hdr;
+	struct cab_file_hdr file_hdr;
+	struct stat sb;
+	uint16_t fidx;
+	char *pt;
+	int ret;
+	off_t resfold = 0, rsize;
+
+
+    if(lseek(fd, offset, SEEK_SET) == -1) {
+	cli_errmsg("cab_open: Can't lseek to %u (offset)\n", (unsigned int) offset);
+	return CL_EIO;
+    }
+
+    if(cli_readn(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+	cli_dbgmsg("cab_open: Can't read cabinet header\n");
+	return CL_EIO;
+    }
+
+    if(EC32(hdr.signature) != 0x4643534d) {
+	cli_dbgmsg("cab_open: Incorrect CAB signature\n");
+	return CL_EFORMAT;
+    } else {
+	cli_dbgmsg("CAB: -------------- Cabinet file ----------------\n");
+    }
+
+    if(fstat(fd, &sb) == -1) {
+	cli_errmsg("cab_open: Can't fstat descriptor %d\n", fd);
+	return CL_EIO;
+    }
+    rsize = sb.st_size;
+
+    memset(cab, 0, sizeof(struct cab_archive));
+
+    cab->length = EC32(hdr.cbCabinet);
+    cli_dbgmsg("CAB: Cabinet length: %u\n", cab->length);
+    if((off_t) cab->length > rsize)
+	bscore++;
+
+    cab->nfolders = EC16(hdr.cFolders);
+    if(!cab->nfolders) {
+	cli_dbgmsg("cab_open: No folders in cabinet (fake cab?)\n");
+	return CL_EFORMAT;
+    } else {
+	cli_dbgmsg("CAB: Folders: %u\n", cab->nfolders);
+	if(cab->nfolders > CAB_FOLDER_LIMIT) {
+	    cab->nfolders = CAB_FOLDER_LIMIT;
+	    cli_dbgmsg("CAB: *** Number of folders limited to %u ***\n", cab->nfolders);
+	    bscore++;
+	}
+    }
+
+    cab->nfiles = EC16(hdr.cFiles);
+    if(!cab->nfiles) {
+	cli_dbgmsg("cab_open: No files in cabinet (fake cab?)\n");
+	return CL_EFORMAT;
+    } else {
+	cli_dbgmsg("CAB: Files: %u\n", cab->nfiles);
+	if(cab->nfiles > CAB_FILE_LIMIT) {
+	    cab->nfiles = CAB_FILE_LIMIT;
+	    cli_dbgmsg("CAB: *** Number of files limited to %u ***\n", cab->nfiles);
+	    bscore++;
+	}
+    }
+
+    cli_dbgmsg("CAB: File format version: %u.%u\n", hdr.versionMajor, hdr.versionMinor);
+    if(hdr.versionMajor != 1 || hdr.versionMinor != 3)
+	bscore++;
+
+    cab->flags = EC16(hdr.flags);
+    if(cab->flags & 0x0004) {
+	if(cli_readn(fd, &hdr_opt, sizeof(hdr_opt)) != sizeof(hdr_opt)) {
+	    cli_dbgmsg("cab_open: Can't read file header (fake cab?)\n");
+	    return CL_EIO;
+	}
+
+	cab->reshdr = EC16(hdr_opt.cbCFHeader);
+	resfold = hdr_opt.cbCFFolder;
+	cab->resdata = hdr_opt.cbCFData;
+
+	if(cab->reshdr) {
+	    if(lseek(fd, cab->reshdr, SEEK_CUR) == -1) {
+		cli_dbgmsg("cab_open: Can't lseek to %u (fake cab?)\n", cab->reshdr);
+		return CL_EIO;
+	    }
+	}
+    }
+
+    if(cab->flags & 0x0001) { /* preceeding cabinet */
+	/* name */
+	pt = cab_readstr(fd, &ret);
+	if(ret)
+	    return ret;
+	if(cab_chkname(pt))
+	    badname = 1;
+	else
+	    cli_dbgmsg("CAB: Preceeding cabinet name: %s\n", pt);
+	free(pt);
+	/* info */
+	pt = cab_readstr(fd, &ret);
+	if(ret)
+	    return ret;
+	if(cab_chkname(pt))
+	    badname = 1;
+	else
+	    cli_dbgmsg("CAB: Preceeding cabinet info: %s\n", pt);
+	free(pt);
+    }
+
+    if(cab->flags & 0x0002) { /* next cabinet */
+	/* name */
+	pt = cab_readstr(fd, &ret);
+	if(ret)
+	    return ret;
+	if(cab_chkname(pt))
+	    badname = 1;
+	else
+	    cli_dbgmsg("CAB: Next cabinet name: %s\n", pt);
+	free(pt);
+	/* info */
+	pt = cab_readstr(fd, &ret);
+	if(ret)
+	    return ret;
+	if(cab_chkname(pt))
+	    badname = 1;
+	else
+	    cli_dbgmsg("CAB: Next cabinet info: %s\n", pt);
+	free(pt);
+    }
+    bscore += badname;
+
+    if(bscore >= 4) {
+	cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+	return CL_EFORMAT;
+    }
+
+    /* folders */
+    for(i = 0; i < cab->nfolders; i++) {
+	if(cli_readn(fd, &folder_hdr, sizeof(folder_hdr)) != sizeof(folder_hdr)) {
+	    cli_errmsg("cab_open: Can't read header for folder %u\n", i);
+	    cab_free(cab);
+	    return CL_EIO;
+	}
+
+	if(resfold) {
+	    if(lseek(fd, resfold, SEEK_CUR) == -1) {
+		cli_errmsg("cab_open: Can't lseek to %u (resfold)\n", (unsigned int) resfold);
+		cab_free(cab);
+		return CL_EIO;
+	    }
+	}
+
+	folder = (struct cab_folder *) cli_calloc(1, sizeof(struct cab_folder));
+	if(!folder) {
+	    cli_errmsg("cab_open: Can't allocate memory for folder\n");
+	    cab_free(cab);
+	    return CL_EMEM;
+	}
+
+	folder->cab = (struct cab_archive *) cab;
+	folder->offset = (off_t) EC32(folder_hdr.coffCabStart) + offset;
+	if(folder->offset > rsize)
+	    bscore++;
+	folder->nblocks = EC16(folder_hdr.cCFData);
+	folder->cmethod = EC16(folder_hdr.typeCompress);
+
+	cli_dbgmsg("CAB: Folder record %u\n", i);
+	cli_dbgmsg("CAB: Folder offset: %u\n", (unsigned int) folder->offset);
+	cli_dbgmsg("CAB: Folder compression method: %d\n", folder->cmethod);
+	if((folder->cmethod & 0x000f) > 3)
+	    bscore++;
+
+	if(!lfolder)
+	    cab->folders = folder;
+	else
+	    lfolder->next = folder;
+
+	lfolder = folder;
+
+	if(bscore > 10) {
+	    cab_free(cab);
+	    cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+	    return CL_EFORMAT;
+	}
+    }
+
+    /* files */
+    for(i = 0; i < cab->nfiles; i++) {
+	if(bscore > 10) {
+	    cab_free(cab);
+	    cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+	    return CL_EFORMAT;
+	}
+
+	if(cli_readn(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+	    cli_errmsg("cab_open: Can't read file %u header\n", i);
+	    cab_free(cab);
+	    return CL_EIO;
+	}
+
+	file = (struct cab_file *) cli_calloc(1, sizeof(struct cab_file));
+	if(!file) {
+	    cli_errmsg("cab_open: Can't allocate memory for file\n");
+	    cab_free(cab);
+	    return CL_EMEM;
+	}
+
+	file->cab = cab;
+	file->fd = fd;
+	file->length = EC32(file_hdr.cbFile);
+	file->offset = EC32(file_hdr.uoffFolderStart);
+	file->attribs = EC32(file_hdr.attribs);
+	fidx = EC32(file_hdr.iFolder);
+
+	file->name = cab_readstr(fd, &ret);
+	if(ret) {
+	    free(file);
+	    cab_free(cab);
+	    return ret;
+	}
+
+	cli_dbgmsg("CAB: File record %u\n", i);
+	cli_dbgmsg("CAB: File name: %s\n", file->name);
+	cli_dbgmsg("CAB: File offset: %u\n", (unsigned int) file->offset);
+	cli_dbgmsg("CAB: File folder index: %u\n", fidx);
+	cli_dbgmsg("CAB: File attribs: 0x%x\n", file->attribs);
+	if(file->attribs & 0x01)
+	    cli_dbgmsg("CAB:   * file is read-only\n");
+	if(file->attribs & 0x02)
+	    cli_dbgmsg("CAB:   * file is hidden\n");
+	if(file->attribs & 0x04)
+	    cli_dbgmsg("CAB:   * file is a system file\n");
+	if(file->attribs & 0x20)
+	    cli_dbgmsg("CAB:   * file modified since last backup\n");
+	if(file->attribs & 0x40)
+	    cli_dbgmsg("CAB:   * file to be run after extraction\n");
+	if(file->attribs & 0x80)
+	    cli_dbgmsg("CAB:   * file name contains UTF\n");
+
+	/* folder index */
+	if(fidx < 0xfffd) {
+	    if(fidx > cab->nfolders) {
+		if(bscore < 3)
+		    cli_dbgmsg("cab_open: File %s is not associated with any folder\n", file->name);
+		bscore++;
+		free(file->name);
+		free(file);
+		continue;
+	    }
+
+	    file->folder = cab->folders;
+	    while(file->folder && fidx--)
+		file->folder = file->folder->next;
+
+	    if(!file->folder) {
+		cli_errmsg("cab_open: Folder not found for file %s\n", file->name);
+		free(file->name);
+		free(file);
+		cab_free(cab);
+		return CL_EFORMAT;
+	    }
+
+	} else {
+	    cli_dbgmsg("CAB: File is split *skipping*\n");
+	    free(file->name);
+	    free(file);
+	    continue;
+	}
+
+	if(!lfile)
+	    cab->files = file;
+	else
+	    lfile->next = file;
+
+	lfile = file;
+
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Read the next data block from fd into state->block.
+ *
+ * The block header is read first, `resdata` reserved bytes are skipped
+ * when non-zero, and the payload length / uncompressed length from the
+ * header are stored in state->blklen / state->outlen and checked against
+ * CAB_INPUTMAX / CAB_BLOCKMAX.  On success state->pt and state->end
+ * delimit the payload just read.
+ *
+ * Returns CL_SUCCESS, CL_EIO (read or seek failure) or CL_EFORMAT
+ * (a length exceeds its limit).
+ */
+static int cab_read_block(int fd, struct cab_state *state, uint16_t resdata)
+{
+	struct cab_block_hdr hdr;
+
+
+    /* fixed-size block header comes first */
+    if(cli_readn(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+	cli_dbgmsg("cab_read_block: Can't read block header\n");
+	return CL_EIO;
+    }
+
+    /* step over the reserved per-block area, if the caller reports one */
+    if(resdata) {
+	if(lseek(fd, (off_t) resdata, SEEK_CUR) == -1) {
+	    cli_dbgmsg("cab_read_block: lseek failed\n");
+	    return CL_EIO;
+	}
+    }
+
+    /* the lengths are stored in the state before they are validated */
+    state->blklen = EC16(hdr.cbData);
+    if(state->blklen > CAB_INPUTMAX) {
+	cli_dbgmsg("cab_read_block: block size > CAB_INPUTMAX\n");
+	return CL_EFORMAT;
+    }
+
+    state->outlen = EC16(hdr.cbUncomp);
+    if(state->outlen > CAB_BLOCKMAX) {
+	cli_dbgmsg("cab_read_block: output size > CAB_BLOCKMAX\n");
+	return CL_EFORMAT;
+    }
+
+    if(cli_readn(fd, state->block, state->blklen) != state->blklen) {
+	cli_dbgmsg("cab_read_block: Can't read block data\n");
+	return CL_EIO;
+    }
+
+    /* expose the fresh payload as the window [pt, end) */
+    state->pt = state->block;
+    state->end = state->block + state->blklen;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Copy up to `bytes` bytes of raw folder data into `buffer`, loading
+ * further data blocks with cab_read_block() whenever the current block
+ * is drained.  cab_extract() hands this function to the MSZIP, Quantum
+ * and LZX decoders as their input callback, and cab_unstore() calls it
+ * directly.
+ *
+ * Returns the number of bytes copied.  The count is short when the folder
+ * has no blocks left (file->error is then CL_EFORMAT).  Returns -1 when
+ * cab_read_block() fails (file->error holds its code).  A non-positive
+ * `bytes` copies nothing and returns 0.
+ */
+static int cab_read(struct cab_file *file, unsigned char *buffer, int bytes)
+{
+	/* plain ints: 16-bit counters silently truncated requests of
+	 * 64 KiB or more and turned negative requests into large copies */
+	int todo, left;
+
+
+    todo = bytes;
+    while(todo > 0) {
+	left = (int) (file->state->end - file->state->pt);
+
+	if(left > 0) {
+	    if(left > todo)
+		left = todo;
+
+	    memcpy(buffer, file->state->pt, left);
+	    file->state->pt += left;
+	    buffer += left;
+	    todo -= left;
+
+	} else {
+	    /* current block exhausted: fetch the next one, if any */
+	    if(file->state->blknum++ >= file->folder->nblocks) {
+		file->error = CL_EFORMAT;
+		break;
+	    }
+
+	    file->error = cab_read_block(file->fd, file->state, file->cab->resdata);
+	    if(file->error)
+		return -1;
+
+	    if((file->folder->cmethod & 0x000f) == 0x0002) /* Quantum hack */
+		*file->state->end++ = 0xff;
+
+	    if(file->state->blknum >= file->folder->nblocks) {
+		/* last block of the folder */
+		if((file->folder->cmethod & 0x000f) == 0x0003) { /* LZX hack */
+		    lzx_set_output_length(file->state->stream, (off_t) ((file->state->blknum - 1) * CAB_BLOCKMAX + file->state->outlen));
+		}
+	    } else {
+		if(file->state->outlen != CAB_BLOCKMAX) {
+		    cli_dbgmsg("cab_read: WARNING: partial data block\n");
+		}
+	    }
+	}
+    }
+
+    return bytes - todo;
+}
+
+/*
+ * Pull `bytes` bytes of stored (uncompressed) data through cab_read()
+ * and, when `wflag` is non-zero, write them to file->ofd.  With
+ * wflag == 0 the data is read and discarded.
+ *
+ * Only the bytes cab_read() actually delivered are written; previously
+ * a short read still wrote the full request, leaking uninitialised
+ * stack contents into the output file.
+ *
+ * Returns CL_SUCCESS (also when the folder ends early), CL_EFORMAT for a
+ * negative `bytes`, or CL_EIO when reading or writing fails.
+ */
+static int cab_unstore(struct cab_file *file, int bytes, uint8_t wflag)
+{
+	int todo, want, bread;
+	unsigned char buff[4096];
+
+
+    if(bytes < 0) {
+	cli_warnmsg("cab_unstore: bytes < 0\n");
+	return CL_EFORMAT;
+    }
+
+    todo = bytes;
+
+    do {
+	/* at most one buffer per round */
+	want = ((unsigned int) todo <= sizeof(buff)) ? todo : (int) sizeof(buff);
+
+	bread = cab_read(file, buff, want);
+	if(bread == -1) {
+	    cli_dbgmsg("cab_unstore: cab_read failed for descriptor %d\n", file->fd);
+	    return CL_EIO;
+	}
+
+	if(wflag && bread > 0 && cli_writen(file->ofd, buff, bread) == -1) {
+	    cli_dbgmsg("cab_unstore: Can't write to descriptor %d\n", file->ofd);
+	    return CL_EIO;
+	}
+
+	todo -= bread;
+
+	/* bread == 0 means cab_read() has run out of data */
+    } while(bread > 0 && todo > 0);
+
+    return CL_SUCCESS;
+}
+
+/* Free the per-extraction state, close the output file and pass `ret` through. */
+static int cab_extract_cleanup(struct cab_file *file, int ret)
+{
+    free(file->state);
+    file->state = NULL; /* do not leave a dangling pointer in the file record */
+    close(file->ofd);
+
+    return ret;
+}
+
+/*
+ * Extract `file` from its cabinet into a newly created file `name`.
+ *
+ * The input descriptor is positioned at the start of the file's folder,
+ * a fresh cab_state is allocated, and the folder's compression method
+ * (low nibble of cmethod: 0 store, 1 MSZIP, 2 Quantum, 3 LZX) selects the
+ * decoder.  The first file->offset bytes of the folder are decoded with
+ * output disabled, then file->length bytes are written.
+ *
+ * Returns the decoder's result, or CL_ENULLARG (NULL argument or no
+ * folder), CL_EIO (seek/open failure), CL_EMEM (state allocation failure),
+ * CL_EMSCAB (decoder initialisation failure) or CL_EFORMAT (unsupported
+ * method).  file->state is always NULL on return.
+ */
+int cab_extract(struct cab_file *file, const char *name)
+{
+	struct cab_folder *folder;
+	int ret;
+
+
+    if(!file || !name) {
+	cli_errmsg("cab_extract: !file || !name\n");
+	return CL_ENULLARG;
+    }
+
+    if(!(folder = file->folder)) {
+	cli_errmsg("cab_extract: file->folder == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    if(lseek(file->fd, folder->offset, SEEK_SET) == -1) {
+	cli_errmsg("cab_extract: Can't lseek to %u\n", (unsigned int) folder->offset);
+	return CL_EIO;
+    }
+
+    file->state = (struct cab_state *) cli_calloc(1, sizeof(struct cab_state));
+    if(!file->state) {
+	cli_errmsg("cab_extract: Can't allocate memory for internal state\n");
+	return CL_EMEM; /* was CL_EIO; cab_open reports allocation failures as CL_EMEM */
+    }
+
+    file->ofd = open(name, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+    if(file->ofd == -1) {
+	cli_errmsg("cab_extract: Can't open file %s in write mode\n", name);
+	free(file->state);
+	file->state = NULL;
+	return CL_EIO;
+    }
+
+    switch(folder->cmethod & 0x000f) {
+	case 0x0000: /* STORE */
+	    /* skip the data that precedes this file in the folder */
+	    if(file->offset > 0)
+		cab_unstore(file, file->offset, 0);
+
+	    ret = cab_unstore(file, file->length, 1);
+	    break;
+
+	case 0x0001: /* MSZIP */
+	    cli_dbgmsg("CAB: Compression method: MSZIP\n");
+	    file->state->stream = (struct mszip_stream *) mszip_init(file->fd, file->ofd, 4096, 1, file, &cab_read);
+	    if(!file->state->stream)
+		return cab_extract_cleanup(file, CL_EMSCAB);
+
+	    if(file->offset > 0) {
+		((struct mszip_stream *) file->state->stream)->wflag = 0;
+		ret = mszip_decompress(file->state->stream, file->offset);
+		((struct mszip_stream *) file->state->stream)->wflag = 1;
+		if(ret < 0) {
+		    /* the skip pass failed: start over from the folder start */
+		    mszip_free(file->state->stream);
+		    memset(file->state, 0, sizeof(struct cab_state));
+		    file->state->stream = (struct mszip_stream *) mszip_init(file->fd, file->ofd, 4096, 1, file, &cab_read);
+		    if(!file->state->stream)
+			return cab_extract_cleanup(file, CL_EMSCAB);
+
+		    if(lseek(file->fd, folder->offset, SEEK_SET) == -1) {
+			cli_errmsg("cab_extract: Can't lseek to %u\n", (unsigned int) folder->offset);
+			mszip_free(file->state->stream);
+			return cab_extract_cleanup(file, CL_EIO);
+		    }
+		}
+	    }
+	    ret = mszip_decompress(file->state->stream, file->length);
+	    mszip_free(file->state->stream);
+	    break;
+
+	case 0x0002: /* QUANTUM */
+	    cli_dbgmsg("CAB: Compression method: QUANTUM\n");
+	    file->state->stream = (struct qtm_stream *) qtm_init(file->fd, file->ofd, (int) (folder->cmethod >> 8) & 0x1f, 4096, file, &cab_read);
+	    if(!file->state->stream)
+		return cab_extract_cleanup(file, CL_EMSCAB);
+
+	    if(file->offset > 0) {
+		((struct qtm_stream *) file->state->stream)->wflag = 0;
+		qtm_decompress(file->state->stream, file->offset);
+		((struct qtm_stream *) file->state->stream)->wflag = 1;
+	    }
+	    ret = qtm_decompress(file->state->stream, file->length);
+	    qtm_free(file->state->stream);
+	    break;
+
+	case 0x0003: /* LZX */
+	    cli_dbgmsg("CAB: Compression method: LZX\n");
+	    file->state->stream = (struct lzx_stream *) lzx_init(file->fd, file->ofd, (int) (folder->cmethod >> 8) & 0x1f, 0, 4096, 0, file, &cab_read);
+	    if(!file->state->stream)
+		return cab_extract_cleanup(file, CL_EMSCAB);
+
+	    if(file->offset > 0) {
+		((struct lzx_stream *) file->state->stream)->wflag = 0;
+		ret = lzx_decompress(file->state->stream, file->offset);
+		((struct lzx_stream *) file->state->stream)->wflag = 1;
+		if(ret < 0) {
+		    /* the skip pass failed: start over from the folder start */
+		    lzx_free(file->state->stream);
+		    memset(file->state, 0, sizeof(struct cab_state));
+		    file->state->stream = (struct lzx_stream *) lzx_init(file->fd, file->ofd, (int) (folder->cmethod >> 8) & 0x1f, 0, 4096, 0, file, &cab_read);
+		    if(!file->state->stream)
+			return cab_extract_cleanup(file, CL_EMSCAB);
+
+		    if(lseek(file->fd, folder->offset, SEEK_SET) == -1) {
+			cli_errmsg("cab_extract: Can't lseek to %u\n", (unsigned int) folder->offset);
+			lzx_free(file->state->stream);
+			return cab_extract_cleanup(file, CL_EIO);
+		    }
+		}
+	    }
+	    ret = lzx_decompress(file->state->stream, file->length);
+	    lzx_free(file->state->stream);
+	    break;
+
+	default:
+	    cli_warnmsg("CAB: Not supported compression method: 0x%x\n", folder->cmethod & 0x000f);
+	    ret = CL_EFORMAT;
+    }
+
+    return cab_extract_cleanup(file, ret);
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1059 @@
+/*
+ *  Extract component parts of MS CHM files
+ *
+ *  Copyright (C) 2004-2005 trog at uncon.org
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+ 
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif /* HAVE_SYS_MMAN_H */
+#endif /* HAVE_MMAP */
+#else/* PACKED */
+#undef HAVE_MMAP
+#endif
+
+#include "others.h"
+#include "mspack.h"
+#include "cltypes.h"
+#include "chmunpack.h"
+
+#ifndef HAVE_ATTRIB_PACKED
+#define __attribute__(x)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack(1)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack 1
+#endif
+
+#ifndef	O_BINARY
+#define	O_BINARY	0
+#endif
+
+#define CHM_ITSF_MIN_LEN (0x60) /* bytes itsf_read_header() loads into itsf_header_t in packed builds; equals the summed sizes of the fields below */
+typedef struct itsf_header_tag /* ITSF header; packed builds read it from file offset 0 */
+{
+	unsigned char signature[4] __attribute__ ((packed)); /* must be "ITSF" */
+	int32_t version __attribute__ ((packed)); /* data_offset is only used when this is > 2 */
+	int32_t header_len __attribute__ ((packed));
+	uint32_t unknown __attribute__ ((packed)); /* stored as read; itsf_read_header() does not byte-swap it */
+	uint32_t last_modified __attribute__ ((packed));
+	uint32_t lang_id __attribute__ ((packed));
+	unsigned char dir_clsid[16] __attribute__ ((packed));
+	unsigned char stream_clsid[16] __attribute__ ((packed));
+	uint64_t sec0_offset __attribute__ ((packed));
+	uint64_t sec0_len __attribute__ ((packed));
+	uint64_t dir_offset __attribute__ ((packed));
+	uint64_t dir_len __attribute__ ((packed));
+	uint64_t data_offset __attribute__ ((packed)); /* base added to entry offsets by the read_sys_*() helpers */
+} itsf_header_t;
+
+#define CHM_ITSP_LEN (0x54) /* size of the ITSP header in the file; itsp_read_header() requires header_len to equal it */
+typedef struct itsp_header_tag /* ITSP header, filled in by itsp_read_header() */
+{
+	unsigned char signature[4] __attribute__ ((packed)); /* must be "ITSP" */
+	int32_t version __attribute__ ((packed)); /* must be 1 */
+	int32_t header_len __attribute__ ((packed)); /* must be CHM_ITSP_LEN */
+	int32_t unknown1 __attribute__ ((packed)); /* stored as read, not byte-swapped */
+	uint32_t block_len __attribute__ ((packed));
+	int32_t blockidx_intvl __attribute__ ((packed));
+	int32_t index_depth __attribute__ ((packed));
+	int32_t index_root __attribute__ ((packed));
+	int32_t index_head __attribute__ ((packed));
+	int32_t index_tail __attribute__ ((packed));
+	int32_t unknown2 __attribute__ ((packed)); /* stored as read, not byte-swapped */
+	uint32_t num_blocks __attribute__ ((packed));
+	uint32_t lang_id __attribute__ ((packed));
+	unsigned char system_clsid[16] __attribute__ ((packed));
+	unsigned char unknown4[16] __attribute__ ((packed));
+} itsp_header_t;
+
+#define CHM_CHUNK_HDR_LEN (0x14) /* size of the five packed fields below; read_chunk_entries() starts parsing entries at this offset */
+typedef struct chunk_header_tag /* directory chunk header plus in-memory bookkeeping, filled in by read_chunk() */
+{
+	unsigned char signature[4] __attribute__ ((packed)); /* read_chunk() accepts "PMGL" and "PMGI" */
+	uint32_t free_space __attribute__ ((packed));
+	uint32_t unknown __attribute__ ((packed)); /* this and the next two fields are only read for "PMGL" chunks */
+	int32_t block_prev __attribute__ ((packed));
+	int32_t block_next __attribute__ ((packed));
+	unsigned char *chunk_data; /* not from the file: heap copy of the whole chunk */
+	uint16_t num_entries; /* not a header field: decoded from the last two bytes of the chunk, low byte first */
+} chunk_header_t;
+
+typedef struct file_list_tag /* one directory entry in a singly linked list built by read_chunk_entries() */
+{
+	unsigned char *name; /* heap-allocated, NUL-terminated; released by free_file_list() */
+	uint64_t section; /* section, offset and length are decoded with read_enc_int() */
+	uint64_t offset;
+	uint64_t length;
+	struct file_list_tag *next; /* NULL at the end of the list */
+} file_list_t;
+
+#define CHM_CONTROL_LEN (0x18) /* size of the six 32-bit fields below, as read by read_sys_control() */
+typedef struct lzx_control_tag { /* LZX control record, filled in by read_sys_control() */
+	uint32_t length __attribute__ ((packed));
+	unsigned char signature[4] __attribute__ ((packed)); /* must be "LZXC" */
+	uint32_t version __attribute__ ((packed)); /* only 1 and 2 are accepted */
+	uint32_t reset_interval __attribute__ ((packed)); /* multiplied by LZX_FRAME_SIZE when version == 2 */
+	uint32_t window_size __attribute__ ((packed)); /* multiplied by LZX_FRAME_SIZE when version == 2 */
+	uint32_t cache_size __attribute__ ((packed));
+} lzx_control_t;
+
+/* rt_offset is not stored in the file, so it is not counted in the structure length */
+#define CHM_RESET_TABLE_LEN (0x24) /* summed size of the fields below, excluding rt_offset */
+typedef struct lzx_reset_table_tag { /* LZX reset table header, filled in by read_sys_reset_table() */
+	uint32_t num_entries __attribute__ ((packed));
+	uint32_t entry_size __attribute__ ((packed));
+	uint32_t table_offset __attribute__ ((packed));
+	uint64_t uncom_len __attribute__ ((packed));
+	uint64_t com_len __attribute__ ((packed));
+	uint64_t frame_len __attribute__ ((packed));
+	off_t rt_offset __attribute__ ((packed)); /* in-memory only: file offset of the reset table entry (data_offset + entry offset) */
+} lzx_reset_table_t;
+
+typedef struct lzx_content_tag { /* location of the content entry, built by read_sys_content() */
+	uint64_t offset; /* itsf data_offset plus the entry's offset */
+	uint64_t length; /* the entry's length */
+} lzx_content_t;
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack()
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack
+#endif
+
+#define chm_endian_convert_16(x) le16_to_host(x) /* aliases for the le*_to_host() helpers, which are defined outside this file */
+#define chm_endian_convert_32(x) le32_to_host(x) /* the header readers apply these to multi-byte fields after reading them */
+#define chm_endian_convert_64(x) le64_to_host(x)
+
+/*
+ * Read `len` bytes located at absolute position `offset` into `dest`.
+ *
+ * When `m_area` is non-NULL the bytes are copied out of that mapping,
+ * which is `m_length` bytes long; otherwise `fd` is repositioned with
+ * lseek() and read with cli_readn().
+ *
+ * Returns TRUE on success.  Returns FALSE for a negative offset or
+ * length, a range that does not fit inside the mapping, or a failed
+ * seek/read.
+ */
+static int chm_read_data(int fd, unsigned char *dest, off_t offset, off_t len,
+			unsigned char *m_area, off_t m_length)
+{
+	if ((offset < 0) || (len < 0)) {
+		return FALSE;
+	}
+	if (m_area != NULL) {
+		/* Subtract instead of computing offset+len: the sum can
+		 * overflow a signed off_t, which is undefined behaviour and
+		 * lets the compiler drop the test altogether. */
+		if ((offset > m_length) || (len > (m_length - offset))) {
+			return FALSE;
+		}
+		memcpy(dest, m_area+offset, len);
+	} else {
+		/* offset+len is never formed on this path; a range running
+		 * past the end of the file shows up as a short read below */
+		if (lseek(fd, offset, SEEK_SET) != offset) {
+			return FALSE;
+		}
+		if (cli_readn(fd, dest, len) != len) {
+			return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+/*
+ * Copy `len` bytes from the current position of `ifd` to `ofd`, in
+ * pieces of at most sizeof(data) bytes.
+ *
+ * Returns `len` on success.  When a read comes up short or a write
+ * fails, returns the number of bytes fully copied before that point,
+ * which is always less than `len`.
+ */
+static uint64_t chm_copy_file_data(int ifd, int ofd, uint64_t len)
+{
+	unsigned char data[8192];
+	uint64_t count, rem;
+	unsigned int todo;
+	
+	rem = len;
+
+	while (rem > 0) {
+		todo = MIN(8192, rem);
+		count = cli_readn(ifd, data, todo);
+		if (count != todo) {
+			return len-rem;
+		}
+		if (cli_writen(ofd, data, count) != (int64_t)count) {
+			/* rem has not been reduced for this piece yet, so
+			 * len-rem is already the amount copied; subtracting
+			 * count again under-reported it and wrapped around
+			 * on the first piece */
+			return len-rem;
+		}
+		rem -= count;
+	}
+	return len;
+}
+
+/* Release every node of the list starting at file_l, together with its name. */
+static void free_file_list(file_list_t *file_l)
+{
+	file_list_t *node, *following;
+
+	for (node = file_l; node != NULL; node = following) {
+		/* remember the successor before the node goes away */
+		following = node->next;
+		free(node->name); /* free(NULL) is a no-op */
+		free(node);
+	}
+}
+
+/*
+ * Dump the fields of an ITSF header through cli_dbgmsg().  Does nothing
+ * for a NULL pointer.  The data offset is only printed when version > 2.
+ *
+ * Each argument is cast to the exact type its conversion specifier
+ * expects; the format strings are unchanged.
+ */
+static void itsf_print_header(itsf_header_t *itsf_hdr)
+{
+	if (!itsf_hdr) {
+		return;
+	}
+	
+	cli_dbgmsg("---- ITSF ----\n");
+	cli_dbgmsg("Signature:\t%c%c%c%c\n", itsf_hdr->signature[0],
+		itsf_hdr->signature[1],itsf_hdr->signature[2],itsf_hdr->signature[3]);
+	cli_dbgmsg("Version:\t%d\n", (int) itsf_hdr->version);
+	cli_dbgmsg("Header len:\t%ld\n", (long) itsf_hdr->header_len);
+	cli_dbgmsg("Lang ID:\t%d\n", (int) itsf_hdr->lang_id);
+	cli_dbgmsg("Sec0 offset:\t%llu\n", (unsigned long long) itsf_hdr->sec0_offset);
+	cli_dbgmsg("Sec0 len:\t%llu\n", (unsigned long long) itsf_hdr->sec0_len);
+	cli_dbgmsg("Dir offset:\t%llu\n", (unsigned long long) itsf_hdr->dir_offset);
+	cli_dbgmsg("Dir len:\t%llu\n", (unsigned long long) itsf_hdr->dir_len);
+	if (itsf_hdr->version > 2) {
+		cli_dbgmsg("Data offset:\t%llu\n\n", (unsigned long long) itsf_hdr->data_offset);
+	}
+}
+
+/*
+ * Read the ITSF header from the start of the file into *itsf_hdr and
+ * convert its multi-byte fields with the chm_endian_convert_*() macros.
+ *
+ * Packed builds fetch CHM_ITSF_MIN_LEN bytes in one go via
+ * chm_read_data() (from m_area when it is non-NULL).  Other builds read
+ * field by field from fd; m_area and m_length are unused there.
+ *
+ * Returns TRUE on success, FALSE on a seek/read failure or when the
+ * signature is not "ITSF".
+ */
+static int itsf_read_header(int fd, itsf_header_t *itsf_hdr, unsigned char *m_area, off_t m_length)
+{
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+	if (!chm_read_data(fd, (unsigned char *) itsf_hdr, 0, CHM_ITSF_MIN_LEN,
+				m_area,	m_length)) {
+		return FALSE;
+	}
+#else
+	/* the packed path reads from absolute offset 0; do the same here
+	 * rather than trusting the descriptor's current position */
+	if (lseek(fd, 0, SEEK_SET) != 0) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->signature, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->version, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->header_len, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->unknown, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->last_modified, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->lang_id, 4) != 4) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->dir_clsid, 16) != 16) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->stream_clsid, 16) != 16) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->sec0_offset, 8) != 8) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->sec0_len, 8) != 8) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->dir_offset, 8) != 8) {
+		return FALSE;
+	}
+	if (cli_readn(fd, &itsf_hdr->dir_len, 8) != 8) {
+		return FALSE;
+	}
+	/* version has not been byte-swapped yet at this point, so convert
+	 * it for the comparison; testing the raw value is wrong on
+	 * big-endian hosts */
+	if ((int32_t) chm_endian_convert_32(itsf_hdr->version) > 2) {
+		if (cli_readn(fd, &itsf_hdr->data_offset, 8) != 8) {
+			return FALSE;
+		}
+	}
+#endif
+	if (memcmp(itsf_hdr->signature, "ITSF", 4) != 0) {
+		cli_dbgmsg("ITSF signature mismatch\n");
+		return FALSE;
+	}
+	itsf_hdr->version = chm_endian_convert_32(itsf_hdr->version);
+	itsf_hdr->header_len = chm_endian_convert_32(itsf_hdr->header_len);
+	itsf_hdr->last_modified = chm_endian_convert_32(itsf_hdr->last_modified);
+	itsf_hdr->lang_id = chm_endian_convert_32(itsf_hdr->lang_id);
+	itsf_hdr->sec0_offset = chm_endian_convert_64(itsf_hdr->sec0_offset);
+	itsf_hdr->sec0_len = chm_endian_convert_64(itsf_hdr->sec0_len);
+	itsf_hdr->dir_offset = chm_endian_convert_64(itsf_hdr->dir_offset);
+	itsf_hdr->dir_len = chm_endian_convert_64(itsf_hdr->dir_len);
+	if (itsf_hdr->version > 2) {
+		itsf_hdr->data_offset = chm_endian_convert_64(itsf_hdr->data_offset);
+	}
+	return TRUE;
+}
+
+/*
+ * Dump the fields of an ITSP header through cli_dbgmsg().  Does nothing
+ * for a NULL pointer.
+ *
+ * Each argument is cast to the exact type its conversion specifier
+ * expects; the format strings are unchanged.
+ */
+static void itsp_print_header(itsp_header_t *itsp_hdr)
+{
+	if (!itsp_hdr) {
+		return;
+	}
+	
+	cli_dbgmsg("---- ITSP ----\n");
+	cli_dbgmsg("Signature:\t%c%c%c%c\n", itsp_hdr->signature[0],
+		itsp_hdr->signature[1],itsp_hdr->signature[2],itsp_hdr->signature[3]);
+	cli_dbgmsg("Version:\t%d\n", (int) itsp_hdr->version);
+	cli_dbgmsg("Block len:\t%ld\n", (long) itsp_hdr->block_len);
+	cli_dbgmsg("Block idx int:\t%d\n", (int) itsp_hdr->blockidx_intvl);
+	cli_dbgmsg("Index depth:\t%d\n", (int) itsp_hdr->index_depth);
+	cli_dbgmsg("Index root:\t%d\n", (int) itsp_hdr->index_root);
+	cli_dbgmsg("Index head:\t%u\n", (unsigned int) itsp_hdr->index_head);
+	cli_dbgmsg("Index tail:\t%u\n", (unsigned int) itsp_hdr->index_tail);
+	cli_dbgmsg("Num Blocks:\t%u\n", (unsigned int) itsp_hdr->num_blocks);
+	cli_dbgmsg("Lang ID:\t%lu\n\n", (unsigned long) itsp_hdr->lang_id);
+}
+
+/*
+ * Read the ITSP header located at `offset` into *itsp_hdr and convert
+ * its multi-byte fields with chm_endian_convert_32().
+ *
+ * Packed builds fetch CHM_ITSP_LEN bytes in one go via chm_read_data();
+ * other builds seek fd to `offset` and read field by field.
+ *
+ * Returns TRUE only when the signature is "ITSP", the version is 1 and
+ * header_len equals CHM_ITSP_LEN; FALSE otherwise or on an I/O failure.
+ */
+static int itsp_read_header(int fd, itsp_header_t *itsp_hdr, off_t offset,
+				unsigned char *m_area, off_t m_length)
+{
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+	if (!chm_read_data(fd, (unsigned char *) itsp_hdr, offset, CHM_ITSP_LEN,
+				m_area,	m_length)) {
+		return FALSE;
+	}
+#else
+	if (lseek(fd, offset, SEEK_SET) != offset) {
+		return FALSE;
+	}
+	/* fields are read in file order; the chain stops at the first
+	 * read that comes up short */
+	if ((cli_readn(fd, &itsp_hdr->signature, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->version, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->header_len, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->unknown1, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->block_len, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->blockidx_intvl, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->index_depth, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->index_root, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->index_head, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->index_tail, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->unknown2, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->num_blocks, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->lang_id, 4) != 4) ||
+	    (cli_readn(fd, &itsp_hdr->system_clsid, 16) != 16) ||
+	    (cli_readn(fd, &itsp_hdr->unknown4, 16) != 16)) {
+		return FALSE;
+	}
+#endif
+	if (memcmp(itsp_hdr->signature, "ITSP", 4) != 0) {
+		cli_dbgmsg("ITSP signature mismatch\n");
+		return FALSE;
+	}
+
+	/* unknown1 and unknown2 are deliberately left as read */
+	itsp_hdr->version = chm_endian_convert_32(itsp_hdr->version);
+	itsp_hdr->header_len = chm_endian_convert_32(itsp_hdr->header_len);
+	itsp_hdr->block_len = chm_endian_convert_32(itsp_hdr->block_len);
+	itsp_hdr->blockidx_intvl = chm_endian_convert_32(itsp_hdr->blockidx_intvl);
+	itsp_hdr->index_depth = chm_endian_convert_32(itsp_hdr->index_depth);
+	itsp_hdr->index_root = chm_endian_convert_32(itsp_hdr->index_root);
+	itsp_hdr->index_head = chm_endian_convert_32(itsp_hdr->index_head);
+	itsp_hdr->index_tail = chm_endian_convert_32(itsp_hdr->index_tail);
+	itsp_hdr->num_blocks = chm_endian_convert_32(itsp_hdr->num_blocks);
+	itsp_hdr->lang_id = chm_endian_convert_32(itsp_hdr->lang_id);
+
+	if ((itsp_hdr->version == 1) && (itsp_hdr->header_len == CHM_ITSP_LEN)) {
+		return TRUE;
+	}
+	cli_dbgmsg("ITSP header mismatch\n");
+	return FALSE;
+}
+
+/*
+ * Decode one variable-length integer starting at *start.
+ *
+ * Each byte contributes its low 7 bits, most significant group first;
+ * a set high bit means another byte follows.
+ *
+ * `end` is one past the last readable byte, so the byte at `end` itself
+ * is never touched (the previous `current > end` test allowed a one-byte
+ * read beyond the buffer).
+ *
+ * On success returns the value and advances *start past the bytes used.
+ * If the input runs out before the final byte, returns 0 and leaves
+ * *start unchanged.
+ */
+static uint64_t read_enc_int(unsigned char **start, unsigned char *end)
+{
+	uint64_t retval = 0;
+	unsigned char *current = *start;
+	unsigned char byte;
+
+	do {
+		if (current >= end) {
+			return 0;
+		}
+		byte = *current++;
+		retval = (retval << 7) | (byte & 0x7f);
+	} while (byte & 0x80);
+
+	*start = current;
+	return retval;
+}
+
+/*
+ * Parse `num_entries` directory entries out of a listing chunk.
+ *
+ * `chunk` points at the whole chunk (`chunk_len` bytes); entries start
+ * after the CHM_CHUNK_HDR_LEN-byte header.  Each entry is an encoded name
+ * length, the name bytes, then encoded section, offset and length values.
+ * Entries whose name starts with "::" are pushed onto sys_file_l, all
+ * others onto file_l.  Both arguments are list heads whose ->next is
+ * updated, so the lists end up in reverse order to the chunk.
+ *
+ * Returns TRUE when every entry was parsed.  Returns FALSE on a malformed
+ * entry or an allocation failure; entries added before the failure stay
+ * on the lists.
+ */
+static int read_chunk_entries(unsigned char *chunk, uint32_t chunk_len,
+					uint16_t num_entries,
+					file_list_t *file_l, file_list_t *sys_file_l)
+{
+	unsigned char *current, *end;
+	uint64_t name_len;
+	file_list_t *file_e;
+
+	/* a chunk shorter than its header holds no entries; bail out
+	 * before forming a pointer beyond the buffer */
+	if (chunk_len < CHM_CHUNK_HDR_LEN) {
+		if (num_entries == 0) {
+			return TRUE;
+		}
+		cli_dbgmsg("read chunk entries failed\n");
+		return FALSE;
+	}
+
+	end = chunk + chunk_len;	/* one past the last byte */
+	current = chunk + CHM_CHUNK_HDR_LEN;
+	
+	while (num_entries--) {
+		/* an entry needs at least one byte; `end` is not readable */
+		if (current >= end) {
+			cli_dbgmsg("read chunk entries failed\n");
+			return FALSE;
+		}
+
+		file_e = (file_list_t *) cli_malloc(sizeof(file_list_t));
+		if (!file_e) {
+			return FALSE;
+		}
+		file_e->next = NULL;
+		
+		name_len = read_enc_int(&current, end);
+		/* compare against the bytes left, not current+name_len:
+		 * adding an untrusted 64-bit length to a pointer can wrap */
+		if ((current > end) || (name_len > (uint64_t) (end - current))) {
+			cli_dbgmsg("Bad CHM name_len detected\n");
+			free(file_e);
+			return FALSE;
+		}
+		if (name_len > 0xFFFFFF) {
+			cli_dbgmsg("CHM file name too long: %llu\n", (unsigned long long) name_len);
+			file_e->name = (unsigned char *) cli_strdup("truncated");
+			if (!file_e->name) {
+				free(file_e);
+				return FALSE;
+			}
+		} else {
+			file_e->name = (unsigned char *) cli_malloc(name_len+1);
+			if (!file_e->name) {
+				free(file_e);
+				return FALSE;
+			}
+			/* the range was validated above, so a plain copy is safe */
+			memcpy(file_e->name, current, name_len);
+			file_e->name[name_len] = '\0';
+		}
+		current += name_len;
+		/* section, offset and length must still follow the name */
+		if (current >= end) {
+			cli_dbgmsg("read chunk entries failed\n");
+			free(file_e->name);
+			free(file_e);
+			return FALSE;
+		}
+		file_e->section = read_enc_int(&current, end);
+		file_e->offset = read_enc_int(&current, end);
+		file_e->length = read_enc_int(&current, end);
+		if ((name_len >= 2) && (file_e->name[0] == ':') &&
+				(file_e->name[1] == ':')) {
+			file_e->next = sys_file_l->next;
+			sys_file_l->next = file_e;
+		} else {
+			file_e->next = file_l->next;
+			file_l->next = file_e;
+		}
+		cli_dbgmsg("Section: %llu Offset: %llu Length: %llu, Name: %s\n",
+					(unsigned long long) file_e->section,
+					(unsigned long long) file_e->offset,
+					(unsigned long long) file_e->length, file_e->name);
+	}
+	return TRUE;
+}
+
+/*
+ * Dump a chunk header through cli_dbgmsg().  The block links and the
+ * entry count are only printed for "PMGL" chunks.
+ */
+static void print_chunk(chunk_header_t *chunk)
+{
+	const unsigned char *sig = chunk->signature;
+
+	cli_dbgmsg("---- Chunk ----\n");
+	cli_dbgmsg("Signature:\t%c%c%c%c\n", sig[0], sig[1], sig[2], sig[3]);
+	cli_dbgmsg("Free Space:\t%u\n", chunk->free_space);
+
+	if (memcmp(sig, "PMGL", 4) != 0) {
+		return;
+	}
+
+	cli_dbgmsg("Prev Block:\t%d\n", chunk->block_prev);
+	cli_dbgmsg("Next Block:\t%d\n", chunk->block_next);
+	cli_dbgmsg("Num entries:\t%d\n\n", chunk->num_entries);
+}
+
+/*
+ * Load the directory chunk of `chunk_len` bytes found at `offset` and,
+ * for a "PMGL" chunk, add its entries to file_l / sys_file_l via
+ * read_chunk_entries().  "PMGI" chunks are accepted but not parsed.
+ *
+ * chunk_len must be between 8 and 33554432 bytes.  The entry count is
+ * taken from the last two bytes of the chunk, low byte first.  The
+ * result of read_chunk_entries() is not checked, so a chunk with
+ * malformed entries still yields TRUE.
+ *
+ * Returns TRUE for a readable "PMGL" or "PMGI" chunk, FALSE otherwise.
+ */
+static int read_chunk(int fd, off_t offset, uint32_t chunk_len,
+					unsigned char *m_area, off_t m_length,
+					file_list_t *file_l, file_list_t *sys_file_l)
+{
+	chunk_header_t *chunk_hdr;
+	int retval = FALSE;
+	
+	if (chunk_len < 8 || chunk_len > 33554432) {
+		return FALSE;
+	}
+	
+	chunk_hdr = (chunk_header_t *) cli_malloc(sizeof(chunk_header_t));
+	if (!chunk_hdr) {
+		return FALSE;
+	}
+	
+	chunk_hdr->chunk_data = (unsigned char *) cli_malloc(chunk_len);
+	if (!chunk_hdr->chunk_data) {
+		free(chunk_hdr);
+		return FALSE;
+	}
+	
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+	/* 8 bytes reads the signature and the free_space */
+	if (!chm_read_data(fd, chunk_hdr->signature, offset, 8,
+				m_area,	m_length)) {
+		goto abort;
+	}
+	if (!chm_read_data(fd, chunk_hdr->chunk_data, offset, chunk_len,
+				m_area,	m_length)) {
+		goto abort;
+	}
+#else	
+	if (lseek(fd, offset, SEEK_SET) != offset) {
+		goto abort;
+	}
+	if (cli_readn(fd, chunk_hdr->chunk_data, chunk_len) != chunk_len) {
+		goto abort;
+	}
+	/* rewind to pick the header fields out one by one */
+	if (lseek(fd, offset, SEEK_SET) != offset) {
+		goto abort;
+	}
+	if (cli_readn(fd, &chunk_hdr->signature, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &chunk_hdr->free_space, 4) != 4) {
+		goto abort;
+	}
+#endif
+	chunk_hdr->free_space = chm_endian_convert_32(chunk_hdr->free_space);
+	
+	if (memcmp(chunk_hdr->signature, "PMGL", 4) == 0) {
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+		/* 12 bytes: unknown, block_prev, block_next */
+		if (!chm_read_data(fd, (unsigned char *) &chunk_hdr->unknown, offset+8, 12,
+					m_area,	m_length)) {
+			goto abort;
+		}
+#else
+		/* same field order as the packed read above: the previous
+		 * block comes before the next block (the two reads used to
+		 * be swapped on this path) */
+		if (cli_readn(fd, &chunk_hdr->unknown, 4) != 4) {
+			goto abort;
+		}
+		if (cli_readn(fd, &chunk_hdr->block_prev, 4) != 4) {
+			goto abort;
+		}
+		if (cli_readn(fd, &chunk_hdr->block_next, 4) != 4) {
+			goto abort;
+		}
+#endif
+		chunk_hdr->block_next = chm_endian_convert_32(chunk_hdr->block_next);
+		chunk_hdr->block_prev = chm_endian_convert_32(chunk_hdr->block_prev);
+		
+		chunk_hdr->num_entries = (uint16_t)((((uint8_t const *)(chunk_hdr->chunk_data))[chunk_len-2] << 0)
+					| (((uint8_t const *)(chunk_hdr->chunk_data))[chunk_len-1] << 8));
+		read_chunk_entries(chunk_hdr->chunk_data, chunk_len,
+                        chunk_hdr->num_entries, file_l, sys_file_l);
+	} else if (memcmp(chunk_hdr->signature, "PMGI", 4) != 0) {
+		goto abort;
+	}
+
+	print_chunk(chunk_hdr);
+	retval=TRUE;
+abort:
+	free(chunk_hdr->chunk_data);
+	free(chunk_hdr);
+	return retval;
+}
+
+/*
+ * Dump an LZX control record through cli_dbgmsg().  Does nothing for a
+ * NULL pointer.
+ *
+ * Each argument is cast to the exact type its conversion specifier
+ * expects; the format strings are unchanged.
+ */
+static void print_sys_control(lzx_control_t *lzx_control)
+{
+	if (!lzx_control) {
+		return;
+	}
+
+	cli_dbgmsg("---- Control ----\n");	
+	cli_dbgmsg("Length:\t\t%lu\n", (unsigned long) lzx_control->length);
+	cli_dbgmsg("Signature:\t%c%c%c%c\n", lzx_control->signature[0],
+		lzx_control->signature[1],lzx_control->signature[2],lzx_control->signature[3]);
+	cli_dbgmsg("Version:\t%d\n", (int) lzx_control->version);
+	cli_dbgmsg("Reset Interval:\t%d\n", (int) lzx_control->reset_interval);
+	cli_dbgmsg("Window Size:\t%d\n", (int) lzx_control->window_size);
+	cli_dbgmsg("Cache Size:\t%d\n\n", (int) lzx_control->cache_size);
+}
+
+/*
+ * Read the LZX control record described by directory entry `file_e`.
+ *
+ * The entry must be exactly 28 bytes long; CHM_CONTROL_LEN bytes are
+ * read from itsf_hdr->data_offset + file_e->offset.  The signature must
+ * be "LZXC" and the version 1 or 2.  For version 2 the reset interval
+ * and window size are multiplied by LZX_FRAME_SIZE.
+ *
+ * Returns a heap-allocated record the caller must free, or NULL on any
+ * failure.
+ */
+static lzx_control_t *read_sys_control(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e,
+					unsigned char *m_area, off_t m_length)
+{
+	off_t offset;
+	lzx_control_t *lzx_control;
+	
+	if (file_e->length != 28) {
+		return NULL;
+	}
+	offset = itsf_hdr->data_offset + file_e->offset;
+	if (offset < 0) {
+		return NULL;
+	}
+
+	lzx_control = (lzx_control_t *) cli_malloc(sizeof(lzx_control_t));
+	if (!lzx_control) {
+		return NULL;
+	}
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+	if (!chm_read_data(fd, (unsigned char *) lzx_control, offset, CHM_CONTROL_LEN,
+				m_area,	m_length)) {
+		goto abort;
+	}
+#else
+	if (lseek(fd, offset, SEEK_SET) != offset) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->length, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->signature, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->version, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->reset_interval, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->window_size, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_control->cache_size, 4) != 4) {
+		goto abort;
+	}
+#endif
+	lzx_control->length = chm_endian_convert_32(lzx_control->length);
+	lzx_control->version = chm_endian_convert_32(lzx_control->version);
+	lzx_control->reset_interval = chm_endian_convert_32(lzx_control->reset_interval);
+	lzx_control->window_size = chm_endian_convert_32(lzx_control->window_size);
+	lzx_control->cache_size = chm_endian_convert_32(lzx_control->cache_size);
+	
+	/* memcmp, as for the other signatures: the field is raw bytes,
+	 * not a C string */
+	if (memcmp(lzx_control->signature, "LZXC", 4) != 0) {
+		/* newline added so the next debug message starts on its own line */
+		cli_dbgmsg("bad sys_control signature\n");
+		goto abort;
+	}
+	switch(lzx_control->version) {
+		case 1:
+			break;
+		case 2:
+			lzx_control->reset_interval *= LZX_FRAME_SIZE;
+			lzx_control->window_size *= LZX_FRAME_SIZE;
+			break;
+		default:
+			cli_dbgmsg("Unknown sys_control version:%d\n", (int) lzx_control->version);
+			goto abort;
+	}
+			
+	print_sys_control(lzx_control);
+	return lzx_control;
+abort:
+	free(lzx_control);
+	return NULL;
+}
+
+/* Write the offset and length of the compressed content section to the
+ * debug log; a NULL argument is silently ignored. */
+static void print_sys_content(lzx_content_t *lzx_content)
+{
+	if (lzx_content) {
+		cli_dbgmsg("---- Content ----\n");
+		cli_dbgmsg("Offset:\t%llu\n", lzx_content->offset);
+		cli_dbgmsg("Length:\t%llu\n\n", lzx_content->length);
+	}
+}
+
+/*
+ * Describe the compressed content section from its directory entry.
+ *
+ * Nothing is read from the file (fd is unused): the offset is the entry
+ * offset rebased on itsf_hdr->data_offset and the length is copied from the
+ * entry. Returns a heap-allocated descriptor for the caller to free(), or
+ * NULL when the allocation fails.
+ */
+static lzx_content_t *read_sys_content(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e)
+{
+	lzx_content_t *content = (lzx_content_t *) cli_malloc(sizeof(lzx_content_t));
+
+	if (content) {
+		content->offset = itsf_hdr->data_offset + file_e->offset;
+		content->length = file_e->length;
+		print_sys_content(content);
+	}
+	return content;
+}
+
+/*
+ * Write the fields of an LZX reset table to the debug log; a NULL argument
+ * is silently ignored.
+ *
+ * Each value is cast to the exact type its conversion specifier expects, so
+ * the variadic call stays well defined whatever the widths of the structure
+ * members are (the 32-bit members would otherwise be passed for %lu).
+ */
+static void print_sys_reset_table(lzx_reset_table_t *lzx_reset_table)
+{
+	if (!lzx_reset_table) {
+		return;
+	}
+
+	cli_dbgmsg("---- Reset Table ----\n");
+	cli_dbgmsg("Num Entries:\t%lu\n", (unsigned long) lzx_reset_table->num_entries);
+	cli_dbgmsg("Entry Size:\t%lu\n", (unsigned long) lzx_reset_table->entry_size);
+	cli_dbgmsg("Table Offset:\t%lu\n", (unsigned long) lzx_reset_table->table_offset);
+	cli_dbgmsg("Uncom Len:\t%llu\n", (unsigned long long) lzx_reset_table->uncom_len);
+	cli_dbgmsg("Com Len:\t%llu\n", (unsigned long long) lzx_reset_table->com_len);
+	cli_dbgmsg("Frame Len:\t%llu\n\n", (unsigned long long) lzx_reset_table->frame_len);
+}
+
+/*
+ * Read and validate the LZX reset table system file of a CHM archive.
+ *
+ *   fd        descriptor of the CHM file
+ *   itsf_hdr  ITSF header; its data_offset is the base for file_e->offset
+ *   file_e    directory entry of the reset table; must be at least 40 bytes
+ *   m_area,
+ *   m_length  handed to chm_read_data() on builds with packed structures
+ *
+ * Returns a heap-allocated lzx_reset_table_t that the caller releases with
+ * free(), or NULL if the entry is too short, the offset is negative, the
+ * allocation or a read fails, frame_len differs from LZX_FRAME_SIZE or
+ * entry_size is neither 4 nor 8. rt_offset records where the entry starts.
+ */
+static lzx_reset_table_t *read_sys_reset_table(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e,
+						unsigned char *m_area, off_t m_length)
+{
+	off_t offset;
+	lzx_reset_table_t *lzx_reset_table;
+
+	if (file_e->length < 40) {
+		return NULL;
+	}
+	/* Skip past unknown entry in offset calc */
+	offset = itsf_hdr->data_offset + file_e->offset + 4;
+
+	if (offset < 0) {
+		return NULL;
+	}
+
+	lzx_reset_table = (lzx_reset_table_t *) cli_malloc(sizeof(lzx_reset_table_t));
+	if (!lzx_reset_table) {
+		return NULL;
+	}
+
+	/* Save the entry offset for later use */
+	lzx_reset_table->rt_offset = offset-4;
+
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+	/* Packed layout: the on-disk record can be read straight into the struct */
+	if (!chm_read_data(fd, (unsigned char *) lzx_reset_table, offset, CHM_RESET_TABLE_LEN,
+				m_area,	m_length)) {
+		goto abort;
+	}
+#else
+	/* No packing support: read the record field by field */
+	if (lseek(fd, offset, SEEK_SET) != offset) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->num_entries, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->entry_size, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->table_offset, 4) != 4) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->uncom_len, 8) != 8) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->com_len, 8) != 8) {
+		goto abort;
+	}
+	if (cli_readn(fd, &lzx_reset_table->frame_len, 8) != 8) {
+		goto abort;
+	}
+#endif
+	lzx_reset_table->num_entries = chm_endian_convert_32(lzx_reset_table->num_entries);
+	lzx_reset_table->entry_size = chm_endian_convert_32(lzx_reset_table->entry_size);
+	lzx_reset_table->table_offset = chm_endian_convert_32(lzx_reset_table->table_offset);
+	lzx_reset_table->uncom_len = chm_endian_convert_64(lzx_reset_table->uncom_len);
+	lzx_reset_table->com_len = chm_endian_convert_64(lzx_reset_table->com_len);
+	lzx_reset_table->frame_len = chm_endian_convert_64(lzx_reset_table->frame_len);
+
+	if (lzx_reset_table->frame_len != LZX_FRAME_SIZE) {
+		/* frame_len is a 64-bit field: print it with a 64-bit conversion */
+		cli_dbgmsg("bad sys_reset_table frame_len: 0x%llx\n",
+				(unsigned long long) lzx_reset_table->frame_len);
+		goto abort;
+	}
+	if ((lzx_reset_table->entry_size != 4) && (lzx_reset_table->entry_size != 8)) {
+		cli_dbgmsg("bad sys_reset_table entry_size: 0x%x\n",
+				(unsigned int) lzx_reset_table->entry_size);
+		goto abort;
+	}
+	print_sys_reset_table(lzx_reset_table);
+	return lzx_reset_table;
+abort:
+	free(lzx_reset_table);
+	return NULL;
+}
+
+/* *****************************************************************/
+/* This section interfaces to the mspack files. As such, this is a */
+/* little bit dirty compared to my usual code */
+
+#define CHM_SYS_CONTROL_NAME "::DataSpace/Storage/MSCompressed/ControlData"
+#define CHM_SYS_CONTENT_NAME "::DataSpace/Storage/MSCompressed/Content"
+#define CHM_SYS_RESETTABLE_NAME "::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable"
+
+/*
+ * Decompress the LZX stream of a CHM archive and split it into files.
+ *
+ *   fd          descriptor of the CHM file
+ *   dirname     directory receiving the temporary and the extracted files
+ *   itsf_hdr    ITSF header of the archive
+ *   file_l      list of ordinary files (first node is an empty head)
+ *   sys_file_l  list of system files (first node is an empty head)
+ *   m_area,
+ *   m_length    passed through to the system file readers
+ *
+ * The control data, content and reset table entries are looked up in
+ * sys_file_l, the whole stream is decompressed into
+ * "<dirname>/clamav-unchm.bin", and every entry of file_l that lives in
+ * section 1 is then copied out of that temporary file into
+ * "<dirname>/<count>-<offset>.chm".
+ *
+ * Returns TRUE once the extraction loop has run (individual files that
+ * could not be written are only logged) and FALSE if the temporary file
+ * cannot be created, a system entry is missing or invalid, or the
+ * decompressor cannot be set up.
+ */
+static int chm_decompress_stream(int fd, const char *dirname, itsf_header_t *itsf_hdr,
+				file_list_t *file_l, file_list_t *sys_file_l,
+				unsigned char *m_area, off_t m_length)
+{
+	file_list_t *entry;
+	lzx_content_t *lzx_content=NULL;
+	lzx_reset_table_t *lzx_reset_table=NULL;
+	lzx_control_t *lzx_control=NULL;
+	int window_bits, count, length, tmpfd, ofd, retval=FALSE;
+	uint64_t com_offset;
+	struct lzx_stream * stream;
+	char filename[1024];	/* plain char: it is handed to snprintf() and open() */
+
+	snprintf(filename, sizeof(filename), "%s/clamav-unchm.bin", dirname);
+	tmpfd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+	if (tmpfd<0) {
+		cli_dbgmsg("open failed for %s\n", filename);
+		return FALSE;
+	}
+
+	entry = sys_file_l->next;
+	while (entry) {
+		/* A crafted archive may list a system file more than once: drop the
+		   previous result before overwriting it so that it is not leaked.
+		   As before, the last occurrence wins. */
+		if (strcmp(entry->name, CHM_SYS_CONTROL_NAME) == 0) {
+			free(lzx_control);
+			lzx_control = read_sys_control(fd, itsf_hdr, entry, m_area, m_length);
+		} else if (strcmp(entry->name, CHM_SYS_CONTENT_NAME) == 0) {
+			free(lzx_content);
+			lzx_content = read_sys_content(fd, itsf_hdr, entry);
+		} else if (strcmp(entry->name, CHM_SYS_RESETTABLE_NAME) == 0) {
+			free(lzx_reset_table);
+			lzx_reset_table = read_sys_reset_table(fd, itsf_hdr, entry, m_area, m_length);
+		}
+		entry = entry->next;
+	}
+
+	if (!lzx_content || !lzx_reset_table || !lzx_control) {
+		goto abort;
+	}
+
+	/* Only power-of-two windows from 32KB to 2MB are accepted */
+	switch (lzx_control->window_size) {
+		case 0x008000:
+			window_bits = 15;
+			break;
+		case 0x010000:
+			window_bits = 16;
+			break;
+		case 0x020000:
+			window_bits = 17;
+			break;
+		case 0x040000:
+			window_bits = 18;
+			break;
+		case 0x080000:
+			window_bits = 19;
+			break;
+		case 0x100000:
+			window_bits = 20;
+			break;
+		case 0x200000:
+			window_bits = 21;
+			break;
+		default:
+			cli_dbgmsg("bad control window size: 0x%x\n", lzx_control->window_size);
+			goto abort;
+	}
+
+	if (lzx_control->reset_interval % LZX_FRAME_SIZE) {
+		cli_dbgmsg("bad reset_interval: 0x%x\n", lzx_control->reset_interval);
+		goto abort;
+	}
+
+	/* Round the uncompressed length up to a multiple of the reset interval */
+	length = lzx_reset_table->uncom_len;
+	length += lzx_control->reset_interval;
+	length &= -lzx_control->reset_interval;
+
+	com_offset = lzx_content->offset;
+	cli_dbgmsg("Compressed offset: %llu\n", (unsigned long long) com_offset);
+
+	stream = lzx_init(fd, tmpfd, window_bits,
+			lzx_control->reset_interval / LZX_FRAME_SIZE,
+			4096, length, NULL, NULL);
+	lseek(fd, com_offset, SEEK_SET);
+	if (!stream) {
+		cli_dbgmsg("lzx_init failed\n");
+		goto abort;
+	}
+
+	/* The result is not checked: whatever was decompressed is extracted */
+	lzx_decompress(stream, length);
+	lzx_free(stream);
+
+	entry = file_l->next;
+	close(tmpfd);
+
+	/* Reopen the file for reading */
+	tmpfd = open(filename, O_RDONLY|O_BINARY);
+	if (tmpfd < 0) {
+		cli_dbgmsg("re-open output failed\n");
+		goto abort;
+	}
+
+	/* Delete the file */
+	unlink(filename);
+
+	count=0;
+	while(entry) {
+		if (entry->section != 1) {
+			entry = entry->next;
+			continue;
+		}
+		if (lseek(tmpfd, entry->offset, SEEK_SET) != (off_t)entry->offset) {
+			cli_dbgmsg("seek in output failed\n");
+			entry = entry->next;
+			continue;
+		}
+
+		snprintf(filename, sizeof(filename), "%s/%d-%llu.chm", dirname, count, entry->offset);
+		ofd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+		if (ofd < 0) {
+			entry = entry->next;
+			continue;
+		}
+		if (chm_copy_file_data(tmpfd, ofd, entry->length) != entry->length) {
+			cli_dbgmsg("failed to copy %lu bytes\n", entry->length);
+		}
+
+		close(ofd);
+		entry = entry->next;
+		count++;
+	}
+	close(tmpfd);
+	tmpfd=-1;
+	retval = TRUE;
+
+abort:
+	if (tmpfd>=0) {
+		close(tmpfd);
+	}
+	free(lzx_content);
+	free(lzx_reset_table);
+	free(lzx_control);
+	return retval;
+}
+
+/* ************ End dirty section ********************/
+
+/*
+ * Unpack the CHM archive open on fd into the directory dirname.
+ *
+ * The ITSF and ITSP headers are read, every directory chunk is parsed into
+ * a list of ordinary files and a list of system files, and the compressed
+ * stream is then extracted with chm_decompress_stream() (whose result is
+ * not checked). When HAVE_MMAP is defined and the file can be mapped, the
+ * mapping is passed to the readers.
+ *
+ * Returns TRUE when both headers and all chunks were read, FALSE otherwise.
+ */
+int chm_unpack(int fd, const char *dirname)
+{
+	itsf_header_t itsf_hdr;
+	itsp_header_t itsp_hdr;
+	file_list_t *files, *sys_files;
+	unsigned char *map = NULL;
+	off_t map_len = 0, chunk_off;
+	uint32_t remaining;
+	struct stat sb;
+	int ok = FALSE;
+
+	/* Both lists start with an empty head node; real entries follow it */
+	files = (file_list_t *) cli_malloc(sizeof(file_list_t));
+	if (!files) {
+		return FALSE;
+	}
+	files->next = NULL;
+	files->name = NULL;
+
+	sys_files = (file_list_t *) cli_malloc(sizeof(file_list_t));
+	if (!sys_files) {
+		free(files);
+		return FALSE;
+	}
+	sys_files->next = NULL;
+	sys_files->name = NULL;
+
+#ifdef HAVE_MMAP
+	if (fstat(fd, &sb) == 0) {
+		if (sb.st_size < CHM_ITSF_MIN_LEN) {
+			goto done;
+		}
+		map_len = sb.st_size;
+		map = (unsigned char *) mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (map == MAP_FAILED) {
+			/* Mapping failed: continue with map set to NULL */
+			map = NULL;
+		}
+	}
+#endif
+
+	if (!itsf_read_header(fd, &itsf_hdr, map, map_len)) {
+		goto done;
+	}
+	itsf_print_header(&itsf_hdr);
+
+	if (!itsp_read_header(fd, &itsp_hdr, itsf_hdr.dir_offset, map, map_len)) {
+		goto done;
+	}
+	itsp_print_header(&itsp_hdr);
+
+	/* The chunks follow the ITSP header.
+	   TODO: need to check this first calculation,
+		currently have no files of this type */
+	chunk_off = itsf_hdr.dir_offset+CHM_ITSP_LEN;
+	if (itsp_hdr.index_head > 0) {
+		chunk_off += itsp_hdr.index_head * itsp_hdr.block_len;
+	}
+
+	/* Versions before 3 didn't have a data_offset */
+	/* TODO: need to check this calculation,
+		 currently have no files of this type */
+	if (itsf_hdr.version < 3) {
+		itsf_hdr.data_offset = itsf_hdr.dir_offset + CHM_ITSP_LEN + (itsp_hdr.block_len*itsp_hdr.num_blocks);
+	}
+
+	for (remaining = itsp_hdr.index_tail - itsp_hdr.index_head + 1; remaining; remaining--) {
+		if (!read_chunk(fd, chunk_off, itsp_hdr.block_len, map,
+					map_len, files, sys_files)) {
+			goto done;
+		}
+		chunk_off += itsp_hdr.block_len;
+	}
+
+	chm_decompress_stream(fd, dirname, &itsf_hdr, files, sys_files, map, map_len);
+
+	/* Signal success */
+	ok = TRUE;
+done:
+	free_file_list(files);
+	free_file_list(sys_files);
+
+#ifdef HAVE_MMAP
+	if (map) {
+		munmap(map, map_len);
+	}
+#endif
+	return ok;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,427 @@
+/*
+ *  Copyright (C) 2003 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  untgz() is based on public domain minitar utility by Charles G. Waldman
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include "zlib.h"
+#include <time.h>
+#include <errno.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "dsig.h"
+#include "str.h"
+#include "cvd.h"
+
+#define TAR_BLOCKSIZE 512
+
+/*
+ * Extract the gzip-compressed tar archive readable from fd into destdir.
+ *
+ * Only what a CVD container needs is supported: regular files (type flag
+ * '0' or NUL) whose names contain no '/'. Directories and any other entry
+ * type are rejected. fd itself is left open; a duplicate of it is handed to
+ * zlib and closed again before returning.
+ *
+ * Returns 0 on success and -1 on any error. Every exit path closes the gzip
+ * stream and the output file in progress and frees the path buffer.
+ */
+int cli_untgz(int fd, const char *destdir)
+{
+	char *path = NULL, osize[13], name[101], type;
+	char block[TAR_BLOCKSIZE];
+	int nbytes, nread, nwritten, in_block = 0, fdd, ret = -1;
+	unsigned int size = 0, pathlen = strlen(destdir) + 100 + 5;
+	FILE *outfile = NULL;
+	gzFile infile;	/* gzFile is already a handle type; no extra '*' */
+
+
+    cli_dbgmsg("in cli_untgz()\n");
+
+    if((fdd = dup(fd)) == -1) {
+        cli_errmsg("cli_untgz: Can't duplicate descriptor %d\n", fd);
+        return -1;
+    }
+
+    if((infile = gzdopen(fdd, "rb")) == NULL) {
+        cli_errmsg("cli_untgz: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
+        /* zlib did not take ownership of the duplicate, so close it here */
+        close(fdd);
+        return -1;
+    }
+
+    path = (char *) cli_calloc(sizeof(char), pathlen);
+    if(!path) {
+        cli_errmsg("cli_untgz: Can't allocate memory for path\n");
+        goto done;
+    }
+
+    while(1) {
+
+        nread = gzread(infile, block, TAR_BLOCKSIZE);
+
+        /* Clean end of stream between two entries */
+        if(!in_block && !nread)
+            break;
+
+        if(nread != TAR_BLOCKSIZE) {
+            cli_errmsg("cli_untgz: Incomplete block read\n");
+            goto done;
+        }
+
+        if(!in_block) {
+            /* Header block */
+            if (block[0] == '\0')  /* We're done */
+                break;
+
+            strncpy(name, block, 100);
+            name[100] = '\0';
+
+            if(strchr(name, '/')) {
+                cli_errmsg("cli_untgz: Slash separators are not allowed in CVD\n");
+                goto done;
+            }
+
+            snprintf(path, pathlen, "%s/%s", destdir, name);
+            cli_dbgmsg("cli_untgz: Unpacking %s\n", path);
+            type = block[156];
+
+            switch(type) {
+                case '0':
+                case '\0':
+                    break;
+                case '5':
+                    cli_errmsg("cli_untgz: Directories are not supported in CVD\n");
+                    goto done;
+                default:
+                    cli_errmsg("cli_untgz: Unknown type flag '%c'\n", type);
+                    goto done;
+            }
+
+            /* Finish the previous entry before starting the next one */
+            if(outfile) {
+                int rc = fclose(outfile);
+
+                outfile = NULL;	/* never close the same stream twice */
+                if(rc) {
+                    cli_errmsg("cli_untgz: Cannot close file %s\n", path);
+                    goto done;
+                }
+            }
+
+            if(!(outfile = fopen(path, "wb"))) {
+                cli_errmsg("cli_untgz: Cannot create file %s\n", path);
+                goto done;
+            }
+
+            strncpy(osize, block + 124, 12);
+            osize[12] = '\0';
+
+            /* sscanf() yields EOF, not 0, for an empty field: demand exactly
+               one converted item */
+            if(sscanf(osize, "%o", &size) != 1) {
+                cli_errmsg("cli_untgz: Invalid size in header\n");
+                goto done;
+            }
+
+            /* An empty file has no data blocks: the next block is a header */
+            in_block = (size > 0);
+
+        } else { /* write or continue writing file contents */
+            nbytes = size > TAR_BLOCKSIZE ? TAR_BLOCKSIZE : size;
+            nwritten = fwrite(block, 1, nbytes, outfile);
+
+            if(nwritten != nbytes) {
+                cli_errmsg("cli_untgz: Wrote %d instead of %d (%s)\n", nwritten, nbytes, path);
+                goto done;
+            }
+
+            size -= nbytes;
+            if(size == 0)
+                in_block = 0;
+        }
+    }
+
+    ret = 0;
+
+done:
+    if(outfile)
+        fclose(outfile);
+
+    gzclose(infile);
+    free(path);
+    return ret;
+}
+
+/*
+ * Parse a CVD header line into a newly allocated struct cl_cvd.
+ *
+ * head must start with "ClamAV-VDB:" and is split on ':'. Tokens 1 to 7 are
+ * the creation time, version, signature count, functionality level, MD5
+ * checksum, digital signature and builder name; the optional token 8 is the
+ * creation time in seconds (stime is 0 when it is absent).
+ *
+ * Returns the structure, to be released with cl_cvdfree(), or NULL when the
+ * prefix is wrong, memory runs out or a mandatory token is missing.
+ */
+struct cl_cvd *cl_cvdparse(const char *head)
+{
+	struct cl_cvd *cvd;
+	char *field;
+
+
+    if(strncmp(head, "ClamAV-VDB:", 11) != 0) {
+	cli_errmsg("cli_cvdparse: Not a CVD file\n");
+	return NULL;
+    }
+
+    cvd = (struct cl_cvd *) cli_malloc(sizeof(struct cl_cvd));
+    if(cvd == NULL) {
+	cli_errmsg("cl_cvdparse: Can't allocate memory for cvd\n");
+	return NULL;
+    }
+
+    /* Start from NULL so the shared failure path can free unconditionally */
+    cvd->time = NULL;
+    cvd->md5 = NULL;
+    cvd->dsig = NULL;
+    cvd->builder = NULL;
+
+    cvd->time = cli_strtok(head, 1, ":");
+    if(cvd->time == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the creation time\n");
+	goto fail;
+    }
+
+    field = cli_strtok(head, 2, ":");
+    if(field == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the version number\n");
+	goto fail;
+    }
+    cvd->version = atoi(field);
+    free(field);
+
+    field = cli_strtok(head, 3, ":");
+    if(field == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the number of signatures\n");
+	goto fail;
+    }
+    cvd->sigs = atoi(field);
+    free(field);
+
+    field = cli_strtok(head, 4, ":");
+    if(field == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the functionality level\n");
+	goto fail;
+    }
+    cvd->fl = atoi(field);
+    free(field);
+
+    cvd->md5 = cli_strtok(head, 5, ":");
+    if(cvd->md5 == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the MD5 checksum\n");
+	goto fail;
+    }
+
+    cvd->dsig = cli_strtok(head, 6, ":");
+    if(cvd->dsig == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the digital signature\n");
+	goto fail;
+    }
+
+    cvd->builder = cli_strtok(head, 7, ":");
+    if(cvd->builder == NULL) {
+	cli_errmsg("cli_cvdparse: Can't parse the builder name\n");
+	goto fail;
+    }
+
+    field = cli_strtok(head, 8, ":");
+    if(field != NULL) {
+	cvd->stime = atoi(field);
+	free(field);
+    } else {
+	cli_dbgmsg("cli_cvdparse: No creation time in seconds (old file format)\n");
+	cvd->stime = 0;
+    }
+
+    return cvd;
+
+fail:
+    free(cvd->time);
+    free(cvd->md5);
+    free(cvd->dsig);
+    free(cvd);
+    return NULL;
+}
+
+/*
+ * Read the header of the CVD file named by file and parse it.
+ *
+ * At most 512 bytes are read; the text is cut at the first line break and
+ * trailing blanks and line breaks are removed before it is passed to
+ * cl_cvdparse(). Returns the parsed header, or NULL if the file cannot be
+ * opened or read or the header does not parse.
+ */
+struct cl_cvd *cl_cvdhead(const char *file)
+{
+	FILE *fs;
+	char head[513], *eol;
+	int i;
+	unsigned int bread;
+
+
+    fs = fopen(file, "rb");
+    if(fs == NULL) {
+	cli_errmsg("cl_cvdhead: Can't open file %s\n", file);
+	return NULL;
+    }
+
+    bread = fread(head, 1, 512, fs);
+    if(bread == 0) {
+	cli_errmsg("cl_cvdhead: Can't read CVD header in %s\n", file);
+	fclose(fs);
+	return NULL;
+    }
+
+    fclose(fs);
+
+    /* Terminate what was read, then keep the first line only */
+    head[bread] = 0;
+    eol = strpbrk(head, "\n\r");
+    if(eol != NULL)
+	*eol = 0;
+
+    /* Strip trailing padding, walking back from the last byte read */
+    i = bread - 1;
+    while(i > 0 && (head[i] == ' ' || head[i] == '\n' || head[i] == '\r')) {
+	head[i] = 0;
+	i--;
+    }
+
+    return cl_cvdparse(head);
+}
+
+/*
+ * Release a structure returned by cl_cvdparse() or cl_cvdhead(), together
+ * with the strings it owns. Passing NULL is allowed and does nothing, in
+ * line with free().
+ */
+void cl_cvdfree(struct cl_cvd *cvd)
+{
+    if(!cvd)
+	return;
+
+    free(cvd->time);
+    free(cvd->md5);
+    free(cvd->dsig);
+    free(cvd->builder);
+    free(cvd);
+}
+
+/*
+ * Verify the CVD container open on fs.
+ *
+ * The 512-byte header is read from the start of the stream and parsed, the
+ * MD5 of the rest of the stream is compared with the checksum recorded in
+ * the header and, when built with HAVE_GMP, the digital signature is
+ * checked too.
+ *
+ * If cvdpt is not NULL it receives a copy of the parsed header. Its string
+ * members are set to NULL because the strings belong to the temporary
+ * header freed before returning; only the numeric members are meaningful.
+ *
+ * Returns 0 on success, CL_ECVD if the header cannot be read or parsed,
+ * CL_EMD5 if the checksum cannot be computed or does not match, and
+ * CL_EDSIG if the signature check fails.
+ */
+static int cli_cvdverify(FILE *fs, struct cl_cvd *cvdpt)
+{
+	struct cl_cvd *cvd;
+	char *md5, head[513];
+	int i;
+
+
+    fseek(fs, 0, SEEK_SET);
+    if(fread(head, 1, 512, fs) != 512) {
+	cli_errmsg("cli_cvdverify: Can't read CVD header\n");
+	return CL_ECVD;
+    }
+
+    /* Terminate the header and strip the trailing padding */
+    head[512] = 0;
+    for(i = 511; i > 0 && (head[i] == ' ' || head[i] == 10); head[i] = 0, i--);
+
+    if((cvd = cl_cvdparse(head)) == NULL)
+	return CL_ECVD;
+
+    if(cvdpt) {
+	memcpy(cvdpt, cvd, sizeof(struct cl_cvd));
+	/* Do not hand out pointers to strings that are freed below */
+	cvdpt->time = NULL;
+	cvdpt->md5 = NULL;
+	cvdpt->dsig = NULL;
+	cvdpt->builder = NULL;
+    }
+
+    /* NOTE(review): cli_md5stream() is defined elsewhere; it is assumed to
+     * return NULL when it cannot produce the digest string - confirm. */
+    md5 = cli_md5stream(fs, NULL);
+    if(!md5) {
+	cli_errmsg("cli_cvdverify: Can't compute the MD5 checksum\n");
+	cl_cvdfree(cvd);
+	return CL_EMD5;
+    }
+    cli_dbgmsg("MD5(.tar.gz) = %s\n", md5);
+
+    if(strncmp(md5, cvd->md5, 32)) {
+	cli_dbgmsg("cli_cvdverify: MD5 verification error\n");
+	free(md5);
+	cl_cvdfree(cvd);
+	return CL_EMD5;
+    }
+
+#ifdef HAVE_GMP
+    if(cli_versig(md5, cvd->dsig)) {
+	cli_dbgmsg("cli_cvdverify: Digital signature verification error\n");
+	free(md5);
+	cl_cvdfree(cvd);
+	return CL_EDSIG;
+    }
+#endif
+
+    free(md5);
+    cl_cvdfree(cvd);
+    return 0;
+}
+
+/*
+ * Verify the CVD file named by file. Returns CL_EOPEN if it cannot be
+ * opened, otherwise the result of cli_cvdverify() on its contents.
+ */
+int cl_cvdverify(const char *file)
+{
+	FILE *fs = fopen(file, "rb");
+	int ret;
+
+
+    if(!fs) {
+	cli_errmsg("cl_cvdverify: Can't open file %s\n", file);
+	return CL_EOPEN;
+    }
+
+    ret = cli_cvdverify(fs, NULL);
+    fclose(fs);
+
+    return ret;
+}
+
+/*
+ * Verify the CVD container open on fs, unpack it into a temporary directory
+ * and load the databases found there into *engine.
+ *
+ *   signo    passed through to cl_load()
+ *   warn     when non-zero, warn about a database older than 7 days
+ *   options  passed through to cl_load()
+ *
+ * Returns the error from cli_cvdverify() if verification fails, CL_ETMPDIR
+ * if the temporary directory cannot be set up, CL_EIO if the descriptor
+ * cannot be positioned after the header, CL_ECVDEXTR if unpacking fails,
+ * and otherwise the result of cl_load(). The temporary directory is removed
+ * on every path that created it.
+ */
+int cli_cvdload(FILE *fs, struct cl_engine **engine, unsigned int *signo, short warn, unsigned int options)
+{
+        char *dir;
+	struct cl_cvd cvd;
+	int ret;
+	time_t s_time;
+	int cfd;
+
+    cli_dbgmsg("in cli_cvdload()\n");
+
+    /* verify */
+
+    if((ret = cli_cvdverify(fs, &cvd)))
+	return ret;
+
+    if(cvd.stime && warn) {
+	time(&s_time);
+	/* 604800 seconds = 7 days */
+	if((int) s_time - cvd.stime > 604800) {
+	    cli_warnmsg("**************************************************\n");
+	    cli_warnmsg("***  The virus database is older than 7 days!  ***\n");
+	    cli_warnmsg("***   Please update it as soon as possible.    ***\n");
+	    cli_warnmsg("**************************************************\n");
+	}
+    }
+
+    if(cvd.fl > cl_retflevel()) {
+	cli_warnmsg("***********************************************************\n");
+	cli_warnmsg("***  This version of the ClamAV engine is outdated.     ***\n");
+	cli_warnmsg("*** DON'T PANIC! Read http://www.clamav.net/support/faq ***\n");
+	cli_warnmsg("***********************************************************\n");
+    }
+
+    /* NOTE(review): cli_gentemp() is defined elsewhere; it is assumed to
+     * return NULL when no name can be generated - confirm. */
+    dir = cli_gentemp(NULL);
+    if(!dir) {
+	cli_errmsg("cli_cvdload(): Can't generate temporary directory name\n");
+	return CL_ETMPDIR;
+    }
+    if(mkdir(dir, 0700)) {
+	cli_errmsg("cli_cvdload(): Can't create temporary directory %s\n", dir);
+	free(dir);
+	return CL_ETMPDIR;
+    }
+
+    cfd = fileno(fs);
+
+    /* use only operations on file descriptors, and not on the FILE* from here on 
+     * if we seek the FILE*, the underlying descriptor may not seek as expected
+     * (for example on OpenBSD, cygwin, etc.).
+     * So seek the descriptor directly.
+     */ 
+
+    if(lseek(cfd, 512, SEEK_SET) == -1) {
+	cli_errmsg("cli_cvdload(): lseek(fs, 512, SEEK_SET) failed\n");
+	/* the directory already exists: remove it and release its name */
+	cli_rmdirs(dir);
+	free(dir);
+	return CL_EIO;
+    }
+
+    if(cli_untgz(cfd, dir)) {
+	cli_errmsg("cli_cvdload(): Can't unpack CVD file.\n");
+	/* do not leave a partially extracted database behind */
+	cli_rmdirs(dir);
+	free(dir);
+	return CL_ECVDEXTR;
+    }
+
+    /* load extracted directory */
+    ret = cl_load(dir, engine, signo, options);
+
+    cli_rmdirs(dir);
+    free(dir);
+
+    return ret;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,353 @@
+/*
+ *  Copyright (C) 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "cltypes.h"
+#include "dconf.h"
+#include "readdb.h"
+#include "str.h"
+#include "others.h"
+
+/* One row of the modules[] table: a switchable feature of the engine,
+ * identified by module and submodule name, with the bit that represents it
+ * in the matching cli_dconf field and its default state. */
+struct dconf_module {
+    const char	*mname;	    /* module name; NULL marks the end of the table */
+    const char	*sname;	    /* submodule name (NULL for the ELF row) */
+    uint32_t	bflag;	    /* bit flag */
+    uint8_t	state;	    /* default state (on/off) */
+};
+
+#ifdef CL_EXPERIMENTAL
+#define DCONF_ENABLE_EXPERIMENTAL 1
+#else
+#define DCONF_ENABLE_EXPERIMENTAL 0
+#endif
+
+/* Default configuration table, walked by cli_dconf_init() and
+ * cli_dconf_print(). Rows are { module, submodule, bit flag, default state }
+ * and the list ends with a row whose module name is NULL. */
+static struct dconf_module modules[] = {
+
+    { "PE",	    "PARITE",	    PE_CONF_PARITE,	    1 },
+    { "PE",	    "KRIZ",	    PE_CONF_KRIZ,	    1 },
+    { "PE",	    "MAGISTR",	    PE_CONF_MAGISTR,	    1 },
+    { "PE",	    "POLIPOS",	    PE_CONF_POLIPOS,	    1 },
+    { "PE",	    "MD5SECT",	    PE_CONF_MD5SECT,	    1 },
+    { "PE",	    "UPX",	    PE_CONF_UPX,	    1 },
+    { "PE",	    "FSG",	    PE_CONF_FSG,	    1 },
+
+    { "PE",	    "PETITE",	    PE_CONF_PETITE,	    1 },
+    { "PE",	    "PESPIN",	    PE_CONF_PESPIN,	    1 },
+    { "PE",	    "YC",	    PE_CONF_YC,		    1 },
+    { "PE",	    "WWPACK",	    PE_CONF_WWPACK,	    1 },
+
+    { "PE",	    "NSPACK",	    PE_CONF_NSPACK,	    1 },
+    { "PE",	    "MEW",	    PE_CONF_MEW,	    1 },
+    { "PE",	    "UPACK",	    PE_CONF_UPACK,	    1 },
+    { "PE",	    "ASPACK",	    PE_CONF_ASPACK,	    1 },
+
+    /* ELF has no submodules: a single bit switches the whole module */
+    { "ELF",	    NULL,	    0x1,		    1 },
+
+    { "ARCHIVE",    "RAR",	    ARCH_CONF_RAR,	    1 },
+    { "ARCHIVE",    "ZIP",	    ARCH_CONF_ZIP,	    1 },
+    { "ARCHIVE",    "GZIP",	    ARCH_CONF_GZ,	    1 },
+    { "ARCHIVE",    "BZIP",	    ARCH_CONF_BZ,	    1 },
+    { "ARCHIVE",    "ARJ",          ARCH_CONF_ARJ,	    1 },
+    { "ARCHIVE",    "SZDD",	    ARCH_CONF_SZDD,	    1 },
+    { "ARCHIVE",    "CAB",	    ARCH_CONF_CAB,	    1 },
+    { "ARCHIVE",    "CHM",	    ARCH_CONF_CHM,	    1 },
+    { "ARCHIVE",    "OLE2",	    ARCH_CONF_OLE2,	    1 },
+    { "ARCHIVE",    "TAR",	    ARCH_CONF_TAR,	    1 },
+    { "ARCHIVE",    "BINHEX",	    ARCH_CONF_BINHEX,	    1 },
+    { "ARCHIVE",    "SIS",	    ARCH_CONF_SIS,	    1 },
+    { "ARCHIVE",    "NSIS",	    ARCH_CONF_NSIS,	    1 },
+    { "ARCHIVE",    "AUTOIT",	    ARCH_CONF_AUTOIT,	    1 },
+
+    { "DOCUMENT",   "HTML",	    DOC_CONF_HTML,	    1 },
+    { "DOCUMENT",   "RTF",	    DOC_CONF_RTF,	    1 },
+    { "DOCUMENT",   "PDF",	    DOC_CONF_PDF,	    1 },
+
+    { "MAIL",	    "MBOX",	    MAIL_CONF_MBOX,	    1 },
+    { "MAIL",	    "TNEF",	    MAIL_CONF_TNEF,	    1 },
+    { "MAIL",	    "PST",	    MAIL_CONF_PST,	    1 },
+
+    { "OTHER",	    "UUENCODED",    OTHER_CONF_UUENC,	    1 },
+    { "OTHER",	    "SCRENC",	    OTHER_CONF_SCRENC,	    1 },
+    { "OTHER",	    "RIFF",	    OTHER_CONF_RIFF,	    1 },
+    { "OTHER",	    "JPEG",	    OTHER_CONF_JPEG,	    1 },
+    { "OTHER",	    "CRYPTFF",	    OTHER_CONF_CRYPTFF,	    1 },
+
+    { "PHISHING",   "ENGINE",       PHISHING_CONF_ENGINE,   1 },
+    /* enabled by default only in builds that define CL_EXPERIMENTAL */
+    { "PHISHING",   "ENTCONV",      PHISHING_CONF_ENTCONV,  DCONF_ENABLE_EXPERIMENTAL }, /* exp */
+
+    { NULL,	    NULL,	    0,			    0 }
+};
+
+/*
+ * Allocate a zeroed configuration and switch on the bit of every row of
+ * modules[] whose default state is on. Returns the new configuration, or
+ * NULL if the allocation fails.
+ */
+struct cli_dconf *cli_dconf_init(void)
+{
+	const struct dconf_module *mod;
+	struct cli_dconf *dconf;
+
+
+    dconf = (struct cli_dconf *) cli_calloc(sizeof(struct cli_dconf), 1);
+    if(dconf == NULL)
+	return NULL;
+
+    for(mod = modules; mod->mname != NULL; mod++) {
+	/* Rows that are off by default leave their bit cleared */
+	if(!mod->state)
+	    continue;
+
+	if(strcmp(mod->mname, "PE") == 0)
+	    dconf->pe |= mod->bflag;
+	else if(strcmp(mod->mname, "ELF") == 0)
+	    dconf->elf |= mod->bflag;
+	else if(strcmp(mod->mname, "ARCHIVE") == 0)
+	    dconf->archive |= mod->bflag;
+	else if(strcmp(mod->mname, "DOCUMENT") == 0)
+	    dconf->doc |= mod->bflag;
+	else if(strcmp(mod->mname, "MAIL") == 0)
+	    dconf->mail |= mod->bflag;
+	else if(strcmp(mod->mname, "OTHER") == 0)
+	    dconf->other |= mod->bflag;
+	else if(strcmp(mod->mname, "PHISHING") == 0)
+	    dconf->phishing |= mod->bflag;
+    }
+
+    return dconf;
+}
+
+/*
+ * Write the dynamic configuration to the debug log.
+ *
+ * Each module gets one "Module ..." line the first time one of its rows is
+ * met. While the module has any bit set, every row of it also gets a
+ * "Submodule" line showing whether its own bit is set. ELF has no
+ * submodule lines.
+ */
+void cli_dconf_print(struct cli_dconf *dconf)
+{
+	uint8_t seen_pe = 0, seen_elf = 0, seen_arch = 0, seen_doc = 0,
+		seen_mail = 0, seen_other = 0, seen_phishing = 0;
+	const struct dconf_module *mod;
+
+
+    cli_dbgmsg("Dynamic engine configuration settings:\n");
+    cli_dbgmsg("--------------------------------------\n");
+
+    for(mod = modules; mod->mname; mod++) {
+	if(!strcmp(mod->mname, "PE")) {
+	    if(!seen_pe) {
+		cli_dbgmsg("Module PE: %s\n", dconf->pe ? "On" : "Off");
+		seen_pe = 1;
+	    }
+	    if(dconf->pe)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->pe & mod->bflag) ? "On" : "** Off **");
+
+	} else if(!strcmp(mod->mname, "ELF")) {
+	    if(!seen_elf) {
+		cli_dbgmsg("Module ELF: %s\n", dconf->elf ? "On" : "Off");
+		seen_elf = 1;
+	    }
+
+	} else if(!strcmp(mod->mname, "ARCHIVE")) {
+	    if(!seen_arch) {
+		cli_dbgmsg("Module ARCHIVE: %s\n", dconf->archive ? "On" : "Off");
+		seen_arch = 1;
+	    }
+	    if(dconf->archive)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->archive & mod->bflag) ? "On" : "** Off **");
+
+	} else if(!strcmp(mod->mname, "DOCUMENT")) {
+	    if(!seen_doc) {
+		cli_dbgmsg("Module DOCUMENT: %s\n", dconf->doc ? "On" : "Off");
+		seen_doc = 1;
+	    }
+	    if(dconf->doc)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->doc & mod->bflag) ? "On" : "** Off **");
+
+	} else if(!strcmp(mod->mname, "MAIL")) {
+	    if(!seen_mail) {
+		cli_dbgmsg("Module MAIL: %s\n", dconf->mail ? "On" : "Off");
+		seen_mail = 1;
+	    }
+	    if(dconf->mail)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->mail & mod->bflag) ? "On" : "** Off **");
+
+	} else if(!strcmp(mod->mname, "OTHER")) {
+	    if(!seen_other) {
+		cli_dbgmsg("Module OTHER: %s\n", dconf->other ? "On" : "Off");
+		seen_other = 1;
+	    }
+	    if(dconf->other)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->other & mod->bflag) ? "On" : "** Off **");
+
+	} else if(!strcmp(mod->mname, "PHISHING")) {
+	    if(!seen_phishing) {
+		cli_dbgmsg("Module PHISHING %s\n", dconf->phishing ? "On" : "Off");
+		seen_phishing = 1;
+	    }
+	    if(dconf->phishing)
+		cli_dbgmsg("   * Submodule %10s:\t%s\n", mod->sname, (dconf->phishing & mod->bflag) ? "On" : "** Off **");
+	}
+    }
+}
+
+/*
+ * Check the functionality level range given in a configuration line.
+ *
+ * Token `field` of the ':'-separated entry is the minimum level and token
+ * `field + 1` the maximum level. A token that is absent imposes no limit;
+ * the maximum is only looked at when the minimum is present.
+ *
+ * Returns 1 if the level reported by cl_retflevel() lies within the range,
+ * and 0 if it lies outside or a token does not start with a digit.
+ */
+static int chkflevel(const char *entry, int field)
+{
+	char *pt;
+
+
+    if((pt = cli_strtok(entry, field, ":"))) { /* min version */
+	/* <ctype.h> functions take an unsigned char value; a negative plain
+	 * char would be undefined behaviour */
+	if(!isdigit((unsigned char) *pt)) {
+	    free(pt);
+	    return 0;
+	}
+
+	if((unsigned int) atoi(pt) > cl_retflevel()) {
+	    free(pt);
+	    return 0;
+	}
+
+	free(pt);
+
+	if((pt = cli_strtok(entry, field + 1, ":"))) { /* max version */
+	    if(!isdigit((unsigned char) *pt)) {
+		free(pt);
+		return 0;
+	    }
+
+	    if((unsigned int) atoi(pt) < cl_retflevel()) {
+		free(pt);
+		return 0;
+	    }
+
+	    free(pt);
+	}
+    }
+
+    return 1;
+}
+
+/*
+ * Load dynamic configuration overrides from fd into the engine.
+ *
+ * The engine is set up with cli_initengine() first. Every line is then
+ * matched against the "<MODULE>:" prefixes; on a match whose functionality
+ * level range (checked by chkflevel() from token 2) admits this engine, the
+ * "0x..." value after the prefix replaces the whole flag word of that
+ * module. All other lines are ignored.
+ *
+ * Returns CL_SUCCESS, the error from cli_initengine(), or CL_EMALFDB if a
+ * matching line carries no hexadecimal value. On failure the engine is
+ * released with cl_free().
+ */
+int cli_dconf_load(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+	char buffer[FILEBUFF];
+	unsigned int line = 0;
+	int ret = 0;
+	struct cli_dconf *dconf;
+	uint32_t val;
+
+
+    ret = cli_initengine(engine, options);
+    if(ret) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    dconf = (struct cli_dconf *) (*engine)->dconf;
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+	line++;
+	cli_chomp(buffer);
+
+	/* The prefixes exclude each other, so at most one branch applies */
+	if(!strncmp(buffer, "PE:", 3) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 3, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->pe = val;
+
+	} else if(!strncmp(buffer, "ELF:", 4) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 4, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->elf = val;
+
+	} else if(!strncmp(buffer, "ARCHIVE:", 8) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 8, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->archive = val;
+
+	} else if(!strncmp(buffer, "DOCUMENT:", 9) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 9, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->doc = val;
+
+	} else if(!strncmp(buffer, "MAIL:", 5) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 5, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->mail = val;
+
+	} else if(!strncmp(buffer, "OTHER:", 6) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 6, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->other = val;
+
+	} else if(!strncmp(buffer, "PHISHING:", 9) && chkflevel(buffer, 2)) {
+	    if(sscanf(buffer + 9, "0x%x", &val) != 1) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+	    dconf->phishing = val;
+	}
+    }
+
+    if(ret) {
+	cli_errmsg("Problem parsing configuration file at line %u\n", line);
+	cl_free(*engine);
+	return ret;
+    }
+
+    return CL_SUCCESS;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (C) 2003 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  Number encoding routines are based on yyyRSA by Erik Thiele
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef HAVE_GMP
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <gmp.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "dsig.h"
+#include "str.h"
+
+#define CLI_NSTR "118640995551645342603070001658453189751527774412027743746599405743243142607464144767361060640655844749760788890022283424922762488917565551002467771109669598189410434699034532232228621591089508178591428456220796841621637175567590476666928698770143328137383952820383197532047771780196576957695822641224262693037"
+
+#define CLI_ESTR "100001027"
+
+/*
+ * Map one character of the signature alphabet (a-z, A-Z, 0-9, '+', '/')
+ * to its position in that alphabet.
+ *
+ * Returns the position (0..63), or -1 when 'value' is not part of the
+ * alphabet.  The return type is int so that the -1 error marker is not
+ * truncated to 255 and stays detectable with a "< 0" test.
+ */
+static int cli_ndecode(unsigned char value)
+{
+	unsigned int i;
+	static const char ncodec[] = {
+	    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 
+	    'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 
+	    'y', 'z',
+	    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 
+	    'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
+	    'Y', 'Z',
+	    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+	    '+', '/'
+	};
+
+
+    for(i = 0; i < sizeof(ncodec); i++)
+	if(ncodec[i] == value)
+	    return (int) i;
+
+    cli_errmsg("cli_ndecode: value out of range\n");
+    return -1;
+}
+
+/*
+ * Decode a textual digital signature and apply the public-key operation.
+ *
+ * Every character of 'sig' is translated with cli_ndecode() and contributes
+ * six bits to a big number, character i being shifted left by 6*i bits.
+ * The number is then raised to 'e' modulo 'n' and its low 'plen' bytes are
+ * stored most significant byte first.
+ *
+ * Returns a newly allocated buffer of plen+1 bytes (the extra byte stays
+ * zero) that the caller must free, or NULL if 'sig' contains a character
+ * outside the alphabet or the allocation fails.
+ */
+unsigned char *cli_decodesig(const char *sig, unsigned int plen, mpz_t e, mpz_t n)
+{
+	int i, slen = strlen(sig), dec;
+	unsigned char *plain;
+	mpz_t r, p, c;
+
+
+    mpz_init(r);
+    mpz_init(c);
+
+    for(i = 0; i < slen; i++) {
+	dec = cli_ndecode(sig[i]);
+	/* cli_ndecode() signals failure with -1; when that value travels
+	 * through an unsigned char it arrives here as 255, so anything
+	 * outside the valid 0..63 range is treated as an error */
+	if(dec < 0 || dec > 63) {
+	    mpz_clear(r);
+	    mpz_clear(c);
+	    return NULL;
+	}
+
+	mpz_set_ui(r, dec);
+	mpz_mul_2exp(r, r, 6 * i);
+	mpz_add(c, c, r);
+    }
+
+    plain = (unsigned char *) cli_calloc(plen + 1, sizeof(unsigned char));
+    if(!plain) {
+	cli_errmsg("cli_decodesig: Can't allocate memory for 'plain'\n");
+	mpz_clear(r);
+	mpz_clear(c);
+	return NULL;
+    }
+
+    mpz_init(p);
+    mpz_powm(p, c, e, n); /* plain = cipher^e mod n */
+    mpz_clear(c);
+
+    /* peel off the least significant byte each round, filling 'plain'
+     * from its end towards its start */
+    for(i = plen - 1; i >= 0; i--) { /* reverse */
+	mpz_tdiv_qr_ui(p, r, p, 256);
+	plain[i] = mpz_get_ui(r);
+    }
+
+    mpz_clear(p);
+    mpz_clear(r);
+
+    return plain;
+}
+
+/*
+ * Verify that the digital signature 'dsig' matches the MD5 string 'md5'.
+ *
+ * 'md5' must be exactly 32 characters long and start with an alphanumeric
+ * character.  The signature is decoded with cli_decodesig() using the
+ * built-in modulus CLI_NSTR and exponent CLI_ESTR, converted to hex with
+ * cli_str2hex() and compared with 'md5'.
+ *
+ * Returns CL_SUCCESS on a match, CL_EMD5 when 'md5' fails the basic sanity
+ * test, and CL_EDSIG when the signature cannot be decoded or does not match.
+ */
+int cli_versig(const char *md5, const char *dsig)
+{
+	mpz_t n, e;
+	char *pt, *pt2 = NULL;
+	int ret = CL_EDSIG;
+
+
+    /* cast: passing a negative plain char to isalnum() is undefined */
+    if(strlen(md5) != 32 || !isalnum((unsigned char) md5[0])) {
+	/* someone is trying to fool us with empty/malformed MD5 ? */
+	cli_errmsg("SECURITY WARNING: MD5 basic test failure.\n");
+	return CL_EMD5;
+    }
+
+    mpz_init_set_str(n, CLI_NSTR, 10);
+    mpz_init_set_str(e, CLI_ESTR, 10);
+
+    if(!(pt = (char *) cli_decodesig(dsig, 16, e, n)))
+	goto done;
+
+    pt2 = cli_str2hex(pt, 16);
+    free(pt);
+    /* NOTE(review): cli_str2hex() is assumed to return NULL on failure;
+     * without this guard a NULL would reach "%s" and strncmp() below */
+    if(!pt2)
+	goto done;
+
+    cli_dbgmsg("cli_versig: Decoded signature: %s\n", pt2);
+
+    if(strncmp(md5, pt2, 32)) {
+	cli_dbgmsg("cli_versig: Signature doesn't match.\n");
+	goto done;
+    }
+
+    cli_dbgmsg("cli_versig: Digital signature is correct.\n");
+    ret = CL_SUCCESS;
+
+done:
+    /* single exit path: pt2 is NULL unless the hex conversion succeeded */
+    free(pt2);
+    mpz_clear(n);
+    mpz_clear(e);
+    return ret;
+}
+#endif

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,536 @@
+/*
+ *  Copyright (C) 2005 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <time.h>
+
+#include "cltypes.h"
+#include "elf.h"
+#include "clamav.h"
+#include "execs.h"
+
+/* Return v with its two bytes exchanged when c is non-zero, v itself otherwise. */
+static inline uint16_t EC16(uint16_t v, uint8_t c)
+{
+    return c ? (uint16_t) ((v >> 8) + (v << 8)) : v;
+}
+
+/* Return v with its four bytes reversed when c is non-zero, v itself otherwise. */
+static inline uint32_t EC32(uint32_t v, uint8_t c)
+{
+	uint32_t swapped;
+
+    if(c == 0)
+	return v;
+
+    swapped  = v >> 24;
+    swapped |= (v & 0x00FF0000) >> 8;
+    swapped |= (v & 0x0000FF00) << 8;
+    swapped |= v << 24;
+    return swapped;
+}
+
+/*
+ * Translate the virtual address 'vaddr' into a file offset using the first
+ * of the 'phnum' program headers in 'ph' whose [p_vaddr, p_vaddr + p_memsz)
+ * range contains it.  Header fields are passed through EC32() with 'conv'.
+ *
+ * On success *err is set to 0 and the offset is returned; when no header
+ * covers 'vaddr', *err is set to 1 and 0 is returned.
+ */
+static uint32_t cli_rawaddr(uint32_t vaddr, struct elf_program_hdr32 *ph, uint16_t phnum, uint8_t conv, uint8_t *err)
+{
+	uint16_t idx;
+
+
+    for(idx = 0; idx < phnum; idx++) {
+	const uint32_t seg_start = EC32(ph[idx].p_vaddr, conv);
+	const uint32_t seg_end = seg_start + EC32(ph[idx].p_memsz, conv);
+
+	if(seg_start > vaddr || seg_end <= vaddr)
+	    continue;
+
+	*err = 0;
+	return vaddr - seg_start + EC32(ph[idx].p_offset, conv);
+    }
+
+    *err = 1;
+    return 0;
+}
+
+/* Pairs a numeric ELF header code with the text shown in debug output. */
+struct cli_elf_label {
+	uint32_t code;
+	const char *text;
+};
+
+#define CLI_ELF_NLABELS(tab) ((unsigned int) (sizeof(tab) / sizeof((tab)[0])))
+
+/* e_type values */
+static const struct cli_elf_label cli_elf_file_types[] = {
+	{ 0x0, "None" },		/* ET_NONE */
+	{ 0x1, "Relocatable" },		/* ET_REL */
+	{ 0x2, "Executable" },		/* ET_EXEC */
+	{ 0x3, "Shared object" },	/* ET_DYN */
+	{ 0x4, "Core" }			/* ET_CORE */
+};
+
+/* e_machine values; due to a huge list, only the most popular machines are
+ * included.  The ELF specification assigns these numbers in decimal. */
+static const struct cli_elf_label cli_elf_machines[] = {
+	{ 0,  "None" },			/* EM_NONE */
+	{ 2,  "SPARC" },		/* EM_SPARC */
+	{ 3,  "Intel 80386" },		/* EM_386 */
+	{ 4,  "Motorola 68000" },	/* EM_68K */
+	{ 8,  "MIPS RS3000" },		/* EM_MIPS */
+	{ 15, "HPPA" },			/* EM_PARISC */
+	{ 20, "PowerPC" },		/* EM_PPC */
+	{ 21, "PowerPC 64-bit" },	/* EM_PPC64 */
+	{ 22, "IBM S390" },		/* EM_S390 */
+	{ 40, "ARM" },			/* EM_ARM */
+	{ 41, "Digital Alpha" },	/* EM_FAKE_ALPHA */
+	{ 43, "SPARC v9 64-bit" },	/* EM_SPARCV9 */
+	{ 50, "IA64" }			/* EM_IA_64 */
+};
+
+/* sh_type values */
+static const struct cli_elf_label cli_elf_section_types[] = {
+	{ 0x0,  "Null (no associated section)" },			/* SHT_NULL */
+	{ 0x1,  "Program information" },				/* SHT_PROGBITS */
+	{ 0x2,  "Symbol table" },					/* SHT_SYMTAB */
+	{ 0x3,  "String table" },					/* SHT_STRTAB */
+	{ 0x4,  "Relocation entries with explicit addends" },		/* SHT_RELA */
+	{ 0x5,  "Symbol hash table" },					/* SHT_HASH */
+	{ 0x6,  "Dynamic linking information" },			/* SHT_DYNAMIC */
+	{ 0x7,  "Note section" },					/* SHT_NOTE */
+	{ 0x8,  "Empty section (NOBITS)" },				/* SHT_NOBITS */
+	{ 0x9,  "Relocation entries w/o explicit addends" },		/* SHT_REL */
+	{ 0xb,  "Symbols for dynamic linking" },			/* SHT_DYNSYM */
+	{ 0xe,  "Array of pointers to initialization functions" },	/* SHT_INIT_ARRAY */
+	{ 0xf,  "Array of pointers to termination functions" },	/* SHT_FINI_ARRAY */
+	{ 0x10, "Array of pointers to preinit functions" },		/* SHT_PREINIT_ARRAY */
+	{ 0x6ffffffd, "Provided symbol versions" },			/* SHT_GNU_verdef */
+	{ 0x6ffffffe, "Required symbol versions" },			/* SHT_GNU_verneed */
+	{ 0x6fffffff, "Symbol Version Table" }				/* SHT_GNU_versym */
+};
+
+/* Return the text registered for 'code' in 'tab', or NULL if there is none. */
+static const char *cli_elf_label_text(const struct cli_elf_label *tab, unsigned int count, uint32_t code)
+{
+	unsigned int k;
+
+
+    for(k = 0; k < count; k++)
+	if(tab[k].code == code)
+	    return tab[k].text;
+
+    return NULL;
+}
+
+/*
+ * Common exit for malformed ELF input.  When DETECT_BROKEN is true the
+ * virus name (if the context provides a slot for it) is set to
+ * "Broken.Executable" and CL_VIRUS is returned; otherwise 'fallback' is
+ * returned unchanged.
+ * NOTE(review): DETECT_BROKEN is evaluated exactly as in the scanner body,
+ * i.e. with a cli_ctx pointer named 'ctx' in scope - confirm the macro
+ * depends on nothing else.
+ */
+static int cli_elf_broken(cli_ctx *ctx, int fallback)
+{
+    if(DETECT_BROKEN) {
+	if(ctx->virname)
+	    *ctx->virname = "Broken.Executable";
+	return CL_VIRUS;
+    }
+
+    return fallback;
+}
+
+/*
+ * Inspect the 32-bit ELF file open on descriptor 'desc', starting at the
+ * current file position, and log its headers through cli_dbgmsg().
+ *
+ * Returns:
+ *   CL_CLEAN   - not an ELF file, a non-32-bit ELF file, a file whose
+ *                header tables cannot be reached or read, or a file that
+ *                was walked completely;
+ *   CL_EFORMAT - suspicious header counts or entry sizes, or an entry
+ *                point that no program header covers;
+ *   CL_EMEM    - a header table could not be allocated;
+ *   CL_VIRUS   - any of the malformed-file conditions above while
+ *                DETECT_BROKEN is true (see cli_elf_broken()).
+ */
+int cli_scanelf(int desc, cli_ctx *ctx)
+{
+	struct elf_file_hdr32 file_hdr;
+	struct elf_section_hdr32 *section_hdr;
+	struct elf_program_hdr32 *program_hdr;
+	uint16_t shnum, phnum, shentsize, phentsize;
+	uint32_t entry, fentry, shoff, phoff, flags, i;
+	uint8_t conv = 0, err;
+	const char *label;
+
+
+    cli_dbgmsg("in cli_scanelf\n");
+
+    if(read(desc, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+	/* Not an ELF file? */
+	cli_dbgmsg("ELF: Can't read file header\n");
+	return CL_CLEAN;
+    }
+
+    if(memcmp(file_hdr.e_ident, "\x7f\x45\x4c\x46", 4)) {
+	cli_dbgmsg("ELF: Not an ELF file\n");
+	return CL_CLEAN;
+    }
+
+    if(file_hdr.e_ident[4] != 1) {
+	cli_dbgmsg("ELF: 64-bit binaries are not supported (yet)\n");
+	return CL_CLEAN;
+    }
+
+    /* e_ident[5] == 1 marks little-endian data; conversion is enabled
+     * whenever the file's byte order differs from the host's */
+    if(file_hdr.e_ident[5] == 1) {
+#if WORDS_BIGENDIAN == 0
+	cli_dbgmsg("ELF: File is little-endian - conversion not required\n");
+#else
+	cli_dbgmsg("ELF: File is little-endian - data conversion enabled\n");
+	conv = 1;
+#endif
+    } else {
+#if WORDS_BIGENDIAN == 0
+	cli_dbgmsg("ELF: File is big-endian - data conversion enabled\n");
+	conv = 1;
+#else
+	cli_dbgmsg("ELF: File is big-endian - conversion not required\n");
+#endif
+    }
+
+    label = cli_elf_label_text(cli_elf_file_types, CLI_ELF_NLABELS(cli_elf_file_types), EC16(file_hdr.e_type, conv));
+    if(label) {
+	cli_dbgmsg("ELF: File type: %s\n", label);
+    } else {
+	cli_dbgmsg("ELF: File type: Unknown (%d)\n", EC16(file_hdr.e_type, conv));
+    }
+
+    label = cli_elf_label_text(cli_elf_machines, CLI_ELF_NLABELS(cli_elf_machines), EC16(file_hdr.e_machine, conv));
+    if(label) {
+	cli_dbgmsg("ELF: Machine type: %s\n", label);
+    } else {
+	cli_dbgmsg("ELF: Machine type: Unknown (%d)\n", EC16(file_hdr.e_machine, conv));
+    }
+
+    entry = EC32(file_hdr.e_entry, conv);
+
+    /* Program headers */
+
+    phnum = EC16(file_hdr.e_phnum, conv);
+    cli_dbgmsg("ELF: Number of program headers: %d\n", phnum);
+    if(phnum > 128) {
+	cli_dbgmsg("ELF: Suspicious number of program headers\n");
+	return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    if(phnum && entry) {
+
+	phentsize = EC16(file_hdr.e_phentsize, conv);
+	if(phentsize != sizeof(struct elf_program_hdr32)) {
+	    cli_dbgmsg("ELF: phentsize != sizeof(struct elf_program_hdr32)\n");
+	    return cli_elf_broken(ctx, CL_EFORMAT);
+	}
+
+	phoff = EC32(file_hdr.e_phoff, conv);
+	cli_dbgmsg("ELF: Program header table offset: %d\n", phoff);
+	if((uint32_t) lseek(desc, phoff, SEEK_SET) != phoff)
+	    return cli_elf_broken(ctx, CL_CLEAN);
+
+	program_hdr = (struct elf_program_hdr32 *) cli_calloc(phnum, phentsize);
+	if(!program_hdr) {
+	    cli_errmsg("ELF: Can't allocate memory for program headers\n");
+	    return CL_EMEM;
+	}
+
+	cli_dbgmsg("------------------------------------\n");
+
+	for(i = 0; i < phnum; i++) {
+
+	    if(read(desc, &program_hdr[i], sizeof(struct elf_program_hdr32)) != sizeof(struct elf_program_hdr32)) {
+		cli_dbgmsg("ELF: Can't read segment #%d\n", i);
+		cli_dbgmsg("ELF: Possibly broken ELF file\n");
+		free(program_hdr);
+		return cli_elf_broken(ctx, CL_CLEAN);
+	    }
+
+	    cli_dbgmsg("ELF: Segment #%d\n", i);
+	    cli_dbgmsg("ELF: Segment type: 0x%x\n", EC32(program_hdr[i].p_type, conv));
+	    cli_dbgmsg("ELF: Segment offset: 0x%x\n", EC32(program_hdr[i].p_offset, conv));
+	    cli_dbgmsg("ELF: Segment virtual address: 0x%x\n", EC32(program_hdr[i].p_vaddr, conv));
+	    cli_dbgmsg("ELF: Segment real size: 0x%x\n", EC32(program_hdr[i].p_filesz, conv));
+	    cli_dbgmsg("ELF: Segment virtual size: 0x%x\n", EC32(program_hdr[i].p_memsz, conv));
+	    cli_dbgmsg("------------------------------------\n");
+	}
+
+	/* the entry point must lie inside one of the segments */
+	fentry = cli_rawaddr(entry, program_hdr, phnum, conv, &err);
+	free(program_hdr);
+	if(err) {
+	    cli_dbgmsg("ELF: Can't calculate file offset of entry point\n");
+	    return cli_elf_broken(ctx, CL_EFORMAT);
+	}
+	cli_dbgmsg("ELF: Entry point address: 0x%.8x\n", entry);
+	cli_dbgmsg("ELF: Entry point offset: 0x%.8x (%d)\n", fentry, fentry);
+    }
+
+    /* Sections */
+
+    shnum = EC16(file_hdr.e_shnum, conv);
+    cli_dbgmsg("ELF: Number of sections: %d\n", shnum);
+    if(shnum > 256) {
+	cli_dbgmsg("ELF: Suspicious number of sections\n");
+	return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    shentsize = EC16(file_hdr.e_shentsize, conv);
+    if(shentsize != sizeof(struct elf_section_hdr32)) {
+	cli_dbgmsg("ELF: shentsize != sizeof(struct elf_section_hdr32)\n");
+	return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    shoff = EC32(file_hdr.e_shoff, conv);
+    cli_dbgmsg("ELF: Section header table offset: %d\n", shoff);
+    if((uint32_t) lseek(desc, shoff, SEEK_SET) != shoff) {
+	/* Possibly broken end of file */
+	return cli_elf_broken(ctx, CL_CLEAN);
+    }
+
+    section_hdr = (struct elf_section_hdr32 *) cli_calloc(shnum, shentsize);
+    if(!section_hdr) {
+	cli_errmsg("ELF: Can't allocate memory for section headers\n");
+	return CL_EMEM;
+    }
+
+    cli_dbgmsg("------------------------------------\n");
+
+    for(i = 0; i < shnum; i++) {
+
+	if(read(desc, &section_hdr[i], sizeof(struct elf_section_hdr32)) != sizeof(struct elf_section_hdr32)) {
+	    cli_dbgmsg("ELF: Can't read section header\n");
+	    cli_dbgmsg("ELF: Possibly broken ELF file\n");
+	    free(section_hdr);
+	    return cli_elf_broken(ctx, CL_CLEAN);
+	}
+
+	cli_dbgmsg("ELF: Section %d\n", i);
+	cli_dbgmsg("ELF: Section offset: %d\n", EC32(section_hdr[i].sh_offset, conv));
+	cli_dbgmsg("ELF: Section size: %d\n", EC32(section_hdr[i].sh_size, conv));
+
+	label = cli_elf_label_text(cli_elf_section_types, CLI_ELF_NLABELS(cli_elf_section_types), EC32(section_hdr[i].sh_type, conv));
+	if(label) {
+	    cli_dbgmsg("ELF: Section type: %s\n", label);
+	} else {
+	    cli_dbgmsg("ELF: Section type: Unknown\n");
+	}
+
+	flags = EC32(section_hdr[i].sh_flags, conv);
+
+	if(flags & 0x1) { /* SHF_WRITE */
+	    cli_dbgmsg("ELF: Section contains writable data\n");
+	}
+
+	if(flags & 0x2) { /* SHF_ALLOC */
+	    cli_dbgmsg("ELF: Section occupies memory\n");
+	}
+
+	if(flags & 0x4) { /* SHF_EXECINSTR */
+	    cli_dbgmsg("ELF: Section contains executable code\n");
+	}
+
+	cli_dbgmsg("------------------------------------\n");
+    }
+
+    free(section_hdr);
+    return CL_CLEAN;
+}
+
+/*
+ * Read the 32-bit ELF headers from descriptor 'desc' and fill 'elfinfo':
+ * ep receives the file offset of the entry point (0 when there are no
+ * program headers or no entry address), nsections the section count, and
+ * section a newly allocated array holding rva/raw/rsz for every section.
+ *
+ * Returns 0 on success and -1 on any failure.  On the failure paths that
+ * follow the allocation of elfinfo->section the array is freed again and
+ * the pointer reset to NULL.
+ */
+int cli_elfheader(int desc, struct cli_exe_info *elfinfo)
+{
+	struct elf_file_hdr32 hdr;
+	struct elf_section_hdr32 *sections;
+	struct elf_program_hdr32 *segments;
+	uint16_t nsect, nseg, sect_entsz, seg_entsz, k;
+	uint32_t entry_va, entry_off = 0, sect_tab, seg_tab;
+	uint8_t swap = 0, bad;
+
+
+    cli_dbgmsg("in cli_elfheader\n");
+
+    if(read(desc, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+	/* Not an ELF file? */
+	cli_dbgmsg("ELF: Can't read file header\n");
+	return -1;
+    }
+
+    if(memcmp(hdr.e_ident, "\x7f\x45\x4c\x46", 4) != 0) {
+	cli_dbgmsg("ELF: Not an ELF file\n");
+	return -1;
+    }
+
+    if(hdr.e_ident[4] != 1) {
+	cli_dbgmsg("ELF: 64-bit binaries are not supported (yet)\n");
+	return -1;
+    }
+
+    /* byte-swap only when file and host byte order differ */
+#if WORDS_BIGENDIAN == 1
+    if(hdr.e_ident[5] == 1)
+	swap = 1;
+#elif WORDS_BIGENDIAN == 0
+    if(hdr.e_ident[5] != 1)
+	swap = 1;
+#endif
+
+    nseg = EC16(hdr.e_phnum, swap);
+    if(nseg > 128) {
+	cli_dbgmsg("ELF: Suspicious number of program headers\n");
+	return -1;
+    }
+    entry_va = EC32(hdr.e_entry, swap);
+
+    if(nseg != 0 && entry_va != 0) {
+	seg_entsz = EC16(hdr.e_phentsize, swap);
+	if(seg_entsz != sizeof(struct elf_program_hdr32)) {
+	    cli_dbgmsg("ELF: phentsize != sizeof(struct elf_program_hdr32)\n");
+	    return -1;
+	}
+
+	seg_tab = EC32(hdr.e_phoff, swap);
+	if((uint32_t) lseek(desc, seg_tab, SEEK_SET) != seg_tab)
+	    return -1;
+
+	segments = (struct elf_program_hdr32 *) cli_calloc(nseg, seg_entsz);
+	if(segments == NULL) {
+	    cli_errmsg("ELF: Can't allocate memory for program headers\n");
+	    return -1;
+	}
+
+	for(k = 0; k < nseg; k++) {
+	    if(read(desc, &segments[k], sizeof(struct elf_program_hdr32)) == sizeof(struct elf_program_hdr32))
+		continue;
+
+	    cli_dbgmsg("ELF: Can't read segment #%d\n", k);
+	    free(segments);
+	    return -1;
+	}
+
+	entry_off = cli_rawaddr(entry_va, segments, nseg, swap, &bad);
+	free(segments);
+	if(bad) {
+	    cli_dbgmsg("ELF: Can't calculate file offset of entry point\n");
+	    return -1;
+	}
+    }
+
+    elfinfo->ep = entry_off;
+
+    nsect = EC16(hdr.e_shnum, swap);
+    if(nsect > 256) {
+	cli_dbgmsg("ELF: Suspicious number of sections\n");
+	return -1;
+    }
+    elfinfo->nsections = nsect;
+
+    sect_entsz = EC16(hdr.e_shentsize, swap);
+    if(sect_entsz != sizeof(struct elf_section_hdr32)) {
+	cli_dbgmsg("ELF: shentsize != sizeof(struct elf_section_hdr32)\n");
+	return -1;
+    }
+
+    sect_tab = EC32(hdr.e_shoff, swap);
+    if((uint32_t) lseek(desc, sect_tab, SEEK_SET) != sect_tab) {
+	/* Possibly broken end of file */
+	return -1;
+    }
+
+    elfinfo->section = (struct cli_exe_section *) cli_calloc(elfinfo->nsections, sizeof(struct cli_exe_section));
+    if(elfinfo->section == NULL) {
+	cli_dbgmsg("ELF: Can't allocate memory for section headers\n");
+	return -1;
+    }
+
+    sections = (struct elf_section_hdr32 *) cli_calloc(nsect, sect_entsz);
+    if(sections == NULL) {
+	cli_errmsg("ELF: Can't allocate memory for section headers\n");
+	goto drop_sections;
+    }
+
+    for(k = 0; k < nsect; k++) {
+
+	if(read(desc, &sections[k], sizeof(struct elf_section_hdr32)) != sizeof(struct elf_section_hdr32)) {
+	    free(sections);
+	    goto drop_sections;
+	}
+
+	elfinfo->section[k].rva = EC32(sections[k].sh_addr, swap);
+	elfinfo->section[k].raw = EC32(sections[k].sh_offset, swap);
+	elfinfo->section[k].rsz = EC32(sections[k].sh_size, swap);
+    }
+
+    free(sections);
+    return 0;
+
+drop_sections:
+    /* undo the section array handed to the caller */
+    free(elfinfo->section);
+    elfinfo->section = NULL;
+    return -1;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,981 @@
+/*
+ *  HTML Entity & Encoding normalization.
+ *
+ *  Copyright (C) 2006 Török Edvin <edwin at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as 
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+#include "clamav-config.h"
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "htmlnorm.h"
+#include "hashtab.h"
+#include "entconv.h"
+#include "entitylist.h"
+#include "cltypes.h"
+
+#ifdef HAVE_ICONV_H
+#include <iconv.h>
+#endif
+#include "encoding_aliases.h"
+
+
+#define MAX_LINE 1024
+
+#ifndef EILSEQ
+#define EILSEQ 84
+#endif
+
+/*
+ * Look up the entity name 'entity' in the converter's hash table and return
+ * its normalized text as a newly allocated, NUL-terminated string that the
+ * caller must free.
+ *
+ * Returns NULL when the entity is not found or memory cannot be allocated.
+ */
+unsigned char* entity_norm(const struct entity_conv* conv,const unsigned char* entity)
+{
+	struct element* e = hashtab_find(conv->ht,entity,strlen((const char*)entity));
+	if(e && e->key) {
+		const int val = e->data;
+		/* '<' and '>' must stay escaped: emitting them raw would turn
+		 * the text of an escaped entity back into markup */
+		if(val == '<')/* this was an escaped <, so output it escaped*/
+			return (unsigned char*)cli_strdup("&lt;");
+		else if(val == '>')/* see above */
+			return (unsigned char*)cli_strdup("&gt;");
+		else if(val<127) {
+			/* single character, emitted as-is */
+			unsigned char *e_out = cli_malloc(2);
+
+			if(!e_out)
+			    return NULL;
+
+			e_out[0] = (unsigned char)val;
+			e_out[1] = '\0';
+			return e_out;
+		}
+		else if(val==160)
+			/* NOTE(review): 160 is the no-break space code point; the
+			 * literal below is kept exactly as found - confirm whether
+			 * an entity reference was intended here */
+			return (unsigned char*)cli_strdup(" ");
+		else {
+			/* everything else becomes a numeric character reference */
+			unsigned char *ent_out = cli_malloc(10);
+
+			if(!ent_out)
+			    return NULL;
+
+			/* pass the real buffer size; snprintf always terminates */
+			snprintf((char*)ent_out,10,"&#%d;",val);
+			ent_out[9] = '\0';
+			return ent_out;
+		}
+	}
+	else
+		return NULL;
+}
+
+/* sane default, must be larger, than the longest possible return string,
+ * which is
+ * &#xxx;*/
+#define MIN_BUFFER_SIZE 32
+
+/*
+ * Initialize the entity converter 'conv' with three work buffers of
+ * 'buffer_size' bytes each and the default encoding name "ISO-8859-1".
+ * The 'encoding' argument is not used by this function.
+ *
+ * Returns 0 on success, CL_ENULLARG when 'buffer_size' is smaller than
+ * MIN_BUFFER_SIZE or 'conv' is NULL, and CL_EMEM when a buffer cannot be
+ * allocated.  On CL_EMEM everything allocated so far is released and the
+ * corresponding pointers are reset to NULL.
+ */
+int init_entity_converter(struct entity_conv* conv,const unsigned char* encoding,size_t buffer_size)
+{
+	if(buffer_size < MIN_BUFFER_SIZE) {
+		cli_warnmsg("Entity converter: Supplied buffer size:%lu, smaller than minimum required: %d\n",(unsigned long)buffer_size,MIN_BUFFER_SIZE);
+		return CL_ENULLARG;
+	}
+	if(!conv)
+		return CL_ENULLARG;
+
+	/* start from NULL so the failure path can free unconditionally */
+	conv->tmp_area.buffer = NULL;
+	conv->out_area.buffer = NULL;
+	conv->norm_area.buffer = NULL;
+
+	conv->encoding = (unsigned char*) cli_strdup("ISO-8859-1");
+	conv->autodetected = OTHER;
+	conv->bom_cnt = 0;
+	conv->buffer_cnt = 0;
+	conv->bytes_read = 0;
+	conv->partial = 0;
+	conv->entity_buffcnt = 0;
+	conv->buffer_size = buffer_size;
+	conv->priority = NOPRIO;
+
+	conv->tmp_area.offset = 0;
+	conv->tmp_area.length = 0;
+	conv->tmp_area.buffer  =  cli_malloc(buffer_size);
+	if(!conv->tmp_area.buffer)
+		goto nomem;
+
+	conv->out_area.offset = 0;
+	conv->out_area.length = 0;
+	conv->out_area.buffer = cli_malloc(buffer_size);
+	if(!conv->out_area.buffer)
+		goto nomem;
+
+	conv->norm_area.offset = 0;
+	conv->norm_area.length = 0;
+	conv->norm_area.buffer = cli_malloc(buffer_size);
+	if(!conv->norm_area.buffer)
+		goto nomem;
+
+	conv->ht = &entities_htable;
+	conv->msg_zero_shown = 0;
+
+	return 0;
+
+nomem:
+	/* release the encoding name too and leave no dangling pointers */
+	free(conv->out_area.buffer);
+	conv->out_area.buffer = NULL;
+	free(conv->tmp_area.buffer);
+	conv->tmp_area.buffer = NULL;
+	free(conv->encoding);
+	conv->encoding = NULL;
+	return CL_EMEM;
+}
+
+/*
+ * Resolve the encoding name 'fromcode' to an 'enum encodings' value, store
+ * it in *encoding and return the byte count associated with it: 4 for the
+ * UCS4 values, 2 for the UTF16 values and 1 for everything else.
+ *
+ * The two unusual UCS-4 byte orders are recognized by pointer identity with
+ * the UCS4_2143 / UCS4_3412 constants; any other name is looked up in
+ * aliases_htable, and E_OTHER is used when nothing matches.
+ */
+static size_t encoding_bytes(const unsigned char* fromcode, enum encodings* encoding)
+{
+	const unsigned char* name = (const unsigned char*) fromcode;
+	size_t width = 1;
+
+	*encoding = E_OTHER;
+	/* special case for these unusual byteorders */
+	if(name == UCS4_2143) {
+		*encoding = E_UCS4_2134;
+	}
+	else if(name == UCS4_3412) {
+		*encoding = E_UCS4_3412;
+	}
+	else {
+		struct element * alias = hashtab_find(&aliases_htable,name,strlen((const char*)fromcode));
+		if(alias && alias->key)
+			*encoding = alias->data;
+	}
+
+	switch(*encoding) {
+		case E_UCS4:
+		case E_UCS4_1234:
+		case E_UCS4_4321:
+		case E_UCS4_2134:
+		case E_UCS4_3412:
+			width = 4;
+			break;
+		case E_UTF16:
+		case E_UTF16_BE:
+		case E_UTF16_LE:
+			width = 2;
+			break;
+		default:
+			/* E_UTF8, E_UNKNOWN, E_OTHER and anything else */
+			break;
+	}
+	return width;
+}
+
+#ifndef HAVE_ICONV_H
+/*
+ * Conversion descriptor for the built-in iconv replacement that is compiled
+ * when HAVE_ICONV_H is not defined.
+ */
+typedef struct {
+	enum encodings encoding; /* source encoding; selects the conversion routine in iconv() */
+	size_t size; /* byte count for that encoding; iconv() aligns its copy length to it */
+} * iconv_t;
+
+/*
+ * Allocate a conversion descriptor for the source encoding 'fromcode'.
+ * 'tocode' is currently ignored.  Returns NULL if the allocation fails.
+ */
+static iconv_t iconv_open(const char *tocode, const char* fromcode)
+{
+	iconv_t cd = cli_malloc(sizeof(*cd));
+
+	if(cd != NULL) {
+		/* TODO: check that tocode is UTF16BE */
+		cd->size = encoding_bytes(fromcode,&cd->encoding);
+	}
+	return cd;
+}
+
+/* Release a descriptor obtained from iconv_open(); NULL is accepted. Always returns 0. */
+static int iconv_close(iconv_t cd)
+{
+	free(cd); /* free(NULL) is a no-op */
+	return 0;
+}
+
+
+/*
+ * Built-in replacement for iconv(): convert the bytes at *inbuf from the
+ * encoding recorded in 'iconv_struct' to two-byte output units at *outbuf.
+ *
+ * *inbuf / *inbytesleft and *outbuf / *outbytesleft are advanced by the
+ * number of bytes actually consumed and produced.
+ *
+ * Returns 0 when all input was consumed.  Otherwise returns -1 with errno
+ * set to E2BIG (input left over) or, for UTF-8 input, EILSEQ (an invalid or
+ * truncated sequence stopped the conversion).
+ */
+static int iconv(iconv_t iconv_struct,char **inbuf, size_t *inbytesleft,
+		char** outbuf, size_t *outbytesleft)
+{
+	const size_t maxcopy = (*inbytesleft > *outbytesleft ? *outbytesleft  : *inbytesleft) & ~(iconv_struct->size - 1);
+	const uint8_t* input = (const uint8_t*)*inbuf;
+	uint8_t* output = (uint8_t*)*outbuf;
+	/* bytes written by the fixed-width paths; the UCS-4 paths emit two
+	 * bytes for every four consumed and lower this to maxcopy/2 */
+	size_t produced = maxcopy;
+	size_t i;
+
+	/*,maxcopy is aligned to data size */
+	/* output is always utf16be !*/
+	switch(iconv_struct->encoding) {
+		case E_UCS4:
+		case E_UCS4_1234:			
+			{
+				for(i=0;i < maxcopy; i += 4) {
+					if(!input[i+2] && !input[i+3]) {
+						output[i/2] = input[i+1]; /* is compiler smart enough to replace /2, with >>1 ? */
+						output[i/2+1] = input[i];
+					}
+					else {
+						cli_dbgmsg("Warning: unicode character out of utf16 range!\n");
+						output[i/2] = 0xff;
+						output[i/2+1] = 0xff;
+					}
+				}
+				produced = maxcopy/2;
+				break;
+			}
+		case E_UCS4_4321:
+			{
+				const uint16_t *in = (const uint16_t*)input;/*UCS4_4321, and UTF16_BE have same endianness, no need for byteswap here*/
+				uint16_t *out = (uint16_t*)output;
+				for(i=0;i<maxcopy/2; i+=2) {
+					if(!in[i]) {
+						out[i/2] = in[i+1];
+					}
+					else {
+						out[i/2] = 0xffff;
+					}
+				}
+				produced = maxcopy/2;
+				break;
+			}
+		case E_UCS4_2134: 
+			{
+				const uint16_t *in = (const uint16_t*)input;
+				uint16_t* out = (uint16_t*)output;
+				for(i=0;i<maxcopy/2;i+=2) {
+					if(!in[i+1])
+						out[i/2] = in[i];
+					else
+						out[i/2] = 0xffff;
+				}
+				produced = maxcopy/2;
+				break;
+			}
+		case E_UCS4_3412:
+			{
+				for(i=0;i < maxcopy;i += 4) {
+					if(!input[i] && !input[i+1]) {
+						output[i/2] = input[i+3];
+						output[i/2+1] = input[i+2];
+					}
+					else {
+						output[i/2] = 0xff;
+						output[i/2+1] = 0xff;
+					}
+				}
+				produced = maxcopy/2;
+				break;
+			}
+		case E_UTF16:
+		case E_UTF16_LE:
+			{
+				for(i=0;i < maxcopy;i += 2) {
+					output[i] = input[i+1];
+					output[i+1] = input[i];
+				}
+				break;
+			}
+		case E_UTF16_BE:
+			memcpy(output,input,maxcopy);
+			break;
+		case E_UNKNOWN:
+		case E_OTHER:
+			{
+				/* one input byte becomes one two-byte unit */
+				const size_t max_copy = *inbytesleft > (*outbytesleft/2) ? (*outbytesleft/2) : *inbytesleft;
+				for(i=0;i<max_copy;i++) {
+					output[i*2]   = 0;
+					output[i*2+1] = input[i];
+				}
+				*outbytesleft -= max_copy*2;
+				*inbytesleft  -= max_copy;
+				*inbuf += max_copy;
+				*outbuf += max_copy*2;
+				if(*inbytesleft) {
+					/* report like every other path: -1 plus errno */
+					errno = E2BIG;
+					return -1;
+				}
+				return 0;
+			}
+		case E_UTF8:
+			{
+				const size_t maxread  = *inbytesleft;
+				const size_t maxwrite = *outbytesleft;
+				size_t j = 0, k, seqlen;
+				int invalid = 0;
+
+				i = 0;
+				/* every sequence produces exactly two output bytes */
+				while(i < maxread && maxwrite - j >= 2) {
+					const uint8_t lead = input[i];
+
+					if(lead < 0x80)
+						seqlen = 1;
+					else if((lead & 0xE0) == 0xC0)
+						seqlen = 2;
+					else if((lead & 0xF0) == 0xE0)
+						seqlen = 3;
+					else if((lead & 0xF8) == 0xF0)
+						seqlen = 4;
+					else
+						seqlen = 0;
+
+					/* reject unknown lead bytes, sequences cut off by the
+					 * end of the input, and bad continuation bytes */
+					invalid = (seqlen == 0 || maxread - i < seqlen);
+					for(k = 1; !invalid && k < seqlen; k++)
+						invalid = ((input[i+k] & 0xC0) != 0x80);
+					if(invalid) {
+						cli_dbgmsg("invalid UTF8 character encountered\n");
+						break;
+					}
+
+					switch(seqlen) {
+						case 1:
+							output[j++] = 0;
+							output[j++] = lead;
+							break;
+						case 2:
+							/* 2 bytes long 110yyyyy zzzzzzzz -> 00000yyy yyzzzzzz*/
+							output[j++] = ((lead & 0x1F) >> 2) & 0x07;
+							output[j++] = ((lead & 0x1F) << 6) | (input[i+1] & 0x3F);
+							break;
+						case 3:
+							/* 3 bytes long 1110xxxx 10yyyyyy 10zzzzzzzz -> xxxxyyyy yyzzzzzz*/
+							output[j++] = (lead << 4) | ((input[i+1] >> 2) & 0x0F);
+							output[j++] = (input[i+1] << 6) | (input[i+2] & 0x3F);
+							break;
+						default:
+							/* 4 bytes long 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz*/
+							cli_dbgmsg("UTF8 character out of UTF16 range encountered");
+							output[j++] = 0xff;
+							output[j++] = 0xff;
+							break;
+					}
+					i += seqlen;
+				}
+				*inbytesleft -= i;
+				*outbytesleft -= j;
+				*inbuf += i;
+				*outbuf += j;
+				if(invalid) {
+					errno = EILSEQ;/* we had an early exit */
+					return -1;
+				}
+				if(*inbytesleft) {
+					errno = E2BIG;
+					return -1;
+				}
+				return 0;
+			}
+	}
+	
+	*outbytesleft -= produced;
+	*inbytesleft  -= maxcopy;
+	*inbuf += maxcopy;
+	*outbuf += produced;
+	if(*inbytesleft) {
+		errno = E2BIG;
+		return -1;
+	}
+	return  0;
+}
+
+#else
+
+
+
+#endif
+
+/* new iconv() version */
+/*
+ * Inspect the first four bytes stored in conv->bom and record the result in
+ * conv->autodetected (encoding constant, OTHER when nothing matches),
+ * conv->enc_bytes (4 unless a narrower value is set below) and
+ * conv->has_bom (1 only when a byte order mark was recognized).
+ */
+static inline void process_bom(struct entity_conv* conv)
+{
+	const unsigned char* bom = conv->bom;
+	const unsigned char* detected = OTHER;
+	int bom_found = 0;
+	uint8_t width = 4;/* default is UTF8, which has a maximum of 4 bytes*/
+
+	if(bom[0] == 0x00 && bom[1] == 0x00) {
+		if(bom[2] == 0xFE && bom[3] == 0xFF) {
+			detected = UCS4_1234;/* UCS-4 big-endian*/
+			bom_found = 1;
+		}
+		else if(bom[2] == 0xFF && bom[3] == 0xFE) {
+			detected = UCS4_2143;/* UCS-4 unusual order 2143 */
+			bom_found = 1;
+		}
+		else if(bom[2] == 0x00 && bom[3] == 0x3C)
+			detected = UNDECIDED_32_1234;
+		else if(bom[2] == 0x3C && bom[3] == 0x00)
+			detected = UNDECIDED_32_2143;
+	}
+	else if(bom[0] == 0x00 && bom[1] == 0x3C && bom[2] == 0x00) {
+		if(bom[3] == 0x00)
+			detected = UNDECIDED_32_3412;
+		else if(bom[3] == 0x3F) {
+			detected = UNDECIDED_16_BE;
+			width = 2;
+		}
+	}
+	else if(bom[0] == 0xFF && bom[1] == 0xFE) {
+		bom_found = 1;
+		if(bom[2] == 0x00 && bom[3] == 0x00)
+			detected = UCS4_4321;
+		else {
+			detected = UTF16_LE;
+			width = 2;
+		}
+	}
+	else if(bom[0] == 0xFE && bom[1] == 0xFF) {
+		bom_found = 1;
+		if(bom[2] == 0x00 && bom[3] == 0x00)
+			detected = UCS4_3412;
+		else {
+			detected = UTF16_BE;
+			width = 2;
+		}
+	}
+	else if(bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
+		/* width stays at the default, maximum 4 bytes */
+		detected = UTF8;
+		bom_found = 1;
+	}
+	else if(bom[0] == 0x3C && bom[1] == 0x00) {
+		if(bom[2] == 0x00 && bom[3] == 0x00)
+			detected = UNDECIDED_32_4321;
+		else if(bom[2] == 0x3F && bom[3] == 0x00) {
+			detected = UNDECIDED_16_LE;
+			width = 2;
+		}
+	}
+	else if(bom[0] == 0x3C && bom[1] == 0x3F && bom[2] == 0x78 && bom[3] == 0x6D) {
+		detected = UNDECIDED_8;
+		width = 1;
+	}
+	else if(bom[0] == 0x4C && bom[1] == 0x6F && bom[2] == 0xA7 && bom[3] == 0x94) {
+		detected = EBCDIC;
+		width = 1;
+	}
+
+	conv->autodetected = detected;
+	conv->enc_bytes = width;
+	conv->has_bom = bom_found;
+}
+
+/*
+ * Returns a newly allocated, upper-cased, NUL-terminated copy of the encoding
+ * name enc, or NULL if the allocation fails.  The special value OTHER is
+ * replaced by "ISO-8859-1" before copying.  The caller owns the result and
+ * releases it with free().
+ */
+static unsigned char* normalize_encoding(const unsigned char* enc)
+{
+	unsigned char* norm;
+	size_t i;
+	size_t len;
+
+	/* Substitute the default BEFORE measuring the string: the buffer size,
+	 * the copy loop and the terminator must all refer to the same string,
+	 * otherwise the copy can run past the end of the allocation. */
+	if(enc == OTHER)
+		enc = (const unsigned char*)"ISO-8859-1";
+
+	len = strlen((const char*)enc);
+	norm = cli_malloc(len+1);
+	if(!norm)
+		return NULL;
+	for(i=0;i < len; i++)
+		norm[i] = toupper(enc[i]);
+	norm[len]='\0';
+	return norm;
+}
+
+/* Maps a missing (NULL) encoding name to the "ISO-8859-1" default; any other
+ * pointer is handed back unchanged. */
+static const unsigned char* encoding_name(unsigned char* encoding)
+{
+	return encoding ? encoding : (const unsigned char*)"ISO-8859-1";
+}
+
+/*
+ * Requests that conv use the given encoding, subject to the priority rules:
+ *  - OTHER is ignored;
+ *  - once conv->priority is CONTENT_TYPE nothing may override it;
+ *  - an encoding established from a BOM is not overridden by a BOM-less
+ *    autodetection;
+ *  - a META request is refused when the code unit size reported by
+ *    encoding_bytes() differs between the current and the new encoding.
+ * On success conv->encoding is replaced by a normalized (upper-case) copy of
+ * the name, which conv then owns.
+ */
+void process_encoding_set(struct entity_conv* conv,const unsigned char* encoding,enum encoding_priority prio)
+{
+	unsigned char *tmp_encoding;
+	enum encodings tmp;
+	size_t new_size,old_size;
+
+	cli_dbgmsg("Setting encoding for %p  to %s, priority: %d\n",(void*)conv, encoding, prio);
+	if(encoding == OTHER)
+		return;
+	if(conv->priority == CONTENT_TYPE)
+		return;/* Content-type in header is highest priority, no overrides possible*/
+	if(conv->priority ==  BOM && prio == NOBOM_AUTODETECT)
+		return;
+
+	tmp_encoding = normalize_encoding(encoding);/* FIXME: better obey priorities*/
+	if(!tmp_encoding) {
+		/* allocation failed: keep the current encoding instead of passing
+		 * NULL on to encoding_bytes()/cli_dbgmsg() and discarding it */
+		cli_dbgmsg("process_encoding_set: out of memory, keeping current encoding\n");
+		return;
+	}
+	if(prio == META) {
+		old_size = encoding_bytes(conv->encoding,&tmp);
+		new_size = encoding_bytes(tmp_encoding,&tmp);
+		if(old_size != new_size)  {
+			/* on x86 gcc wants %u for size_t, on x86_64 it wants %lu for size_t. So just cast to unsigned long to make warnings go away. */
+			cli_dbgmsg("process_encoding_set: refusing to override encoding - new encoding size differs: %s(%lu) != %s(%lu)\n",conv->encoding,(unsigned long)old_size,tmp_encoding,(unsigned long)new_size);
+			free(tmp_encoding);
+			return;
+		}
+	}
+	free(conv->encoding);
+	conv->encoding = tmp_encoding;
+	cli_dbgmsg("New encoding for %p:%s\n",(void*)conv,conv->encoding);
+	/* reset stream */
+}
+
+/*
+ * Releases everything the converter owns: the encoding name and the tmp, out
+ * and norm buffers.  Each pointer is reset to NULL and buffer_size to 0.
+ * Always returns 0.
+ */
+static int encoding_norm_done(struct entity_conv* conv)
+{
+	/* free(NULL) is a no-op, so the pointers need no individual guards */
+	free(conv->encoding);
+	conv->encoding = NULL;
+
+	conv->buffer_size = 0;
+
+	free(conv->tmp_area.buffer);
+	conv->tmp_area.buffer = NULL;
+
+	free(conv->out_area.buffer);
+	conv->out_area.buffer = NULL;
+
+	free(conv->norm_area.buffer);
+	conv->norm_area.buffer = NULL;
+
+	return 0;
+}
+
+int entity_norm_done(struct entity_conv* conv)
+{ /* Non-static entry point: forwards to encoding_norm_done() and returns its result. */
+	return encoding_norm_done(conv);
+}
+
+static size_t read_raw(FILE *stream, m_area_t *m_area, int max_len, unsigned char* outbuff)
+{
+
+	/* Copies the next chunk of raw input into outbuff and returns the number
+	 * of bytes delivered (0 when nothing is available).  At most max_len
+	 * bytes are taken, and the chunk is cut shortly after the first '\n'.
+	 * The memory area is used when m_area is non-NULL, otherwise the stream. */
+	if (m_area) {
+		size_t area_maxcopy;
+		const unsigned char* src;
+		size_t copied;
+		if(m_area->offset >= m_area->length)
+			return 0; /* area exhausted */
+		area_maxcopy = (m_area->length > m_area->offset + max_len) ? max_len : m_area->length - m_area->offset; /* bytes left in the area, capped at max_len */
+		src = m_area->buffer + m_area->offset;
+		m_area->offset += area_maxcopy; /* advance optimistically; corrected below by what stays unread */
+		copied = area_maxcopy;
+		while(area_maxcopy && *src != '\n') { /* copy up to, but not including, the first newline */
+			*outbuff++ = *src++;
+			area_maxcopy--;
+		}
+		if(area_maxcopy > 3) {
+			/* copy 3 more bytes (the '\n' and the two bytes after it), just in case it's ucs4.
+			 * NOTE(review): when 1..3 bytes remain and the loop stopped on '\n',
+			 * the newline is not consumed; if it is the first byte of the window
+			 * this returns 0 without advancing - confirm the caller copes. */
+			*outbuff++ = *src++;
+			*outbuff++ = *src++;
+			*outbuff++ = *src++;
+			area_maxcopy -= 3;
+		}
+		m_area->offset -= area_maxcopy; /* give back the bytes that were not copied */
+		copied -= area_maxcopy;
+		return copied;
+	} else {
+		if (!stream) {
+			cli_dbgmsg("No HTML stream\n");
+			return 0;
+		}
+		else {
+			const size_t iread = fread(outbuff, 1, max_len, stream);
+			size_t i;
+			if(ferror(stream)) {
+				cli_errmsg("Error while reading HTML stream\n");
+			}
+			for(i=0; i < iread; i++)
+				if(outbuff[i] == '\n') {
+					/* report the data up to the newline plus 2 bytes, capped at what was read.
+					 * NOTE(review): fread() has already consumed iread bytes and nothing
+					 * here seeks back, so the bytes past the reported count appear to be
+					 * lost to the caller - confirm. */
+					return i+3 > iread ?  iread : i+3;
+				}
+			return iread;
+		}
+	}
+}
+
+/*
+ * Skips the Byte Order Mark at the start of the input, if process_bom() found
+ * one: *in is advanced and *inleft reduced by the BOM length.  Nothing happens
+ * when conv->has_bom is 0.  The out parameter is not used.
+ *
+ * BOM length: 3 bytes for UTF-8, otherwise conv->enc_bytes when that is 2 or
+ * 4, and 0 for anything else.
+ */
+static void output_first(struct entity_conv* conv,unsigned char** out, unsigned char** in,size_t* inleft)
+{
+	size_t bom_len;
+
+	if(!conv->has_bom)
+		return;
+
+	if(conv->autodetected == UTF8) {
+		/* process_bom() leaves enc_bytes at 4 for UTF-8 (its maximum sequence
+		 * length), so UTF-8 has to be recognised before looking at enc_bytes;
+		 * otherwise four bytes were skipped and the first character after the
+		 * 3-byte BOM was lost. */
+		bom_len = 3;
+	}
+	else {
+		switch(conv->enc_bytes) {
+			case 2:
+				bom_len = 2;
+				break;
+			case 4:
+				bom_len = 4;
+				break;
+			default:
+				bom_len = 0;
+				break;
+		}
+	}
+
+	/* never step past the available input */
+	if(bom_len > *inleft)
+		bom_len = *inleft;
+	*in += bom_len;
+	*inleft -= bom_len;
+}
+
+/* sarge leaks on iconv_open/iconv_close, so let's not open/close so many times;
+ * instead, each thread keeps its own pool of iconv descriptors. */
+
+struct iconv_cache { /* pool of already opened iconv descriptors, looked up by source encoding name */
+	iconv_t* tab; /* descriptor table, grown on demand in iconv_open_cached() */
+	size_t     len; /* number of slots allocated in tab */
+	size_t   last; /* number of slots in use; also the index of the next free slot */
+	struct   hashtable hashtab; /* maps an encoding name to its index in tab */
+};
+
+static void iconv_cache_init(struct iconv_cache* cache)
+{
+/*	Sets up the encoding-name hash table of a new cache.
+	The other fields (tab, len, last) are not touched here: both callers in
+	this file allocate the cache with cli_calloc(), so they start as NULL/0.
+	NOTE(review): 32 is presumably the initial hash table size - confirm
+	against hashtab_init(). */
+	cli_dbgmsg("Initializing iconv pool:%p\n",(void*)cache);
+	hashtab_init(&cache->hashtab, 32);
+}
+
+/* Closes every pooled iconv descriptor and releases all memory owned by the
+ * cache, including the cache structure itself.  cache must not be NULL. */
+static void iconv_cache_destroy(struct iconv_cache* cache)
+{
+	size_t slot = 0;
+
+	cli_dbgmsg("Destroying iconv pool:%p\n",(void*)cache);
+	while(slot < cache->last) {
+		cli_dbgmsg("closing iconv:%p\n",cache->tab[slot]);
+		iconv_close(cache->tab[slot]);
+		slot++;
+	}
+
+	hashtab_clear(&cache->hashtab);
+	free(cache->hashtab.htable);
+	free(cache->tab);
+	free(cache);
+}
+
+
+#ifdef CL_THREAD_SAFE
+static pthread_key_t iconv_pool_tls_key; /* thread-specific slot holding each thread's struct iconv_cache* */
+static pthread_once_t iconv_pool_tls_key_once = PTHREAD_ONCE_INIT; /* makes iconv_pool_tls_key_alloc() run only once */
+
+/* The key's destructor is called for all threads that exit via pthread_exit, or cancellation. Unfortunately that doesn't include
+ * the main thread, so we have to call it manually for the main thread (done through atexit, see iconv_cache_cleanup_main).*/
+
+static int cache_atexit_registered = 0; /* set to 1 once the atexit handler registration has been attempted */
+
+/* Thread-specific data destructor for iconv_pool_tls_key: destroys the
+ * thread's iconv cache; a NULL pointer is ignored. */
+static void iconv_pool_tls_instance_destroy(void* ptr)
+{
+	if(!ptr)
+		return;
+	iconv_cache_destroy(ptr);
+}
+
+static void iconv_cache_cleanup_main(void)
+{ /* atexit() handler registered by iconv_pool_tls_key_alloc(): destroys the cache of the thread running the handler, then deletes the key */
+	struct iconv_cache* cache = pthread_getspecific(iconv_pool_tls_key);
+	if(cache) {
+		iconv_pool_tls_instance_destroy(cache);
+		pthread_setspecific(iconv_pool_tls_key,NULL); /* clear the slot so the freed cache cannot be picked up again */
+	}
+	pthread_key_delete(iconv_pool_tls_key);
+}
+
+/*
+ * pthread_once() callback: creates the thread-specific key for the iconv
+ * pools (with iconv_pool_tls_instance_destroy as its destructor) and, the
+ * first time only, registers iconv_cache_cleanup_main() with atexit().
+ * A failed atexit() registration is merely logged.
+ */
+static void iconv_pool_tls_key_alloc(void)
+{
+	pthread_key_create(&iconv_pool_tls_key, iconv_pool_tls_instance_destroy);
+
+	if(cache_atexit_registered)
+		return;
+
+	cli_dbgmsg("iconv:registering atexit\n");
+	if(atexit(iconv_cache_cleanup_main) != 0)
+		cli_dbgmsg("failed to register atexit\n");
+	cache_atexit_registered = 1;
+}
+
+static void init_iconv_pool_ifneeded(void)
+{ /* thread-safe build: run iconv_pool_tls_key_alloc() exactly once per process */
+	pthread_once(&iconv_pool_tls_key_once, iconv_pool_tls_key_alloc);
+}
+
+/*
+ * Returns the calling thread's iconv cache, creating and registering it under
+ * iconv_pool_tls_key on first use.  Returns NULL if the cache cannot be
+ * allocated.
+ */
+static inline struct iconv_cache* cache_get_tls_instance(void)
+{
+	struct iconv_cache* pool = pthread_getspecific(iconv_pool_tls_key);
+
+	if(pool)
+		return pool;
+
+	pool = cli_calloc(1,sizeof(*pool));
+	if(!pool) {
+		cli_dbgmsg("!Out of memory allocating TLS iconv instance\n");
+		return NULL;
+	}
+
+	iconv_cache_init(pool);
+	pthread_setspecific(iconv_pool_tls_key, pool);
+	return pool;
+}
+
+#else
+
+static struct iconv_cache* global_iconv_cache = NULL; /* the single iconv pool used when CL_THREAD_SAFE is not defined */
+static int    iconv_global_inited = 0; /* 1 once global_iconv_cache has been allocated and initialised */
+
+
+static void iconv_cache_cleanup_main(void)
+{ /* atexit() handler registered by init_iconv_pool_ifneeded(): destroys the global pool */
+	iconv_cache_destroy(global_iconv_cache);
+}
+
+/*
+ * Non-thread-safe build: allocates and initialises the global iconv pool on
+ * first use and registers its cleanup with atexit().  If the allocation fails
+ * nothing is recorded, so the next call tries again.
+ */
+static inline void init_iconv_pool_ifneeded(void)
+{
+	if(iconv_global_inited)
+		return;
+
+	global_iconv_cache = cli_calloc(1,sizeof(*global_iconv_cache));
+	if(!global_iconv_cache)
+		return;
+
+	iconv_cache_init(global_iconv_cache);
+	/* same reporting as the thread-safe variant: a failed registration only
+	 * means the pool is not released at exit */
+	if(atexit(iconv_cache_cleanup_main)) {
+		cli_dbgmsg("failed to register atexit\n");
+	}
+	iconv_global_inited = 1;
+}
+
+
+static inline struct iconv_cache* cache_get_tls_instance(void)
+{ /* non-thread-safe build: there is only the global pool (NULL if its allocation failed) */
+	return global_iconv_cache;
+}
+
+#endif
+
+/*
+ * Returns an iconv descriptor converting from the encoding named fromcode to
+ * "UTF-16BE".  Descriptors are opened once and then served from the pool
+ * returned by cache_get_tls_instance(); the caller must NOT iconv_close()
+ * them.
+ *
+ * fromcode must be a non-NULL, NUL-terminated string.
+ *
+ * Returns (iconv_t)-1 on failure with errno set to EINVAL (no pool
+ * available), ENOMEM (pool table could not grow) or whatever iconv_open()
+ * reported.
+ */
+static iconv_t iconv_open_cached(const unsigned char* fromcode)
+{
+	struct iconv_cache * cache;
+	size_t idx;
+	const size_t fromcode_len = strlen((const char*)fromcode);
+	struct element * e;
+	iconv_t  iconv_struct;
+
+	init_iconv_pool_ifneeded();
+	cache = cache_get_tls_instance();/* gets TLS iconv pool */
+	if(!cache) {
+		cli_dbgmsg("!Unable to get TLS iconv cache!\n");
+		errno = EINVAL;
+		return (iconv_t)-1;
+	}
+
+	e = hashtab_find(&cache->hashtab, fromcode, fromcode_len);
+	/* Only indices of slots that are actually in use are valid, i.e.
+	 * 0 <= data < last.  (The check used to accept everything up to and
+	 * including len, which lies outside the table.) */
+	if(e && e->data >= 0 && (size_t)e->data < cache->last) {
+		return cache->tab[e->data];
+	}
+
+	cli_dbgmsg("iconv not found in cache, for encoding:%s\n",fromcode);
+	iconv_struct = iconv_open("UTF-16BE",(const char*)fromcode);
+	if(iconv_struct == (iconv_t)-1)
+		return (iconv_t)-1;
+
+	if(cache->last >= cache->len) {
+		/* Grow through a temporary pointer and update len/last only after
+		 * success, so a failed allocation leaves the pool exactly as it was
+		 * instead of losing the table while the counters already include
+		 * the new slot. */
+		const size_t new_len = cache->len + 16;
+		iconv_t* new_tab = realloc(cache->tab, new_len*sizeof(cache->tab[0]));
+		if(!new_tab) {
+			cli_dbgmsg("!Out of mem in iconv-pool\n");
+			iconv_close(iconv_struct);/* not stored anywhere, would leak otherwise */
+			errno = ENOMEM;
+			return (iconv_t)-1;
+		}
+		cache->tab = new_tab;
+		cache->len = new_len;
+	}
+
+	idx = cache->last++;
+	cache->tab[idx] = iconv_struct;
+	hashtab_insert(&cache->hashtab, fromcode, fromcode_len, idx);
+	cli_dbgmsg("iconv_open(),for:%s -> %p\n",fromcode,(void*)cache->tab[idx]);
+	return cache->tab[idx];
+}
+
+
+/*
+ * Reads raw input from stream_in or in_m_area, converts it with iconv from
+ * conv->encoding to UTF-16BE, normalizes that to single bytes and returns the
+ * next line of the normalized data as produced by cli_readline().
+ *
+ * Normalization of each 16-bit unit: 0 is dropped, values below 0x80 are
+ * copied as one byte, 160 (nbsp) becomes a space, everything else is written
+ * as a decimal "&#N;" entity.
+ *
+ * On the first call that has at least four input bytes the encoding is
+ * autodetected from them (process_bom) and a BOM, if present, is skipped.
+ *
+ * Returns NULL when conv or its tmp/out buffers are missing or maxlen < 2;
+ * otherwise the result of cli_readline() on conv->norm_area (or on
+ * conv->tmp_area if no iconv descriptor could be obtained at all).
+ *
+ * The original note here said tmp_area and out_area are of size maxlen; the
+ * code below bounds both by conv->buffer_size.
+ */
+unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in, m_area_t* in_m_area, const size_t maxlen)
+{
+	if(!conv || !conv->out_area.buffer || !conv->tmp_area.buffer || maxlen<2 )
+		return NULL;
+	else {
+		/* stream_in|in_m_area ->(read_raw) conv->tmp_area -> (iconv) conv->out_area -> (normalize) conv->norm_area -> (cli_readline) return value*/
+		const size_t tmp_move = conv->tmp_area.length - conv->tmp_area.offset;
+		const size_t tmp_available = conv->buffer_size - tmp_move;
+		const size_t max_read = maxlen < tmp_available ? maxlen : tmp_available;
+		unsigned char* tmpbuff = &conv->tmp_area.buffer[tmp_move];
+	
+		const size_t out_move = conv->out_area.length < conv->out_area.offset ? 0 : conv->out_area.length - conv->out_area.offset;
+		size_t outleft = conv->buffer_size - out_move;
+		unsigned char* out = &conv->out_area.buffer[out_move];
+
+		const size_t norm_move = conv->norm_area.length - conv->norm_area.offset;
+
+		unsigned char* norm;
+		const unsigned char* norm_end;
+		iconv_t iconv_struct;
+
+		size_t rc, inleft;
+		ssize_t i;
+
+		signed char alignfix;
+
+		/* move whatever left in conv->tmp_area to beginning */
+		if(tmp_move)
+			memmove(conv->tmp_area.buffer, conv->tmp_area.buffer + conv->tmp_area.offset, tmp_move);
+		conv->tmp_area.offset = 0;
+
+		/* read raw data from stream, or in_m_area into conv->tmp_area*/
+		conv->tmp_area.length = tmp_move + read_raw(stream_in, in_m_area, max_read, tmpbuff);
+
+		/* move whatever left in conv->out_area to beginning */
+		if(out_move)
+			memmove(conv->out_area.buffer, conv->out_area.buffer + conv->out_area.offset, out_move);
+		conv->out_area.offset = 0;
+
+		tmpbuff = conv->tmp_area.buffer;
+		inleft = conv->tmp_area.length;
+		if(!conv->bom_cnt && conv->tmp_area.length >= 4) {/* detect Byte Order Mark */
+			memcpy( conv->bom, tmpbuff, 4);
+			process_bom(conv);
+			process_encoding_set(conv,conv->autodetected,conv->has_bom ? BOM : NOBOM_AUTODETECT);
+			output_first(conv,&out,&tmpbuff,&inleft);
+			conv->bom_cnt++;
+		}
+
+		/* convert encoding conv->tmp_area. conv->out_area */
+		alignfix = inleft%4;/* iconv gives an error if we give him 3 bytes to convert, 
+				       and we are using ucs4, ditto for utf16, and 1 byte*/
+		inleft -= alignfix;/* the remainder is held back for the next call */
+
+		if(!inleft && alignfix) {
+			/* only the remainder is left: pad it with zero bytes to a full
+			 * group of four and mark the padding with a negative alignfix */
+			size_t k;
+			for(k=0;k+alignfix < 4;k++)
+				tmpbuff[alignfix+k] = '\0';
+			inleft = 4;
+			alignfix = -inleft;
+		}
+
+		iconv_struct = iconv_open_cached(encoding_name(conv->encoding));
+
+		if(iconv_struct == (iconv_t)-1) {
+			cli_dbgmsg("Iconv init problem for encoding:%s, falling back to iso encoding!\n",encoding_name(conv->encoding));
+			/* message shown only once/file */
+			/* what can we do? just fall back for it being an ISO-8859-1 */
+			free(conv->encoding);
+			conv->encoding = (unsigned char*) cli_strdup("ISO-8859-1");
+			/* cli_strdup() may return NULL; go through encoding_name() so that
+			 * iconv_open_cached() always gets a valid string to strlen() */
+			iconv_struct = iconv_open_cached(encoding_name(conv->encoding));
+			if(iconv_struct == (iconv_t)-1) {
+				cli_dbgmsg("fallback failed... bail out\n");
+				return cli_readline(NULL,&conv->tmp_area,maxlen);
+			}
+		}
+
+		if(inleft && outleft > conv->buffer_size/2 ) /* iconv doesn't like inleft to be 0 */ {
+			rc = iconv(iconv_struct, (char**) &tmpbuff,  &inleft, (char**) &out, &outleft);	
+		}
+		else
+			rc = 0;
+
+#if 0
+		 iconv_close(iconv_struct);/* - don't close, we are using a cached instance */
+#endif
+
+		if(rc==(size_t)-1 && errno != E2BIG) {
+				cli_dbgmsg("iconv error:%s, silently resuming (%ld,%ld,%lu,%lu)\n",strerror(errno),(long)(out-conv->out_area.buffer),(long)(tmpbuff-conv->tmp_area.buffer),(unsigned long)inleft,(unsigned long)outleft);
+				/* output raw byte, and resume at next byte.
+				 * NOTE(review): outleft is neither checked nor updated for
+				 * these two bytes - confirm there is always room. */
+				*out++ = 0;
+				*out++ = *tmpbuff++;
+				inleft--;
+/*				return cli_readline(NULL, &conv->norm_area, maxlen);*/
+		}
+
+		conv->tmp_area.length = inleft + (alignfix > 0 ? alignfix : 0);
+		conv->out_area.length = out - conv->out_area.buffer - out_move;
+
+		conv->tmp_area.offset = tmpbuff - conv->tmp_area.buffer;
+		conv->tmp_area.length += conv->tmp_area.offset;
+
+
+		/* move whatever left in conv->norm_area to beginning */
+		if(norm_move) {
+			if(norm_move < conv->buffer_size/2) {
+				memmove(conv->norm_area.buffer, conv->norm_area.buffer + conv->norm_area.offset, norm_move);
+				conv->norm_area.offset = 0;
+				norm = conv->norm_area.buffer + norm_move;
+			}
+			else {
+				/* don't modify offset here */
+				norm = conv->norm_area.buffer + conv->norm_area.length;
+			}
+		}
+		else {
+			conv->norm_area.offset = 0;
+			norm = conv->norm_area.buffer;	
+		}
+
+		/* now do the real normalization */
+		out = conv->out_area.buffer;/* skip over utf16 bom, FIXME: check if iconv really outputted a BOM */
+		norm_end = conv->norm_area.buffer + conv->buffer_size;
+		if(conv->out_area.length>0 && out[0] == 0xFF && out[1] == 0xFE)
+			i = 2;
+		else
+			i = 0;
+		/* NOTE(review): out[i+1] is read whenever i < length, so an odd
+		 * length would read one byte past it - confirm length is always even */
+		for(; i < conv->out_area.length; i += 2) {
+			uint16_t u16 = ( ((uint16_t)out[i]) << 8 ) | out[i+1];
+			if(!u16) {
+				if(alignfix >= 0 && !conv->msg_zero_shown) /* if alignfix is negative, this 0 byte is on-purpose, its padding */ {
+					conv->msg_zero_shown = 1;
+					cli_dbgmsg("Skipping null character in html stream\n");
+				}
+			}
+			else if(u16 < 0x80) {
+				if(norm >= norm_end)
+					break;
+				if((unsigned char)u16 ==0)
+					cli_dbgmsg("Impossible\n");
+				*norm++ = (unsigned char)u16;
+			}
+			else if (u16 == 160)  {/*nbsp*/
+				if(norm >= norm_end)
+					break;
+				*norm++ = 0x20;
+			}
+			else {
+				char buff[10];
+				int len;
+
+				snprintf(buff,9,"&#%d;",u16);
+				buff[9] = '\0';
+				len = strlen(buff);
+				if((norm_end - norm) <= len)
+					/* prevent buffer overflow */
+					break;
+				memcpy((char*)norm, buff, len);
+				norm += len;
+			}	
+		}
+		conv->out_area.offset = i; /* so that we can resume next time from here */
+
+		conv->norm_area.length = norm - conv->norm_area.buffer;
+/*
+		conv->norm_area.buffer[conv->buffer_size-1]=0;DONT DO THIS
+		if( (o =strstr(conv->norm_area.buffer,"Content")) && strstr(conv->norm_area.buffer,"text/x-"))
+			printf("%s\n",o);*/
+		/* final cli_readline from conv->norm_area */
+		return cli_readline(NULL, &conv->norm_area, maxlen);
+	}
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,405 @@
+/*
+ *  Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ *  With enhancements from Thomas Lamy <Thomas.Lamy at in-online.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/types.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "filetypes.h"
+#include "others.h"
+#include "readdb.h"
+#include "matcher-ac.h"
+#include "str.h"
+
+#include "htmlnorm.h"
+#include "entconv.h"
+
+struct cli_magic_s { /* one fixed-position magic number, see cli_magic[] and cli_filetype() */
+    size_t offset; /* position in the buffer at which the magic bytes are compared */
+    const char *magic; /* the bytes to compare; NULL marks the end of the table */
+    size_t length; /* number of bytes of magic to compare */
+    const char *descr; /* name used in the "Recognized ... file" debug message */
+    cli_file_t type; /* file type returned on a match */
+};
+
+struct cli_smagic_s { /* one file-type signature for the pattern matcher, see cli_smagic[] and cli_addtypesigs() */
+    const char *sig; /* signature in the hex notation passed to cli_parse_add(); NULL marks the end of the table */
+    const char *descr; /* name passed along with the signature and used in error messages */
+    cli_file_t type; /* file type passed to cli_parse_add() together with the signature */
+};
+
+static const struct cli_magic_s cli_magic[] = { /* fixed-offset magic numbers; cli_filetype() walks the table in order and the first matching entry wins */
+
+    /* Executables */
+
+    {0,  "MZ",				2,  "DOS/W32 executable/library/driver", CL_TYPE_MSEXE},
+    {0,	 "\177ELF",			4,  "ELF",		CL_TYPE_ELF},
+
+    /* Archives */
+
+    {0,	    "Rar!",			4,  "RAR",		CL_TYPE_RAR},
+    {0,	    "PK\003\004",		4,  "ZIP",		CL_TYPE_ZIP},
+    {0,	    "PK00PK\003\004",		8,  "ZIP",		CL_TYPE_ZIP},
+    {0,	    "\037\213",			2,  "GZip",		CL_TYPE_GZ},
+    {0,	    "BZh",			3,  "BZip",		CL_TYPE_BZ},
+    {0,	    "\x60\xea",			2,  "ARJ",		CL_TYPE_ARJ},
+    {0,	    "SZDD",			4,  "compress.exe'd",	CL_TYPE_MSSZDD},
+    {0,	    "MSCF",			4,  "MS CAB",		CL_TYPE_MSCAB},
+    {0,	    "ITSF",			4,  "MS CHM",           CL_TYPE_MSCHM},
+    {8,	    "\x19\x04\x00\x10",		4,  "SIS",		CL_TYPE_SIS},
+    {0,     "#@~^",			4,  "SCRENC",		CL_TYPE_SCRENC},
+    {0,     "(This file must be converted with BinHex 4.0)",
+				       45, "BinHex",		CL_TYPE_BINHEX},
+
+    /* Mail */
+
+    {0,  "From ",			 5, "MBox",		  CL_TYPE_MAIL},
+    {0,  "Received: ",			10, "Raw mail",		  CL_TYPE_MAIL},
+    {0,  "Return-Path: ",		13, "Maildir",		  CL_TYPE_MAIL},
+    {0,  "Return-path: ",		13, "Maildir",		  CL_TYPE_MAIL},
+    {0,  "Delivered-To: ",		14, "Mail",		  CL_TYPE_MAIL},
+    {0,  "X-UIDL: ",			 8, "Mail",		  CL_TYPE_MAIL},
+    {0,  "X-Apparently-To: ",		17, "Mail",		  CL_TYPE_MAIL},
+    {0,  "X-Envelope-From: ",		17, "Mail",		  CL_TYPE_MAIL},
+    {0,  "X-Original-To: ",		15, "Mail",		  CL_TYPE_MAIL},
+    {0,  "X-Symantec-",			11, "Symantec",		  CL_TYPE_MAIL},
+    {0,  "X-EVS",			 5, "EVS mail",		  CL_TYPE_MAIL},
+    {0,  "X-Real-To: ",                 11, "Mail",               CL_TYPE_MAIL},
+    {0,  "X-Sieve: ",			 9, "Mail",		  CL_TYPE_MAIL},
+    {0,  ">From ",			 6, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Date: ",			 6, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Message-Id: ",		12, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Message-ID: ",		12, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Envelope-to: ",		13, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Delivery-date: ",		15, "Mail",		  CL_TYPE_MAIL},
+    {0,  "To: ",			 4, "Mail",		  CL_TYPE_MAIL},
+    {0,  "Subject: ",			 9, "Mail",		  CL_TYPE_MAIL},
+    {0,  "For: ",			 5, "Eserv mail",	  CL_TYPE_MAIL},
+    {0,  "From: ",			 6, "Exim mail",	  CL_TYPE_MAIL},
+    {0,  "v:\015\012Received: ",	14, "VPOP3 Mail (DOS)",	  CL_TYPE_MAIL},
+    {0,  "v:\012Received: ",		13, "VPOP3 Mail (UNIX)",  CL_TYPE_MAIL},
+    {0,  "Hi. This is the qmail-send",  26, "Qmail bounce",	  CL_TYPE_MAIL},
+    {0,  "\170\237\076\042",		 4, "TNEF",               CL_TYPE_TNEF},
+
+    {0,  "begin ",			6,  "UUencoded",	  CL_TYPE_UUENCODED},
+    {0, "\041\102\104\116",		4, "PST",		  CL_TYPE_PST},
+
+    /* Graphics (may contain exploits against MS systems) */
+
+    {0,  "GIF",				 3, "GIF",	    CL_TYPE_GRAPHICS},
+    {0,  "BM",				 2, "BMP",          CL_TYPE_GRAPHICS},
+    {0,  "\377\330\377",		 3, "JPEG",         CL_TYPE_GRAPHICS},
+    {6,  "JFIF",			 4, "JPEG",         CL_TYPE_GRAPHICS},
+    {6,  "Exif",			 4, "JPEG",         CL_TYPE_GRAPHICS},
+    {0,  "\x89PNG",			 4, "PNG",          CL_TYPE_GRAPHICS},
+    {0,  "RIFF",                         4, "RIFF",         CL_TYPE_RIFF},
+    {0,  "RIFX",                         4, "RIFX",         CL_TYPE_RIFF},
+
+    /* Others */
+
+    {0,  "\320\317\021\340\241\261\032\341", 8, "OLE2 container", CL_TYPE_MSOLE2},
+    {0,  "%PDF-",			 5, "PDF document", CL_TYPE_PDF},
+    {0,  "\266\271\254\256\376\377\377\377", 8, "CryptFF", CL_TYPE_CRYPTFF},
+    {0,  "{\\rtf",                           5, "RTF", CL_TYPE_RTF}, 
+
+    /* Ignored types */
+
+    {0,  "\000\000\001\263",             4, "MPEG video stream",  CL_TYPE_DATA},
+    {0,  "\000\000\001\272",             4, "MPEG sys stream",    CL_TYPE_DATA},
+    {0,  "OggS",                         4, "Ogg Stream",         CL_TYPE_DATA},
+    {0,  "ID3",				 3, "MP3",		  CL_TYPE_DATA},
+    {0,  "\377\373\220",		 3, "MP3",		  CL_TYPE_DATA},
+    {0,  "%!PS-Adobe-",			11, "PostScript",	  CL_TYPE_DATA},
+    {0,  "\060\046\262\165\216\146\317", 7, "WMA/WMV/ASF",	  CL_TYPE_DATA},
+    {0,  ".RMF" ,			 4, "Real Media File",	  CL_TYPE_DATA},
+
+    {0, NULL,				 0, NULL,		  CL_TYPE_UNKNOWN_DATA} /* terminator: magic == NULL */
+};
+
+static const struct cli_smagic_s cli_smagic[] = { /* file-type signatures that cli_addtypesigs() registers through cli_parse_add() */
+
+    /* "\nFrom: " * "\nContent-Type: " */
+    {"0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+    /* "\nReceived: " * "\nContent-Type: " */
+    {"0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+    /* "\nReceived: " * "\nContent-type: " */
+    {"0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20", "Mail file", CL_TYPE_MAIL},
+
+    /* "MIME-Version: " * "\nContent-Type: " */
+    {"4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+    /* remember the matcher is case sensitive */
+    {"3c62723e",       "HTML data", CL_TYPE_HTML},	/* <br> */
+    {"3c42723e",       "HTML data", CL_TYPE_HTML},	/* <Br> */
+    {"3c42523e",       "HTML data", CL_TYPE_HTML},	/* <BR> */
+    {"3c703e",	       "HTML data", CL_TYPE_HTML},	/* <p> */
+    {"3c503e",	       "HTML data", CL_TYPE_HTML},	/* <P> */
+    {"68726566",       "HTML data", CL_TYPE_HTML},	/* href */
+    {"48726566",       "HTML data", CL_TYPE_HTML},	/* Href */
+    {"48524546",       "HTML data", CL_TYPE_HTML},	/* HREF */
+    {"3c68746d6c3e",   "HTML data", CL_TYPE_HTML},      /* <html> */
+    {"3c48544d4c3e",   "HTML data", CL_TYPE_HTML},      /* <HTML> */
+    {"3c48746d6c3e",   "HTML data", CL_TYPE_HTML},      /* <Html> */
+    {"3c686561643e",   "HTML data", CL_TYPE_HTML},      /* <head> */
+    {"3c484541443e",   "HTML data", CL_TYPE_HTML},      /* <HEAD> */
+    {"3c486561643e",   "HTML data", CL_TYPE_HTML},      /* <Head> */
+    {"3c666f6e74",     "HTML data", CL_TYPE_HTML},	/* <font */
+    {"3c466f6e74",     "HTML data", CL_TYPE_HTML},	/* <Font */
+    {"3c464f4e54",     "HTML data", CL_TYPE_HTML},	/* <FONT */
+    {"3c696d67",       "HTML data", CL_TYPE_HTML},      /* <img */
+    {"3c494d47",       "HTML data", CL_TYPE_HTML},      /* <IMG */
+    {"3c496d67",       "HTML data", CL_TYPE_HTML},      /* <Img */
+    {"3c736372697074", "HTML data", CL_TYPE_HTML},	/* <script */
+    {"3c536372697074", "HTML data", CL_TYPE_HTML},	/* <Script */
+    {"3c534352495054", "HTML data", CL_TYPE_HTML},	/* <SCRIPT */
+    {"3c6f626a656374", "HTML data", CL_TYPE_HTML},      /* <object */
+    {"3c4f626a656374", "HTML data", CL_TYPE_HTML},      /* <Object */
+    {"3c4f424a454354", "HTML data", CL_TYPE_HTML},      /* <OBJECT */
+    {"3c696672616d65", "HTML data", CL_TYPE_HTML},      /* <iframe */
+    {"3c494652414d45", "HTML data", CL_TYPE_HTML},      /* <IFRAME */
+    {"3c7461626c65",   "HTML data", CL_TYPE_HTML},	/* <table */
+    {"3c5441424c45",   "HTML data", CL_TYPE_HTML},	/* <TABLE */
+
+    {"526172211a0700", "RAR-SFX", CL_TYPE_RARSFX},
+    {"504b0304", "ZIP-SFX", CL_TYPE_ZIPSFX},
+    {"4d534346", "CAB-SFX", CL_TYPE_CABSFX},
+    {"60ea{7}0002", "ARJ-SFX", CL_TYPE_ARJSFX},
+    {"60ea{7}0102", "ARJ-SFX", CL_TYPE_ARJSFX},
+    {"60ea{7}0202", "ARJ-SFX", CL_TYPE_ARJSFX},
+    {"efbeadde4e756c6c736f6674496e7374", "NSIS", CL_TYPE_NULSFT},
+    {"a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36)", "AUTOIT", CL_TYPE_AUTOIT},
+
+    {"4d5a{60-300}50450000", "PE", CL_TYPE_MSEXE},
+
+    {NULL,  NULL,   CL_TYPE_UNKNOWN_DATA} /* terminator: sig == NULL */
+};
+
+static char internat[256] = { /* per-byte flag table indexed by byte value; in the part of the file visible here it is only referenced by the disabled text heuristic in cli_filetype(), where a 0 entry counts against "text" */
+    /* TODO: Remember to buy a beer to Joerg Wunsch <joerg at FreeBSD.ORG> */
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,  /* 0x0X */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,  /* 0x1X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x2X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x3X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x4X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x5X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x6X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  /* 0x7X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x8X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x9X */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xaX */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xbX */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xcX */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xdX */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xeX */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   /* 0xfX */
+};
+
+/*
+ * Identifies a file from the first buflen bytes in buf by comparing them with
+ * the fixed-offset magic numbers in cli_magic[].  The first entry that fits
+ * into the buffer and matches decides the type.  With the text heuristic
+ * below disabled, everything else is reported as CL_TYPE_UNKNOWN_TEXT.
+ */
+cli_file_t cli_filetype(const unsigned char *buf, size_t buflen)
+{
+	const struct cli_magic_s *m;
+	int text = 1;
+
+
+    for(m = cli_magic; m->magic; m++) {
+	/* skip entries whose magic would reach past the end of the buffer */
+	if(buflen < m->offset + m->length)
+	    continue;
+	if(memcmp(buf + m->offset, m->magic, m->length) != 0)
+	    continue;
+
+	cli_dbgmsg("Recognized %s file\n", m->descr);
+	return m->type;
+    }
+
+/* improve or drop this code
+ * https://wwws.clamav.net/bugzilla/show_bug.cgi?id=373
+ * (re-enabling it needs "int i, len;" declared above)
+ *
+    buflen < 25 ? (len = buflen) : (len = 25);
+    for(i = 0; i < len; i++)
+	if(!iscntrl(buf[i]) && !isprint(buf[i]) && !internat[buf[i] & 0xff]) {
+	    text = 0;
+	    break;
+	}
+*/
+    if(text)
+	return CL_TYPE_UNKNOWN_TEXT;
+    return CL_TYPE_UNKNOWN_DATA;
+}
+
+int is_tar(unsigned char *buf, unsigned int nbytes);
+
+/*
+ * Determines the type of the file open on descriptor desc.
+ *
+ *  1. Reads up to MAGIC_BUFFER_SIZE bytes and applies cli_filetype().
+ *  2. If that gives CL_TYPE_UNKNOWN_TEXT and an engine is supplied, the
+ *     buffer is run through the type signatures in engine->root[0]: first as
+ *     is, then after cli_utf16toascii() (HTML there yields
+ *     CL_TYPE_HTML_UTF16), and finally - if PHISHING_CONF_ENTCONV is enabled -
+ *     line by line after encoding normalization.
+ *  3. If the type is still unknown, up to 37638 bytes are re-read from the
+ *     start of the file and checked for tar and for the "CD001"/"CDROM"
+ *     markers of CD-ROM images.
+ *
+ * Returns the detected type; on allocation or matcher-setup failures the
+ * type found so far is returned.
+ */
+cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
+{
+	unsigned char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded, *bigbuff;
+	int bread, sret;
+	cli_file_t ret = CL_TYPE_UNKNOWN_DATA;
+	struct cli_matcher *root;
+	struct cli_ac_data mdata;
+
+
+    memset(smallbuff, 0, sizeof(smallbuff));
+    if((bread = read(desc, smallbuff, MAGIC_BUFFER_SIZE)) > 0)
+	ret = cli_filetype(smallbuff, bread);
+
+    if(engine && ret == CL_TYPE_UNKNOWN_TEXT) {
+	root = engine->root[0];
+	if(!root)
+	    return ret;
+
+	if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
+	    return ret;
+
+	sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+
+	cli_ac_freedata(&mdata);
+
+	if(sret >= CL_TYPENO) {
+	    ret = sret;
+	} else {
+	    if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
+		return ret;
+
+	    /* second attempt: the same data decoded from UTF-16 */
+	    decoded = (unsigned char *) cli_utf16toascii((char *) smallbuff, bread);
+	    if(decoded) {
+		sret = cli_ac_scanbuff(decoded, strlen((char *) decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+		free(decoded);
+		if(sret == CL_TYPE_HTML)
+		    ret = CL_TYPE_HTML_UTF16;
+	    }
+	    cli_ac_freedata(&mdata);
+
+	    /* third attempt: normalize the encoding and scan line by line */
+	    if((((struct cli_dconf*) engine->dconf)->phishing & PHISHING_CONF_ENTCONV) && ret != CL_TYPE_HTML_UTF16) {
+		    struct entity_conv conv;
+		    const size_t conv_size = 2*bread < 256 ? 256 : 2*bread;
+
+		if(init_entity_converter(&conv,UNKNOWN,conv_size) == 0) {
+			int end = 0;
+			m_area_t area;
+			area.buffer = (unsigned char *) smallbuff;
+			area.length = bread;
+			area.offset = 0;
+
+		    while(!end) {
+			if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN)) {
+			    /* release the converter's buffers before bailing out;
+			     * returning directly leaked them */
+			    entity_norm_done(&conv);
+			    return ret;
+			}
+
+			decoded =  encoding_norm_readline(&conv, NULL, &area, bread);
+
+			if(decoded) {
+			    sret = cli_ac_scanbuff(decoded, strlen((const char *) decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+			    free(decoded);
+			    if(sret == CL_TYPE_HTML) {
+				ret = CL_TYPE_HTML;
+				end = 1;
+			    }
+			} else
+			    end = 1;
+
+			cli_ac_freedata(&mdata);
+		    }
+
+		    entity_norm_done(&conv);
+
+		} else {
+		    cli_warnmsg("cli_filetype2: Error initializing entity converter\n");
+		}
+	    }
+	}
+    }
+
+    if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {
+
+	/* zero-filled, so the fixed-offset comparisons below stay inside
+	 * initialized memory even after a short read */
+	if(!(bigbuff = (unsigned char *) cli_calloc(37638 + 1, sizeof(unsigned char))))
+	    return ret;
+
+	lseek(desc, 0, SEEK_SET);
+	if((bread = read(desc, bigbuff, 37638)) > 0) {
+
+	    bigbuff[bread] = 0;
+
+	    switch(is_tar(bigbuff, bread)) {
+		case 1:
+		    ret = CL_TYPE_OLD_TAR;
+		    cli_dbgmsg("Recognized old fashioned tar file\n");
+		    break;
+		case 2:
+		    ret = CL_TYPE_POSIX_TAR;
+		    cli_dbgmsg("Recognized POSIX tar file\n");
+		    break;
+	    }
+	}
+
+	if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {
+
+	    if(!memcmp(bigbuff + 32769, "CD001" , 5) || !memcmp(bigbuff + 37633, "CD001" , 5)) {
+		cli_dbgmsg("Recognized ISO 9660 CD-ROM data\n");
+		ret = CL_TYPE_DATA;
+	    } else if(!memcmp(bigbuff + 32776, "CDROM" , 5)) {
+		cli_dbgmsg("Recognized High Sierra CD-ROM data\n");
+		ret = CL_TYPE_DATA;
+	    }
+	}
+
+	free(bigbuff);
+    }
+
+    return ret;
+}
+
+/*
+ * Adds the file-type signatures from cli_smagic[] to the matcher in
+ * engine->root[0], allocating and initialising that matcher first if it does
+ * not exist yet.
+ *
+ * Returns 0 on success, CL_EMEM if the matcher cannot be allocated, or the
+ * error code returned by cli_ac_init() / cli_parse_add().
+ */
+int cli_addtypesigs(struct cl_engine *engine)
+{
+	const struct cli_smagic_s *entry;
+	struct cli_matcher *root = engine->root[0];
+	int ret;
+
+
+    if(!root) {
+	cli_dbgmsg("cli_addtypesigs: Need to allocate AC trie in engine->root[0]\n");
+	root = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
+	engine->root[0] = root;
+	if(!root) {
+	    cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
+	    return CL_EMEM;
+	}
+
+	ret = cli_ac_init(root, cli_ac_mindepth, cli_ac_maxdepth);
+	if(ret) {
+	    /* No need to free previously allocated memory here - all engine
+	     * elements will be properly freed by cl_free()
+	     */
+	    cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
+	    return ret;
+	}
+    }
+
+    for(entry = cli_smagic; entry->sig; entry++) {
+	ret = cli_parse_add(root, entry->descr, entry->sig, entry->type, NULL, 0);
+	if(ret) {
+	    cli_errmsg("cli_addtypesigs: Problem adding signature for %s\n", entry->descr);
+	    return ret;
+	}
+    }
+
+    return 0;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,120 @@
+/*
+ *  Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+/*
+** defsg.c
+** 
+** 02/08/2k4 - Dumped and reversed
+** 02/08/2k4 - Done coding
+** 03/08/2k4 - Cleaning and securing
+** 04/08/2k4 - Done porting
+** 07/08/2k4 - Started adding support for 1.33
+*/
+
+/*
+** Unpacks an FSG compressed section.
+**
+** Czesc bart, good asm, nice piece of code ;)
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "cltypes.h"
+#include "rebuildpe.h"
+#include "others.h"
+#include "packlibs.h"
+#include "fsg.h"
+
+/*
+ * Unpack one FSG compressed stream from source (ssize bytes) into dest
+ * (dsize bytes) with cli_unfsg(), then hand the result to cli_rebuildpe()
+ * as a single section located at rva. base, ep and file are passed through
+ * to cli_rebuildpe() unchanged.
+ *
+ * Returns -1 when cli_unfsg() reports a failure, 0 when cli_rebuildpe()
+ * fails and 1 on success.
+ */
+int unfsg_200(char *source, char *dest, int ssize, int dsize, uint32_t rva, uint32_t base, uint32_t ep, int file) {
+  struct cli_exe_section section; /* Yup, just one ;) */
+  
+  if ( cli_unfsg(source, dest, ssize, dsize, NULL, NULL) ) return -1;
+  
+  /* the whole unpacked buffer is described as one section */
+  section.raw=0;
+  section.rsz = dsize;
+  section.vsz = dsize;
+  section.rva = rva;
+
+  if (!cli_rebuildpe(dest, &section, 1, base, ep, 0, 0, file)) {
+    cli_dbgmsg("FSG: Rebuilding failed\n");
+    return 0;
+  }
+  return 1;
+}
+
+
+/*
+ * Unpack sectcount+1 consecutive FSG streams from source into dest, record
+ * where each one landed in sections[], order the sections by RVA, derive
+ * their virtual sizes and pass the lot to cli_rebuildpe().
+ *
+ * sections must hold sectcount+1 entries whose rva fields are already set.
+ *
+ * Returns -1 when cli_unfsg() returns -1, 0 when cli_rebuildpe() fails and
+ * 1 on success.
+ */
+int unfsg_133(char *source, char *dest, int ssize, int dsize, struct cli_exe_section *sections, int sectcount, uint32_t base, uint32_t ep, int file) {
+  char *in=source, *out=dest;
+  int i, swapped, offs=0, lastsz=dsize;
+
+  /* each call continues where the previous one stopped reading and writing */
+  i = 0;
+  while (i <= sectcount) {
+    char *chunk=out;
+
+    if ( cli_unfsg(in, out, ssize - (in - source), dsize - (out - dest), &in, &out) == -1 )
+      return -1;
+
+    /* RVA has been filled already in pe.c */
+    sections[i].raw=offs;
+    sections[i].rsz=out-chunk;
+    offs+=out-chunk;
+    i++;
+  }
+
+  /* Bubble sort on rva; only rva, raw and rsz travel with a section */
+  do {
+    swapped = 0;
+    for (i = 0; i < sectcount ; i++) {
+      if ( sections[i].rva > sections[i+1].rva ) {
+	uint32_t keep_rva = sections[i].rva;
+	uint32_t keep_raw = sections[i].raw;
+	uint32_t keep_rsz = sections[i].rsz;
+
+	sections[i].rva = sections[i+1].rva;
+	sections[i].raw = sections[i+1].raw;
+	sections[i].rsz = sections[i+1].rsz;
+	sections[i+1].rva = keep_rva;
+	sections[i+1].raw = keep_raw;
+	sections[i+1].rsz = keep_rsz;
+	swapped = 1;
+      }
+    }
+  } while ( swapped );
+
+  /* A section's vsize is the gap to the next rva; the last one gets what is left of dsize */
+  for (i = 0; i <= sectcount ; i++) {
+    if ( i == sectcount ) {
+      sections[i].vsz = lastsz;
+    } else {
+      sections[i].vsz = sections[i+1].rva - sections[i].rva;
+      lastsz-= sections[i+1].rva - sections[i].rva;
+    }
+
+    cli_dbgmsg("FSG: .SECT%d RVA:%x VSize:%x ROffset: %x, RSize:%x\n", i, sections[i].rva, sections[i].vsz, sections[i].raw, sections[i].rsz);
+  }
+
+  if (cli_rebuildpe(dest, sections, sectcount+1, base, ep, 0, 0, file))
+    return 1;
+
+  cli_dbgmsg("FSG: Rebuilding failed\n");
+  return 0;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,413 @@
+/*
+ *  HTML Entity & Encoding normalization.
+ *
+ *  Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as 
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+#include <clamav-config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "cltypes.h"
+#include "clamav.h"
+#include "others.h"
+#include "hashtab.h"
+
+
+/* Table sizes handed out by get_nearest_capacity(), in ascending order. */
+static const size_t prime_list[] =
+{
+     53ul,         97ul,         193ul,       389ul,       769ul,
+     1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
+     49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
+     1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
+     50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
+     1610612741ul, 3221225473ul
+};
+
+
+/* Number of entries in prime_list. */
+static const size_t prime_n = sizeof(prime_list)/sizeof(prime_list[0]);
+
+/* Marker stored in element->key for a removed entry. It is recognised by
+ * comparing the pointer itself against DELETED_KEY, never by its contents. */
+static unsigned char DELETED_KEY[] = "";
+
+/*
+ * Return the first entry of prime_list that is strictly greater than
+ * capacity. When no entry is large enough an error is logged and the last
+ * (largest) entry is returned instead.
+ */
+static size_t get_nearest_capacity(const size_t capacity)
+{
+	size_t i;
+	for(i=0 ;i < prime_n; i++) {
+		if (prime_list[i] > capacity)
+			return prime_list[i];
+	}
+	/* terminate the message with a newline like every other log call in this file */
+	cli_errmsg("Requested hashtable size is too big!\n");
+	return prime_list[prime_n-1];
+}
+
+#ifdef PROFILE_HASHTABLE
+/* I know, this is ugly, most of these functions get a const s, that gets its const-ness discarded,
+ * and then these functions modify something the compiler assumes is readonly.
+ * Please, never use PROFILE_HASHTABLE in production code, and in releases. Use it for development only!*/
+
+/*
+ * Profiling hooks. With PROFILE_HASHTABLE defined each hook below updates
+ * counters inside s->PROFILE_STRUCT; without it (see the #else branch) every
+ * hook is a macro that expands to nothing.
+ */
+
+/* Zero all counters. */
+static inline void PROFILE_INIT(struct hashtable *s)
+{
+	memset(&s->PROFILE_STRUCT,0,sizeof(s->PROFILE_STRUCT));
+}
+
+/* Count one hash computation. */
+static inline void PROFILE_CALC_HASH(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.calc_hash++;
+}
+
+/* Count one lookup request. */
+static inline void PROFILE_FIND_ELEMENT(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.find_req++;
+}
+
+/* Count a lookup that ended without a match, plus the probes it took. */
+static inline void PROFILE_FIND_NOTFOUND(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.not_found++;
+	s->PROFILE_STRUCT.not_found_tries += tries;
+}
+
+/* Count a lookup that found its key, plus the probes it took. */
+static inline void PROFILE_FIND_FOUND(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.found++;
+	s->PROFILE_STRUCT.found_tries += tries;
+}
+
+/* Count a probe sequence that ran through the whole table. */
+static inline void PROFILE_HASH_EXHAUSTED(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.hash_exhausted++;
+}
+
+/* Count one table grow. */
+static inline void PROFILE_GROW_START(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.grow++;
+}
+
+/* Count one element placed in the new table during a grow, plus its probes. */
+static inline void PROFILE_GROW_FOUND(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.grow_found++;
+	s->PROFILE_STRUCT.grow_found_tries += tries;
+}
+
+/* Currently records nothing. */
+static inline void PROFILE_GROW_DONE(struct hashtable *s)
+{
+}
+
+/* Count an insert that recycled a deleted slot, plus its probes. */
+static inline void PROFILE_DELETED_REUSE(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.deleted_reuse++;
+	s->PROFILE_STRUCT.deleted_tries += tries;
+}
+
+/* Count an insert into an empty slot, plus its probes. */
+static inline void PROFILE_INSERT(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.inserts++;
+	s->PROFILE_STRUCT.insert_tries += tries;
+}
+
+/* Count an insert that overwrote the data of an existing key, plus its probes. */
+static inline void PROFILE_DATA_UPDATE(struct hashtable *s, size_t tries)
+{
+	s->PROFILE_STRUCT.update++;
+	s->PROFILE_STRUCT.update_tries += tries;
+}
+
+/* Count one deletion. */
+static inline void PROFILE_HASH_DELETE(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.deletes++;
+}
+
+/* Count one hashtab_clear() call. */
+static inline void PROFILE_HASH_CLEAR(struct hashtable *s)
+{
+	s->PROFILE_STRUCT.clear++;
+}
+
+/* Print every counter, and a few ratios derived from them, through cli_dbgmsg(). */
+static inline void PROFILE_REPORT(const struct hashtable *s)
+{
+	size_t lookups, queries, insert_tries, inserts;
+	cli_dbgmsg("--------Hashtable usage report for %p--------------\n",(const void*)s);
+	cli_dbgmsg("hash function calculations:%ld\n",s->PROFILE_STRUCT.calc_hash);
+	cli_dbgmsg("successfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.found, s->PROFILE_STRUCT.find_req, s->PROFILE_STRUCT.found_tries);
+	cli_dbgmsg("unsuccessfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.not_found, s->PROFILE_STRUCT.find_req , s->PROFILE_STRUCT.not_found_tries);
+	cli_dbgmsg("successfull finds during grow:%ld; lookups: %ld\n",s->PROFILE_STRUCT.grow_found, s->PROFILE_STRUCT.grow_found_tries);
+	/* probes and queries summed over plain lookups and grow-time placements */
+	lookups = s->PROFILE_STRUCT.found_tries + s->PROFILE_STRUCT.not_found_tries + s->PROFILE_STRUCT.grow_found_tries;
+	queries = s->PROFILE_STRUCT.find_req + s->PROFILE_STRUCT.grow_found;
+	cli_dbgmsg("Find Lookups/total queries: %ld/%ld = %3f\n", lookups, queries, lookups*1.0/queries);
+	insert_tries = s->PROFILE_STRUCT.insert_tries + s->PROFILE_STRUCT.update_tries + s->PROFILE_STRUCT.deleted_tries;
+
+	cli_dbgmsg("new item insert tries/new items: %ld/%ld\n", s->PROFILE_STRUCT.insert_tries, s->PROFILE_STRUCT.inserts);
+	cli_dbgmsg("update tries/updates: %ld/%ld\n", s->PROFILE_STRUCT.update_tries, s->PROFILE_STRUCT.update);
+	cli_dbgmsg("deleted item reuse tries/deleted&reused items: %ld/%ld\n", s->PROFILE_STRUCT.deleted_tries, s->PROFILE_STRUCT.deleted_reuse);
+	inserts = s->PROFILE_STRUCT.inserts + s->PROFILE_STRUCT.update + s->PROFILE_STRUCT.deleted_reuse;
+	cli_dbgmsg("Insert tries/total inserts: %ld/%ld = %3f\n", insert_tries, inserts, insert_tries*1.0/inserts);
+
+	cli_dbgmsg("Grows: %ld, Deletes : %ld, hashtable clears: %ld\n",s->PROFILE_STRUCT.grow,s->PROFILE_STRUCT.deletes, s->PROFILE_STRUCT.clear);
+        cli_dbgmsg("--------Report end-------------\n");	
+}
+
+#else
+/* Profiling disabled: every hook expands to nothing. */
+#define PROFILE_INIT(s) 
+#define PROFILE_CALC_HASH(s) 
+#define PROFILE_FIND_ELEMENT(s) 
+#define PROFILE_FIND_NOTFOUND(s, tries) 
+#define PROFILE_FIND_FOUND(s, tries)
+#define PROFILE_HASH_EXHAUSTED(s)
+#define PROFILE_GROW_START(s)
+#define PROFILE_GROW_FOUND(s, tries)
+#define PROFILE_GROW_DONE(s)
+#define PROFILE_DELETED_REUSE(s, tries)
+#define PROFILE_INSERT(s, tries)
+#define PROFILE_DATA_UPDATE(s, tries)
+#define PROFILE_HASH_DELETE(s)
+#define PROFILE_HASH_CLEAR(s)
+#define PROFILE_REPORT(s)
+#endif
+
+/*
+ * Set up s as an empty table. The slot count is the value that
+ * get_nearest_capacity() returns for the requested capacity, and maxfill is
+ * set to 8/10 of that slot count.
+ *
+ * Returns 0 on success, CL_ENULLARG when s is NULL, CL_EMEM when the slot
+ * array cannot be allocated.
+ */
+int hashtab_init(struct hashtable *s,size_t capacity)
+{
+	size_t slots;
+
+	if(s == NULL)
+		return CL_ENULLARG;
+
+	PROFILE_INIT(s);
+
+	slots = get_nearest_capacity(capacity);
+	s->htable = cli_calloc(slots,sizeof(*s->htable));
+	if(s->htable == NULL)
+		return CL_EMEM;
+
+	s->used = 0;
+	s->capacity = slots;
+	s->maxfill = 8*slots/10;
+	return 0;
+}
+
+/*
+ * Hash the len bytes at k into the range [0, SIZE). The bytes are consumed
+ * from the last one to the first; each step shifts the running value left
+ * by eight bits, adds the byte and reduces modulo SIZE.
+ */
+static size_t hash(const unsigned char* k,const size_t len,const size_t SIZE)
+{
+	size_t acc = 0;
+	size_t pos = len;
+
+	while(pos > 0) {
+		pos--;
+		acc = ((acc << 8) + k[pos]) % SIZE;
+	}
+	return acc;
+}
+
+/*
+ * Look up the key of len bytes in s.
+ *
+ * Probing starts at hash(key) and moves on by 1, 2, 3, ... slots until an
+ * empty slot is met or s->capacity probes have been made. Slots holding
+ * DELETED_KEY are stepped over.
+ *
+ * A stored key matches only when it is exactly len bytes long and equal to
+ * key; comparing just the first len bytes would also accept a longer stored
+ * key that merely starts with key.
+ *
+ * Returns the matching element, or NULL when s is NULL or the key is not
+ * in the table.
+ */
+struct element* hashtab_find(const struct hashtable *s,const unsigned char* key,const size_t len)
+{
+	struct element* element;
+	size_t tries = 1; 
+	size_t idx;
+
+	if(!s)
+		return NULL; 
+	PROFILE_CALC_HASH(s);
+	PROFILE_FIND_ELEMENT(s);
+	idx = hash(key, len, s->capacity); 
+	element = &s->htable[idx];
+	do {
+		if(!element->key) {
+			PROFILE_FIND_NOTFOUND(s, tries);
+			return NULL; /* element not found, place is empty*/
+		}
+		else if(element->key != DELETED_KEY
+			&& strncmp((const char*)key,(const char*)element->key,len)==0
+			&& strlen((const char*)element->key)==len) {
+			PROFILE_FIND_FOUND(s, tries);
+			return element;/* found */
+		}
+		else {
+			idx = (idx + tries++) % s->capacity;
+			element = &s->htable[idx];
+		}
+	} while (tries <= s->capacity);
+	PROFILE_HASH_EXHAUSTED(s);
+	return NULL; /* not found */
+}
+
+/*
+ * Rehash every live entry of s into a new, larger slot array whose size
+ * comes from get_nearest_capacity(s->capacity). Slots marked DELETED_KEY
+ * are not carried over. Key strings are not copied; the new slots take over
+ * the existing pointers.
+ *
+ * Returns CL_SUCCESS, or CL_EMEM when no larger size is available, the
+ * allocation fails, or an entry cannot be placed. On failure s is left as
+ * it was and the new array, if any, is released.
+ */
+static int hashtab_grow(struct hashtable *s)
+{
+	const size_t new_capacity = get_nearest_capacity(s->capacity);
+	struct element* htable;
+	size_t i,idx, used = 0;
+
+	/* check the size before allocating, so nothing is leaked when the table cannot grow */
+	if(new_capacity == s->capacity)
+		return CL_EMEM;
+	htable = cli_calloc(new_capacity, sizeof(*s->htable));
+	if(!htable)
+		return CL_EMEM;
+
+	PROFILE_GROW_START(s);
+	cli_dbgmsg("hashtab.c: Warning: growing open-addressing hashtables is slow. Either allocate more storage when initializing, or use other hashtable types!\n");
+	for(i=0; i < s->capacity;i++) {
+		if(s->htable[i].key && s->htable[i].key != DELETED_KEY) {
+			struct element* element;
+			size_t tries = 1;				
+
+			PROFILE_CALC_HASH(s);
+			idx = hash(s->htable[i].key, strlen((const char*)s->htable[i].key), new_capacity);
+			element = &htable[idx];
+
+			/* same probe sequence as hashtab_find() */
+			while(element->key && tries <= new_capacity) {
+				idx = (idx + tries++) % new_capacity;
+				element = &htable[idx];
+			}
+			if(!element->key) {
+				/* copy element from old hashtable to new */
+				PROFILE_GROW_FOUND(s, tries);
+				*element = s->htable[i];
+				used++;
+			}
+			else {
+				cli_errmsg("hashtab.c: Impossible - unable to rehash table\n");
+				free(htable); /* the old table is still intact; drop the half-filled new one */
+				return CL_EMEM;/* this means we didn't find enough room for all elements in the new table, should never happen */ 
+			}
+		}
+	}
+	free(s->htable);
+	s->htable = htable;
+	s->used = used;
+	s->capacity = new_capacity;
+	s->maxfill = new_capacity*8/10;
+	cli_dbgmsg("Table %p size after grow:%ld\n",(void*)s,s->capacity);
+	PROFILE_GROW_DONE(s);
+	return CL_SUCCESS;
+}
+
+
+/*
+ * Store data under the key of len bytes in s, overwriting the data of an
+ * existing entry with the same key.
+ *
+ * The probe sequence is the one hashtab_find() uses. The first DELETED_KEY
+ * slot met on the way is remembered and recycled when the key turns out not
+ * to be stored; probing continues past it, because the key may still sit
+ * further along the chain. When the whole sequence is used up without
+ * finding a slot, the table is grown and the search restarts from scratch.
+ *
+ * The key is copied into a new len+1 byte buffer that is always NUL
+ * terminated. After a successful insert that pushes used above maxfill the
+ * table is grown on a best-effort basis.
+ *
+ * Returns 0 on success, CL_ENULLARG when s is NULL, CL_EMEM when the key
+ * copy cannot be allocated or the table cannot be grown.
+ */
+int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len,const element_data data)
+{
+	struct element* element;
+	struct element* deleted_element;
+	struct element* slot;
+	size_t tries;
+	size_t idx;
+	if(!s)
+		return CL_ENULLARG; 
+	do {
+		/* start over on every pass: after a grow the old slot pointers and probe count are stale */
+		deleted_element = NULL;
+		slot = NULL;
+		tries = 1;
+		PROFILE_CALC_HASH(s);
+		idx = hash(key, len, s->capacity); 
+		element = &s->htable[idx];
+
+		do {
+			if(!element->key) {
+				/* end of the probe chain: the key is not stored */
+				slot = deleted_element ? deleted_element : element;
+				break;
+			}
+			if(element->key == DELETED_KEY) {
+				if(!deleted_element)
+					deleted_element = element;
+			}
+			else if(strncmp((const char*)key,(const char*)element->key,len)==0
+				&& strlen((const char*)element->key)==len) {
+				PROFILE_DATA_UPDATE(s, tries);
+				element->data = data;/* key found, update */
+				return 0;		
+			}
+			idx = (idx + tries++) % s->capacity;
+			element = &s->htable[idx];
+		} while (tries <= s->capacity);
+
+		/* every probed slot was occupied or deleted: a deleted one can still be recycled */
+		if(!slot)
+			slot = deleted_element;
+
+		if(slot) {
+			unsigned char* thekey;
+
+			if(slot == deleted_element) {
+				/* reuse deleted elements*/
+				PROFILE_DELETED_REUSE(s, tries);
+			}
+			else {
+				PROFILE_INSERT(s, tries);
+			}
+			thekey = cli_malloc(len+1);
+			if(!thekey)
+				return CL_EMEM;
+			/* copy exactly len bytes and terminate explicitly */
+			memcpy(thekey, key, len);
+			thekey[len] = '\0';
+			slot->key = thekey;
+			slot->data = data;
+			s->used++;		
+			if(s->used > s->maxfill) {
+				cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity);
+				hashtab_grow(s);
+			}
+			return 0;
+		}
+		/* no free place found*/
+		PROFILE_HASH_EXHAUSTED(s);
+		cli_dbgmsg("hashtab.c: Growing hashtable %p, because its full, old size:%ld.\n",(void*)s,s->capacity);
+	} while( hashtab_grow(s) >= 0 );
+	cli_warnmsg("hashtab.c: Unable to grow hashtable\n");
+	return CL_EMEM;
+}
+
+/*
+ * Remove the entry stored under key, if hashtab_find() locates one: its key
+ * string is freed, the slot is marked with DELETED_KEY and the used count
+ * is decremented. Does nothing when the key is not found.
+ */
+void hashtab_delete(struct hashtable *s,const unsigned char* key,const size_t len)
+{
+	struct element* victim = hashtab_find(s,key,len);
+
+	if(!victim || !victim->key)
+		return;
+
+	PROFILE_HASH_DELETE(s);
+	free(victim->key);/*FIXME: any way to shut up warnings here? if I make key char*, I get tons of warnings in entitylist.h */
+	victim->key = DELETED_KEY;
+	s->used--;
+}
+
+/*
+ * Empty the table: free every stored key string, reset all slots to the
+ * empty state and set used to 0. The slot array itself is kept.
+ */
+void hashtab_clear(struct hashtable *s)
+{
+	size_t i;
+	PROFILE_HASH_CLEAR(s);
+	for(i=0;i < s->capacity;i++) {
+		if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
+			free(s->htable[i].key);/*FIXME: shut up warnings */
+	}
+	/* capacity counts slots, not bytes: wipe the whole array so no freed key pointer survives */
+	memset(s->htable, 0, s->capacity * sizeof(*s->htable));
+	s->used = 0;
+}
+
+
+/*
+ * Write one "<data> <key>" line to out for every live entry of s, in slot
+ * order. Empty and deleted slots are skipped. Always returns CL_SUCCESS.
+ */
+int hashtab_store(const struct hashtable *s,FILE* out)
+{
+	size_t slot = 0;
+
+	while(slot < s->capacity) {
+		const struct element* entry = &s->htable[slot];
+
+		slot++;
+		if(!entry->key || entry->key == DELETED_KEY)
+			continue;
+		fprintf(out,"%ld %s\n",entry->data,entry->key);
+	}
+	return CL_SUCCESS;
+}
+
+/*
+ * Print C source on stdout that defines the table s as static data: an
+ * element array called <name>_elements with one initialiser per slot,
+ * followed by a struct hashtable called <name> that refers to it. Ends with
+ * PROFILE_REPORT(s). Always returns 0.
+ */
+int hashtab_generate_c(const struct hashtable *s,const char* name)
+{
+	size_t slot;
+
+	printf("/* TODO: include GPL headers */\n");
+	printf("#include <hashtab.h>\n");
+	printf("static struct element %s_elements[] = {\n",name);
+	for(slot = 0; slot < s->capacity; slot++) {
+		const struct element* entry = &s->htable[slot];
+
+		if(!entry->key) {
+			/* empty slot */
+			printf("\t{NULL, 0},\n");
+			continue;
+		}
+		if(entry->key == DELETED_KEY) {
+			printf("\t{DELETED_KEY,0},\n");
+			continue;
+		}
+		printf("\t{(const unsigned char*)\"%s\", %ld},\n", entry->key, entry->data);
+	}
+	printf("};\n");
+	printf("const struct hashtable %s = {\n",name);
+	printf("\t%s_elements, %ld, %ld, %ld", name, s->capacity, s->used, s->maxfill);
+	printf("\n};\n");
+
+	PROFILE_REPORT(s);
+	return 0;
+}
+
+/*
+ * Read "<number> <key>" lines from in (the format hashtab_store() writes)
+ * and insert each pair into s. Lines that do not contain both fields are
+ * skipped.
+ *
+ * Returns CL_SUCCESS when the input has been read to the end, or the error
+ * code of the first hashtab_insert() call that fails.
+ */
+int hashtab_load(FILE* in, struct hashtable *s)
+{
+	char line[1024];
+	while (fgets(line, sizeof(line), in)) {
+		unsigned char l[1024];
+		int val;
+		int rc;
+		/* both conversions must succeed, otherwise val and l hold garbage */
+		if (sscanf(line,"%d %1023s",&val,l) != 2)
+			continue;
+		rc = hashtab_insert(s,l,strlen((const char*)l),val);
+		if (rc)
+			return rc;
+	}
+	return CL_SUCCESS;
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1711 @@
+/*
+ *  Normalise HTML text.
+ *  Decode MS Script Encoder protection. 
+ *
+ *  Copyright (C) 2004 trog at uncon.org
+ *
+ *  The ScrEnc decoder was initially based upon an analysis by Andreas Marx.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include "others.h"
+#include "htmlnorm.h"
+
+/* NOTE(review): defined right before blob.h is included - presumably blob.h
+ * expects object_type to exist already; confirm against blob.h. */
+typedef enum {
+        INVALIDCLASS, BLOBCLASS
+} object_type;
+#include "blob.h"
+
+#include "entconv.h"
+
+/* Capacity, without the terminating NUL, of the tag, tag_arg, tag_val and
+ * entity_val buffers in cli_html_normalise(). */
+#define HTML_STR_LENGTH 1024
+/* Upper bound on the blob length html_tag_contents_append() will fill. */
+#define MAX_TAG_CONTENTS_LENGTH HTML_STR_LENGTH
+
+/* States of the parser in cli_html_normalise(); its state and next_state
+ * variables hold these values. */
+typedef enum {
+    HTML_BAD_STATE,
+    HTML_NORM,
+    HTML_COMMENT,
+    HTML_CHAR_REF,
+    HTML_ENTITY_REF_DECODE,
+    HTML_SKIP_WS,
+    HTML_TRIM_WS,
+    HTML_TAG,
+    HTML_TAG_ARG,
+    HTML_TAG_ARG_VAL,
+    HTML_TAG_ARG_EQUAL,
+    HTML_PROCESS_TAG,
+    HTML_CHAR_REF_DECODE,
+    HTML_SKIP_LENGTH,
+    HTML_JSDECODE,
+    HTML_JSDECODE_LENGTH,
+    HTML_JSDECODE_DECRYPT,
+    HTML_SPECIAL_CHAR,
+    HTML_RFC2397_TYPE,
+    HTML_RFC2397_INIT,
+    HTML_RFC2397_DATA,
+    HTML_RFC2397_FINISH,
+    HTML_RFC2397_ESC,
+    HTML_ESCAPE_CHAR
+} html_state;
+
+/* Type of the 'quoted' variable in cli_html_normalise().
+ * NOTE(review): presumably the quoting of the attribute value being parsed - confirm. */
+typedef enum {
+    SINGLE_QUOTED,
+    DOUBLE_QUOTED,
+    NOT_QUOTED
+} quoted_state;
+
+
+/* Size in bytes of the output buffer inside file_buff_t. */
+#define HTML_FILE_BUFF_LEN 8192
+
+/* Buffered writer used by the html_output_* helpers. */
+typedef struct file_buff_tag {
+	int fd;		/* descriptor the buffer is written to with cli_writen() */
+	unsigned char buffer[HTML_FILE_BUFF_LEN];	/* bytes waiting to be written */
+	int length;	/* number of valid bytes at the start of buffer */
+} file_buff_t;
+
+/* Base64 decoding table indexed by byte value: 'A'-'Z' map to 0-25,
+ * 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63. Every other
+ * byte maps to -1. */
+static const int base64_chars[256] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
+    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
+    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+};
+
+/* 64 entries, each 0, 1 or 2 (written with a leading zero).
+ * NOTE(review): presumably selects the row of decrypt_tables to use for each
+ * successive encoded character - confirm against the decoder. */
+int table_order[] = {
+       00, 02, 01, 00, 02, 01, 02, 01, 01, 02, 01, 02, 00, 01, 02, 01,
+       00, 01, 02, 01, 00, 00, 02, 01, 01, 02, 00, 01, 02, 01, 01, 02,
+       00, 00, 01, 02, 01, 02, 01, 00, 01, 00, 00, 02, 01, 00, 01, 02,
+       00, 01, 02, 01, 00, 00, 02, 01, 01, 00, 00, 02, 01, 00, 01, 02
+};
+
+/* Three substitution tables with 128 entries each, indexed by a 7-bit
+ * character value.
+ * NOTE(review): presumably the ScrEnc (MS Script Encoder) decoding tables
+ * mentioned in the file header - confirm against the decoder. */
+int decrypt_tables[3][128] = {
+      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+       0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43,
+       0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E,
+       0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74,
+       0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F,
+       0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46,
+       0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36},
+
+      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+       0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73,
+       0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53,
+       0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D,
+       0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B,
+       0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
+       0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57},
+
+      {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F,
+       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+       0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F,
+       0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E,
+       0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39,
+       0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48,
+       0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58,
+       0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65}
+};
+
+/*
+ * Return the next chunk of input as a newly allocated, NUL terminated
+ * buffer of max_len bytes.
+ *
+ * When m_area is given the chunk is taken from m_area->buffer, starting at
+ * m_area->offset, and the offset is advanced past it; stream is ignored.
+ * Otherwise the chunk is read from stream with fgets().
+ *
+ * A chunk normally ends after a newline or at the end of the input. When no
+ * newline shows up within max_len-1 bytes the chunk is cut after the last
+ * whitespace character instead, so the next call resumes right behind it;
+ * if the bytes scanned back over contain no whitespace the chunk is
+ * returned at its full length.
+ *
+ * Returns NULL when the allocation fails, when there is no input source,
+ * or when the input is exhausted. The caller frees the returned buffer.
+ */
+unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len)
+{
+	unsigned char *line, *ptr, *start, *end;
+	unsigned int line_len, count;
+
+	line = (unsigned char *) cli_malloc(max_len);
+	if (!line) {
+		return NULL;
+	}
+
+	/* Try and use the memory buffer first */
+	if (m_area) {
+		start = ptr = m_area->buffer + m_area->offset;
+		end = m_area->buffer + m_area->length;
+		if (start >= end) {
+			free(line);
+			return NULL;
+		}
+		/* line_len counts the byte at ptr as well, so a newline found there is copied */
+		line_len = 1;
+		while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) {
+			ptr++;
+			line_len++;
+		}
+		if (ptr == end) {
+			/* ran off the buffer: there is no byte at ptr to include */
+			line_len--;
+			memcpy(line, start, line_len);
+			line[line_len] = '\0';
+		} else if (*ptr == '\n') {
+			memcpy(line, start, line_len);
+			line[line_len] = '\0';
+		} else {
+			/* Hit max_len */
+			/* Store the current line end and length*/
+			count = line_len;
+			while (!isspace(*ptr) && (line_len > 1)) {
+				ptr--;
+				line_len--;
+			}
+			if (line_len == 1) {
+				/* scanned back to the start without meeting whitespace: keep the full chunk */
+				line_len=count;
+			}
+			memcpy(line, start, line_len);
+			line[line_len] = '\0';
+		}
+		m_area->offset += line_len;
+	} else {
+		if (!stream) {
+			cli_dbgmsg("No HTML stream\n");
+			free(line);
+			return NULL;
+		}
+		if (fgets((char *) line, max_len, stream) == NULL) {
+			free(line);
+			return NULL;
+		}
+
+		line_len=strlen((const char *) line);
+		if (line_len == 0) {
+			free(line);
+			return NULL;
+		}
+		if (line_len == max_len-1) {
+			/* didn't find a whole line - rewind to a space*/
+			/* fseek() takes a signed long offset; counting down in an
+			 * unsigned int would turn into a huge forward seek wherever
+			 * long is wider than int */
+			long rewind_by = 0;
+			while (!isspace(line[--line_len])) {
+				rewind_by--;
+				if (line_len == 0) {
+					return line;
+				}
+			}
+			fseek(stream, rewind_by, SEEK_CUR);
+			line[line_len+1] = '\0';
+		}
+	}
+	return line;
+}
+
+/*
+ * Write the bytes pending in fbuff to fbuff->fd and mark the buffer empty.
+ * Does nothing for a NULL or empty buffer.
+ */
+static void html_output_flush(file_buff_t *fbuff)
+{
+	if (!fbuff || fbuff->length <= 0) {
+		return;
+	}
+	cli_writen(fbuff->fd, fbuff->buffer, fbuff->length);
+	fbuff->length = 0;
+}
+
+/*
+ * Append the byte c to fbuff1 and then to fbuff2, skipping whichever of
+ * them is NULL. A buffer that is already full is flushed first.
+ */
+static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c)
+{
+	file_buff_t *targets[2];
+	int t;
+
+	targets[0] = fbuff1;
+	targets[1] = fbuff2;
+	for (t = 0; t < 2; t++) {
+		file_buff_t *out = targets[t];
+
+		if (!out) {
+			continue;
+		}
+		if (out->length == HTML_FILE_BUFF_LEN) {
+			html_output_flush(out);
+		}
+		out->buffer[out->length++] = c;
+	}
+}
+
+/*
+ * Append len bytes from str to fbuff. The buffer is flushed first when the
+ * new data would fill it up. Data of HTML_FILE_BUFF_LEN bytes or more
+ * bypasses the buffer and is written straight to fbuff->fd. Does nothing
+ * when fbuff is NULL.
+ */
+static void html_output_str(file_buff_t *fbuff, const unsigned char *str, int len)
+{
+	if (!fbuff) {
+		return;
+	}
+
+	if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) {
+		html_output_flush(fbuff);
+	}
+
+	if (len < HTML_FILE_BUFF_LEN) {
+		memcpy(fbuff->buffer + fbuff->length, str, len);
+		fbuff->length += len;
+		return;
+	}
+
+	/* too big to buffer */
+	html_output_flush(fbuff);
+	cli_writen(fbuff->fd, str, len);
+}
+
+/*
+ * Return the value stored for the first argument named tag, or NULL when
+ * tags has no argument of that name. The returned pointer still belongs to
+ * tags.
+ */
+static char *html_tag_arg_value(tag_arguments_t *tags, const char *tag)
+{
+	int pos = 0;
+
+	while (pos < tags->count) {
+		if (!strcmp(tags->tag[pos], tag)) {
+			return tags->value[pos];
+		}
+		pos++;
+	}
+	return NULL;
+}
+
+/*
+ * Replace the value of the first argument named tag with a copy of value,
+ * freeing the previous value. Does nothing when no argument has that name.
+ */
+static void html_tag_arg_set(tag_arguments_t *tags, const char *tag, const char *value)
+{
+	int pos = 0;
+
+	while (pos < tags->count) {
+		if (!strcmp(tags->tag[pos], tag)) {
+			free(tags->value[pos]);
+			tags->value[pos] = cli_strdup(value);
+			break;
+		}
+		pos++;
+	}
+}
+/*
+ * Append one argument to tags: a copy of tag and, when value is not NULL, a
+ * copy of value. A value that starts with a double quote is stored without
+ * that quote and without its final character (presumably the closing
+ * quote). When tags->scanContents is set the contents array grows as well
+ * and the new entry starts out NULL.
+ *
+ * If one of the arrays cannot be enlarged, everything still reachable is
+ * released and tags is reset to the empty state.
+ */
+static void html_tag_arg_add(tag_arguments_t *tags,
+		const unsigned char *tag, unsigned char *value)
+{
+	int len, i, last;
+	tags->count++;
+	last = tags->count-1;	/* index of the entry being added */
+	tags->tag = (unsigned char **) cli_realloc2(tags->tag,
+				tags->count * sizeof(char *));
+	if (!tags->tag) {
+		goto abort;
+	}
+	tags->value = (unsigned char **) cli_realloc2(tags->value,
+				tags->count * sizeof(char *));
+	if (!tags->value) {
+		goto abort;
+	}
+	if(tags->scanContents) {
+		tags->contents= (blob **) cli_realloc2(tags->contents,
+				tags->count*sizeof(*tags->contents));
+		if(!tags->contents) {
+			goto abort;
+		}
+		tags->contents[last]=NULL;
+	}
+	tags->tag[last] = cli_strdup(tag);
+	if (value) {
+		if (*value == '"') {
+			tags->value[last] = cli_strdup(value+1);
+			/* only trim the copy if cli_strdup() actually produced one */
+			if (tags->value[last]) {
+				len = strlen(value+1);
+				if (len > 0) {
+					tags->value[last][len-1] = '\0';
+				}
+			}
+		} else {
+			tags->value[last] = cli_strdup(value);
+		}
+	} else {
+		tags->value[last] = NULL;
+	}
+	return;
+	
+abort:
+	/* Bad error - can't do 100% recovery */
+	tags->count--;
+	for (i=0; i < tags->count; i++) {
+		if (tags->tag) {
+			free(tags->tag[i]);
+		}
+		if (tags->value) {
+			free(tags->value[i]);
+		}
+		if(tags->contents) {
+			if(tags->contents[i])
+				blobDestroy(tags->contents[i]);
+		}
+	}
+	if (tags->tag) {
+		free(tags->tag);
+	}
+	if (tags->value) {
+		free(tags->value);
+	}
+	if (tags->contents)
+		free(tags->contents);
+	tags->contents=NULL;
+	tags->tag = tags->value = NULL;
+	tags->count = 0;	
+	return;
+}
+
+/*
+ * Write tag and its arguments to fbuff in the form
+ * <tag name="value" name ...>. Argument names are written as stored,
+ * values are lower-cased, and an argument without a value is written as
+ * the bare name.
+ */
+static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags)
+{
+	int i, j, len;
+
+	html_output_c(fbuff, NULL, '<');
+	html_output_str(fbuff, tag, strlen(tag));
+
+	i = 0;
+	while (i < tags->count) {
+		html_output_c(fbuff, NULL, ' ');
+		html_output_str(fbuff, tags->tag[i], strlen(tags->tag[i]));
+		if (tags->value[i]) {
+			html_output_str(fbuff, "=\"", 2);
+			len = strlen(tags->value[i]);
+			j = 0;
+			while (j < len) {
+				html_output_c(fbuff, NULL, tolower(tags->value[i][j]));
+				j++;
+			}
+			html_output_c(fbuff, NULL, '"');
+		}
+		i++;
+	}
+
+	html_output_c(fbuff, NULL, '>');
+}
+
+/*
+ * Release everything tags owns - the name and value strings, the content
+ * blobs and the three arrays - and reset tags to the empty state.
+ */
+void html_tag_arg_free(tag_arguments_t *tags)
+{
+	int pos = 0;
+
+	while (pos < tags->count) {
+		free(tags->tag[pos]);
+		/* free() accepts NULL, so an argument without a value needs no check */
+		free(tags->value[pos]);
+		if (tags->contents && tags->contents[pos]) {
+			blobDestroy(tags->contents[pos]);
+		}
+		pos++;
+	}
+
+	free(tags->tag);
+	free(tags->value);
+	free(tags->contents);
+
+	tags->tag = tags->value = NULL;
+	tags->contents = NULL;
+	tags->count = 0;
+}
+
+/**
+ * Used for img and iframe tags that sit inside an <a href> tag: the contents
+ * of entry idx become a closed blob holding the value of entry in_ahref
+ * (the real URL) followed by a NUL byte. Both indices are 1-based.
+ */
+static inline void html_tag_set_inahref(tag_arguments_t *tags,int idx,int in_ahref)
+{
+	blob *url = blobCreate();
+
+	tags->contents[idx-1] = url;
+	blobAddData(url,tags->value[in_ahref-1],strlen(tags->value[in_ahref-1]));
+	blobAddData(url, "",1);
+	blobClose(url);
+}
+
+/**
+ * Append the text between begin and end (the displayed text of an <a href>
+ * tag) to the contents blob of entry idx (1-based). The amount added is
+ * limited so that the blob does not grow beyond MAX_TAG_CONTENTS_LENGTH
+ * bytes. Nothing happens when end is NULL or the range is empty.
+ */
+static inline void html_tag_contents_append(tag_arguments_t *tags,int idx,const unsigned char* begin,const unsigned char *end)
+{
+	size_t have, room, want;
+
+	if(!end || begin >= end)
+		return;
+
+	have = blobGetDataSize(tags->contents[idx-1]);
+	room = (have <= MAX_TAG_CONTENTS_LENGTH) ? (MAX_TAG_CONTENTS_LENGTH - have) : 0;
+	want = end - begin;
+	if(room == 0)
+		return;
+
+	blobAddData(tags->contents[idx-1],begin, room < want ? room : want );
+}
+
+
+/*
+ * Finish the contents blob of entry idx (1-based): add a terminating NUL
+ * byte and close the blob.
+ */
+static inline void html_tag_contents_done(tag_arguments_t *tags,int idx)
+{
+	blob *contents = tags->contents[idx-1];
+
+	/* append NUL byte */
+	blobAddData(contents, "", 1);
+	blobClose(contents);
+}
+
+/*
+ * Normalise an HTML document and optionally collect the URLs it contains.
+ *
+ * Input is read line by line, either from m_area (when non-NULL) or from a
+ * stdio stream opened on a duplicate of fd. Every line is fed through a
+ * character-driven state machine that lower-cases text, squeezes runs of
+ * whitespace, decodes character/entity references, decodes
+ * jscript.encode/vbscript.encode script bodies and writes RFC2397 "data:"
+ * attribute values out to separate files.
+ *
+ * fd       descriptor to read from; only used when m_area is NULL
+ * m_area   in-memory input, or NULL to read from fd
+ * dirname  if non-NULL, directory receiving comment.html, nocomment.html,
+ *          script.html and the rfc2397 sub-directory; if NULL no output
+ *          files are written
+ * hrefs    if non-NULL, receives the URLs of <a>, <img>, <iframe> and <area>
+ *          tags (and, when hrefs->scanContents is set, link contents and
+ *          form actions)
+ * dconf    if non-NULL and PHISHING_CONF_ENTCONV is set in dconf->phishing,
+ *          encoding/entity normalisation is enabled
+ *
+ * Returns TRUE on success and FALSE on failure. A failing
+ * init_entity_converter() has its result code returned unchanged, and
+ * CL_EMEM is returned when the <meta> charset buffer cannot be allocated.
+ */
+static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+	int fd_tmp, tag_length, tag_arg_length, binary;
+	int retval=FALSE, escape, value = 0, hex, tag_val_length=0, table_pos, in_script=FALSE;
+	FILE *stream_in = NULL;
+	html_state state=HTML_NORM, next_state=HTML_BAD_STATE;
+	char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1];
+	char tag_val[HTML_STR_LENGTH+1], *tmp_file;
+	/* line starts out NULL so that the cleanup code may free it unconditionally */
+	unsigned char *line = NULL, *ptr, *arg_value;
+	tag_arguments_t tag_args;
+	quoted_state quoted;
+	unsigned long length;
+	file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script;
+	/* non-NULL only while an RFC2397 data file is open */
+	file_buff_t *file_tmp_o1 = NULL;
+	int in_ahref=0;/* index of <a> tag, whose contents we are parsing. Indexing starts from 1, 0 means outside of <a>*/
+	unsigned char* href_contents_begin=NULL;/*beginning of the next portion of <a> contents*/
+	unsigned char* ptrend=NULL;/*end of <a> contents*/
+	unsigned char* in_form_action = NULL;/* the action URL of the current <form> tag, if any*/
+
+	struct entity_conv conv;
+	int rc;
+	unsigned char entity_val[HTML_STR_LENGTH+1];
+	size_t entity_val_length = 0;
+	const int dconf_entconv = dconf && dconf->phishing&PHISHING_CONF_ENTCONV;
+	/* dconf for phishing engine sets scanContents, so no need for a flag here */
+
+
+	tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/
+	if (!m_area) {
+		if (fd < 0) {
+			cli_dbgmsg("Invalid HTML fd\n");
+			return FALSE;
+		}
+		lseek(fd, 0, SEEK_SET);
+		/* work on a duplicate so that fclose() does not close the caller's fd */
+		fd_tmp = dup(fd);
+		if (fd_tmp < 0) {
+			return FALSE;
+		}
+		stream_in = fdopen(fd_tmp, "r");
+		if (!stream_in) {
+			close(fd_tmp);
+			return FALSE;
+		}
+	}
+
+	if(dconf_entconv && (rc = init_entity_converter(&conv, UNKNOWN, 16384) )) {
+		if (!m_area) {
+			fclose(stream_in);
+		}
+		return rc;
+	}
+
+	tag_args.count = 0;
+	tag_args.tag = NULL;
+	tag_args.value = NULL;
+	tag_args.contents = NULL;
+
+	if (dirname) {
+		snprintf(filename, 1024, "%s/rfc2397", dirname);
+		if (mkdir(filename, 0700) && errno != EEXIST) {
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+		file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+		if (!file_buff_o1) {
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+		if (!file_buff_o2) {
+			free(file_buff_o1);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		file_buff_script = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+		if (!file_buff_script) {
+			free(file_buff_o1);
+			free(file_buff_o2);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		/* open() reports failure with -1; 0 is a perfectly valid descriptor */
+		snprintf(filename, 1024, "%s/comment.html", dirname);
+		file_buff_o1->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+		if (file_buff_o1->fd < 0) {
+			cli_dbgmsg("open failed: %s\n", filename);
+			free(file_buff_o1);
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		snprintf(filename, 1024, "%s/nocomment.html", dirname);
+		file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+		if (file_buff_o2->fd < 0) {
+			cli_dbgmsg("open failed: %s\n", filename);
+			close(file_buff_o1->fd);
+			free(file_buff_o1);
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		snprintf(filename, 1024, "%s/script.html", dirname);
+		file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+		if (file_buff_script->fd < 0) {
+			cli_dbgmsg("open failed: %s\n", filename);
+			close(file_buff_o1->fd);
+			close(file_buff_o2->fd);
+			free(file_buff_o1);
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		file_buff_o1->length = 0;
+		file_buff_o2->length = 0;
+		file_buff_script->length = 0;
+	} else {
+		file_buff_o1 = NULL;
+		file_buff_o2 = NULL;
+		file_buff_script = NULL;
+	}
+
+	binary = FALSE;
+
+	if(dconf_entconv)
+		ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+	else
+		ptr = line = cli_readline(stream_in, m_area, 8192);
+
+	while (line) {
+		if(href_contents_begin)
+			href_contents_begin=ptr;/*start of a new line, last line already appended to contents see below*/
+		while (*ptr && isspace(*ptr)) {
+			ptr++;
+		}
+		while (*ptr) {
+			if (!binary && *ptr == '\n') {
+				/* Convert it to a space and re-process */
+				*ptr = ' ';
+				continue;
+			}
+			if (!binary && *ptr == '\r') {
+				ptr++;
+				continue;
+			}
+			switch (state) {
+			case HTML_SPECIAL_CHAR:
+				cli_dbgmsg("Impossible, special_char can't occur here\n");
+				break;
+			case HTML_BAD_STATE:
+				/* An engine error has occurred */
+				cli_dbgmsg("HTML Engine Error\n");
+				goto abort;
+			case HTML_SKIP_LENGTH:
+				/* skip exactly 'length' input bytes, then continue in next_state */
+				length--;
+				ptr++;
+				if (!length) {
+					state = next_state;
+				}
+				break;
+			case HTML_SKIP_WS:
+				/* drop whitespace without emitting anything */
+				if (isspace(*ptr)) {
+					ptr++;
+				} else {
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_TRIM_WS:
+				/* replace a run of whitespace by a single space */
+				if (isspace(*ptr)) {
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, ' ');
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_NORM:
+				if (*ptr == '<') {
+					ptrend=ptr; /* for use by scanContents */
+					html_output_c(file_buff_o1, file_buff_o2, '<');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '<');
+					}
+					if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
+						/*append this text portion to the contents of <a>*/
+						html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+						href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/
+					}
+					ptr++;
+					state = HTML_SKIP_WS;
+					tag_length=0;
+					next_state = HTML_TAG;
+				} else if (isspace(*ptr)) {
+					state = HTML_TRIM_WS;
+					next_state = HTML_NORM;
+				} else if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_NORM;
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, tolower(*ptr));
+					}
+					ptr++;
+				}
+				break;
+			case HTML_TAG:
+				if ((tag_length == 0) && (*ptr == '!')) {
+					/* Comment */
+					html_output_c(file_buff_o1, NULL, '!');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '!');
+					}
+					/* Need to rewind in the no-comment output stream */
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+					state = HTML_COMMENT;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				} else if (*ptr == '>') {
+					html_output_c(file_buff_o1, file_buff_o2, '>');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '>');
+					}
+					ptr++;
+					tag[tag_length] = '\0';
+					state = HTML_SKIP_WS;
+					next_state = HTML_PROCESS_TAG;
+				} else if (!isspace(*ptr)) {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, tolower(*ptr));
+					}
+					/* over-long tag names are truncated, not rejected */
+					if (tag_length < HTML_STR_LENGTH) {
+						tag[tag_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}  else {
+					tag[tag_length] = '\0';
+					state = HTML_SKIP_WS;
+					tag_arg_length = 0;
+					next_state = HTML_TAG_ARG;
+				}
+				break;
+			case HTML_TAG_ARG:
+				if (*ptr == '=') {
+					html_output_c(file_buff_o1, file_buff_o2, '=');
+					tag_arg[tag_arg_length] = '\0';
+					ptr++;
+					state = HTML_SKIP_WS;
+					escape = FALSE;
+					quoted = NOT_QUOTED;
+					tag_val_length = 0;
+					next_state = HTML_TAG_ARG_VAL;
+				} else if (isspace(*ptr)) {
+					ptr++;
+					tag_arg[tag_arg_length] = '\0';
+					state = HTML_SKIP_WS;
+					next_state = HTML_TAG_ARG_EQUAL;
+				} else if (*ptr == '>') {
+					html_output_c(file_buff_o1, file_buff_o2, '>');
+					if (tag_arg_length > 0) {
+						tag_arg[tag_arg_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, NULL);
+					}
+					ptr++;
+					state = HTML_PROCESS_TAG;
+					next_state = HTML_BAD_STATE;
+				} else {
+					if (tag_arg_length == 0) {
+						/* Start of new tag - add space */
+						html_output_c(file_buff_o1, file_buff_o2,' ');
+					}
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (tag_arg_length < HTML_STR_LENGTH) {
+						tag_arg[tag_arg_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}
+				break;
+			case HTML_TAG_ARG_EQUAL:
+				if (*ptr == '=') {
+					html_output_c(file_buff_o1, file_buff_o2, '=');
+					ptr++;
+					state = HTML_SKIP_WS;
+					escape = FALSE;
+					quoted = NOT_QUOTED;
+					tag_val_length = 0;
+					next_state = HTML_TAG_ARG_VAL;
+				} else {
+					/* argument without a value */
+					if (tag_arg_length > 0) {
+						tag_arg[tag_arg_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, NULL);
+					}
+					tag_arg_length=0;
+					state = HTML_TAG_ARG;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_TAG_ARG_VAL:
+				if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
+					/* RFC2397 inline data */
+
+					/* Rewind one byte so we don't recurse */
+					if (file_buff_o1 && (file_buff_o1->length > 0)) {
+						file_buff_o1->length--;
+					}
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+
+					if (quoted != NOT_QUOTED) {
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+					}
+					tag_val_length = 0;
+					state = HTML_RFC2397_TYPE;
+					next_state = HTML_TAG_ARG;
+				} else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
+					/* RFC2397 inline data */
+
+					/* Rewind one byte so we don't recurse */
+					if (file_buff_o1 && (file_buff_o1->length > 0)) {
+						file_buff_o1->length--;
+					}
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+
+					if (quoted != NOT_QUOTED) {
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+					}
+
+					tag_val_length = 0;
+					state = HTML_RFC2397_TYPE;
+					next_state = HTML_TAG_ARG;
+				} else if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_TAG_ARG_VAL;
+					ptr++;
+				} else if (*ptr == '\'') {
+					/* single quotes are normalised to double quotes on output */
+					if (tag_val_length == 0) {
+						quoted = SINGLE_QUOTED;
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					} else {
+						if (!escape && (quoted==SINGLE_QUOTED)) {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							tag_val[tag_val_length] = '\0';
+							html_tag_arg_add(&tag_args, tag_arg, tag_val);
+							ptr++;
+							state = HTML_SKIP_WS;
+							tag_arg_length=0;
+							next_state = HTML_TAG_ARG;
+						} else {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							ptr++;
+						}
+					}
+				} else if (*ptr == '"') {
+					if (tag_val_length == 0) {
+						quoted = DOUBLE_QUOTED;
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					} else {
+						if (!escape && (quoted==DOUBLE_QUOTED)) {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							tag_val[tag_val_length] = '\0';
+							html_tag_arg_add(&tag_args, tag_arg, tag_val);
+							ptr++;
+							state = HTML_SKIP_WS;
+							tag_arg_length=0;
+							next_state = HTML_TAG_ARG;
+						} else {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							ptr++;
+						}
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						tag_val[tag_val_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, tag_val);
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						html_output_c(file_buff_o1, file_buff_o2, *ptr);
+						if (tag_val_length < HTML_STR_LENGTH) {
+							if (isspace(*ptr)) {
+								tag_val[tag_val_length++] = ' ';
+							} else {
+								tag_val[tag_val_length++] = '>';
+							}
+						}
+						state = HTML_SKIP_WS;
+						escape = FALSE;
+						quoted = NOT_QUOTED;
+						next_state = HTML_TAG_ARG_VAL;
+						ptr++;
+					}
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = *ptr;
+					}
+					ptr++;
+				}
+
+				/* a backslash escapes the quote character that follows it */
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_COMMENT:
+				/* comments only go to the "comment" output, never to nocomment */
+				html_output_c(file_buff_o1, NULL, tolower(*ptr));
+				if (in_script) {
+					html_output_c(file_buff_script, NULL, tolower(*ptr));
+				}
+				if (*ptr == '>') {
+					state = HTML_SKIP_WS;
+					next_state = HTML_NORM;
+				}
+				ptr++;
+				break;
+			case HTML_PROCESS_TAG:
+
+				/* Default to no action for this tag */
+				state = HTML_SKIP_WS;
+				next_state = HTML_NORM;
+				if (tag[0] == '/') {
+					/* End tag */
+					state = HTML_SKIP_WS;
+					next_state = HTML_NORM;
+					if (strcmp(tag, "/script") == 0) {
+						in_script=FALSE;
+						html_output_c(file_buff_script, NULL, '\n');
+					}
+					if (hrefs && hrefs->scanContents && in_ahref) {
+						if(strcmp(tag,"/a") == 0) {
+							html_tag_contents_done(hrefs,in_ahref);
+							in_ahref=0;/* we are no longer inside an <a href>
+							nesting <a> tags not supported, and shouldn't be supported*/
+						}
+						href_contents_begin=ptr;
+					}
+					if (strcmp(tag, "/form") == 0)  {
+						if (in_form_action)
+							free(in_form_action);
+						in_form_action = NULL;
+					}
+				} else if (strcmp(tag, "script") == 0) {
+					arg_value = html_tag_arg_value(&tag_args, "language");
+					if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
+						html_tag_arg_set(&tag_args, "language", "javascript");
+						state = HTML_SKIP_WS;
+						next_state = HTML_JSDECODE;
+					} else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
+						html_tag_arg_set(&tag_args, "language", "vbscript");
+						state = HTML_SKIP_WS;
+						next_state = HTML_JSDECODE;
+					} else {
+						in_script = TRUE;
+					}
+					html_output_tag(file_buff_script, tag, &tag_args);
+				} else if (dconf_entconv && strcmp(tag, "meta") == 0) {
+					const unsigned char* http_equiv = html_tag_arg_value(&tag_args, "http-equiv");
+					const unsigned char* http_content = html_tag_arg_value(&tag_args, "content");
+					if(http_equiv && http_content && strcasecmp(http_equiv,"content-type") == 0) {
+						size_t len = strlen((const char*)http_content);
+						unsigned char* http_content2 = cli_malloc( len + 1);
+						unsigned char* charset;
+						size_t i;
+
+						if(!http_content2) {
+							/* leave through the common cleanup path so that the
+							 * current line, the output files, the tag arguments
+							 * and the entity converter are all released */
+							retval = CL_EMEM;
+							goto abort;
+						}
+						/* search a lower-cased copy for "charset" */
+						for(i = 0; i < len; i++)
+							http_content2[i] = tolower(http_content[i]);
+						http_content2[len] = '\0';
+						charset = (unsigned char*) strstr((char*)http_content2,"charset");
+						if(charset) {
+							while(*charset && *charset != '=')
+								charset++;
+							if(*charset)
+								charset++;/* skip = */
+							len = strcspn((const char*)charset," \"'");
+							charset[len] = '\0';
+							if(len) {
+								process_encoding_set(&conv, charset, META);
+							}
+						}
+						free(http_content2);
+					}
+				} else if (hrefs) {
+					if(in_ahref && !href_contents_begin)
+						href_contents_begin=ptr;
+					if (strcmp(tag, "a") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "href");
+						if (arg_value && strlen(arg_value) > 0) {
+							if (hrefs->scanContents) {
+								unsigned char* arg_value_title = html_tag_arg_value(&tag_args,"title");
+								/*beginning of an <a> tag*/
+								if (in_ahref)
+									/*we encountered nested <a> tags, pretend previous closed*/
+									if (href_contents_begin) {
+										html_tag_contents_append(hrefs,in_ahref,
+											href_contents_begin,ptrend);
+										/*add pending contents between tags*/
+										html_tag_contents_done(hrefs,in_ahref);
+										in_ahref=0;
+										}
+								if (arg_value_title) {
+									/* title is a 'displayed link'*/
+									html_tag_arg_add(hrefs,"href_title",arg_value_title);
+									hrefs->contents[hrefs->count-1]=blobCreate();
+									html_tag_contents_append(hrefs,hrefs->count,arg_value,
+										arg_value+strlen(arg_value));
+									html_tag_contents_done(hrefs,hrefs->count);
+								}
+								if (in_form_action) {
+									/* form action is the real URL, and href is the 'displayed' */
+									html_tag_arg_add(hrefs,"form",arg_value);
+									hrefs->contents[hrefs->count-1] =  blobCreate();
+									html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+											in_form_action + strlen(in_form_action));
+									html_tag_contents_done(hrefs,hrefs->count);
+								}
+							}
+							html_tag_arg_add(hrefs, "href", arg_value);
+							if (hrefs->scanContents) {
+								in_ahref=hrefs->count; /* index of this tag (counted from 1) */
+								href_contents_begin=ptr;/* contents begin after <a ..> ends */
+								hrefs->contents[hrefs->count-1]=blobCreate();
+							}
+						}
+					} else if (strcmp(tag,"form") == 0 && hrefs->scanContents) {
+						const unsigned char* arg_action_value = html_tag_arg_value(&tag_args,"action");
+						if (arg_action_value) {
+							if(in_form_action)
+								free(in_form_action);
+							in_form_action = cli_strdup(arg_action_value);
+						}
+					} else if (strcmp(tag, "img") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "src");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "src", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* "contents" of an img tag, is the URL of its parent <a> tag */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+						arg_value = html_tag_arg_value(&tag_args, "dynsrc");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "dynsrc", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+					} else if (strcmp(tag, "iframe") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "src");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "iframe", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+					} else if (strcmp(tag,"area") == 0) {
+						arg_value = html_tag_arg_value(&tag_args,"href");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "area", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+									in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+					}
+					/* TODO:imagemaps can have urls too */
+				}
+				html_tag_arg_free(&tag_args);
+				break;
+			case HTML_CHAR_REF:
+				if (*ptr == '#') {
+					value = 0;
+					hex = FALSE;
+					state = HTML_CHAR_REF_DECODE;
+					ptr++;
+				} else {
+					if(dconf_entconv)
+						state = HTML_ENTITY_REF_DECODE;
+					else {
+						/* named entities are passed through when conversion is off */
+						if(next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '&';
+						}
+						html_output_c(file_buff_o1, file_buff_o2, '&');
+
+						state = next_state;
+						next_state = HTML_BAD_STATE;
+					}
+				}
+				break;
+			case HTML_ENTITY_REF_DECODE:
+				if(*ptr == ';') {
+					size_t i;
+					unsigned char* normalized;
+					entity_val[entity_val_length] = '\0';
+					normalized = entity_norm(&conv, entity_val);
+					if(normalized) {
+						for(i=0; i < strlen(normalized); i++) {
+							const char c = tolower(normalized[i]);
+							html_output_c(file_buff_o1, file_buff_o2, c);
+							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = c;
+							}
+						}
+						free(normalized);
+					}
+					else {
+						/* unknown entity: emit it unchanged as "&name;" */
+						html_output_c(file_buff_o1, file_buff_o2, '&');
+						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '&';
+						}
+						for(i=0; i < entity_val_length; i++) {
+							const char c = tolower(entity_val[i]);
+							html_output_c(file_buff_o1, file_buff_o2, c);
+							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = c;
+							}
+						}
+						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = ';';
+						}
+						html_output_c(file_buff_o1, file_buff_o2, ';');
+					}
+					entity_val_length = 0;
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				}
+				else if ( (isalnum(*ptr) || *ptr=='_' || *ptr==':' || (*ptr=='-')) && entity_val_length < HTML_STR_LENGTH) {
+					entity_val[entity_val_length++] = *ptr++;
+				}
+				else {
+					/* entity too long, or not valid, dump it */
+					size_t i;
+					if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = '&';
+					}
+					html_output_c(file_buff_o1, file_buff_o2, '&');
+					for(i=0; i < entity_val_length; i++) {
+						const char c = tolower(entity_val[i]);
+						html_output_c(file_buff_o1, file_buff_o2, c);
+						if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = c;
+						}
+					}
+
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+					entity_val_length = 0;
+				}
+				break;
+			case HTML_CHAR_REF_DECODE:
+				if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
+					hex=TRUE;
+					ptr++;
+				} else if (*ptr == ';') {
+					if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = value; /* store encoded values too */
+					}
+					if(dconf_entconv) {
+
+						if(value < 0x80)
+							html_output_c(file_buff_o1, file_buff_o2, tolower(value));
+						else {
+							unsigned char buff[10];
+							snprintf((char*)buff,9,"&#%d;",value);
+							buff[9] = '\0';
+							html_output_str(file_buff_o1, buff, strlen(buff));
+							html_output_str(file_buff_o2, buff, strlen(buff));
+						}
+					} else
+						html_output_c(file_buff_o1, file_buff_o2, tolower(value&0xff));
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				} else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
+					if (hex) {
+						value *= 16;
+					} else {
+						value *= 10;
+					}
+					if (isdigit(*ptr)) {
+						value += (*ptr - '0');
+					} else {
+						value += (tolower(*ptr) - 'a' + 10);
+					}
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, value);
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_JSDECODE:
+				/* Check for start marker */
+				if (strncmp(ptr, "#@~^", 4) == 0) {
+					ptr += 4;
+					state = HTML_JSDECODE_LENGTH;
+					next_state = HTML_BAD_STATE;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					html_output_c(file_buff_script, NULL, tolower(*ptr));
+					ptr++;
+				}
+				break;
+			case HTML_JSDECODE_LENGTH:
+				/* the length field occupies 8 characters; give up if they are not all on this line */
+				if (strlen(ptr) < 8) {
+					state = HTML_NORM;
+					next_state = HTML_BAD_STATE;
+					break;
+				}
+				length = base64_chars[ptr[0]] << 2;
+				length += base64_chars[ptr[1]] >> 4;
+				length += (base64_chars[ptr[1]] & 0x0f) << 12;
+				length += (base64_chars[ptr[2]] >> 2) << 8;
+				length += (base64_chars[ptr[2]] & 0x03) << 22;
+				length += base64_chars[ptr[3]] << 16;
+				length += (base64_chars[ptr[4]] << 2) << 24;
+				length += (base64_chars[ptr[5]] >> 4) << 24;
+				table_pos = 0;
+				state = HTML_JSDECODE_DECRYPT;
+				next_state = HTML_BAD_STATE;
+				ptr += 8;
+				break;
+			case HTML_JSDECODE_DECRYPT:
+				if (length == 0) {
+					html_output_str(file_buff_script, "</script>\n", 10);
+					length = 12;
+					state = HTML_SKIP_LENGTH;
+					next_state = HTML_NORM;
+					break;
+				}
+				if (*ptr < 0x80) {
+					value = decrypt_tables[table_order[table_pos]][*ptr];
+					if (value == 0xFF) { /* special character */
+						ptr++;
+						length--;
+						switch (*ptr) {
+						case '\0':
+							/* Fixup for end of line */
+							ptr--;
+							break;
+						case 0x21:
+							html_output_c(file_buff_o1, file_buff_o2, 0x3c);
+							html_output_c(file_buff_script, NULL, 0x3c);
+							break;
+						case 0x23:
+							html_output_c(file_buff_o1, file_buff_o2, 0x0d);
+							html_output_c(file_buff_script, NULL, 0x0d);
+							break;
+						case 0x24:
+							html_output_c(file_buff_o1, file_buff_o2, 0x40);
+							html_output_c(file_buff_script, NULL, 0x40);
+							break;
+						case 0x26:
+							html_output_c(file_buff_o1, file_buff_o2, 0x0a);
+							html_output_c(file_buff_script, NULL, 0x0a);
+							break;
+						case 0x2a:
+							html_output_c(file_buff_o1, file_buff_o2, 0x3e);
+							html_output_c(file_buff_script, NULL, 0x3e);
+							break;
+						}
+					} else {
+						html_output_c(file_buff_o1, file_buff_o2, value);
+						html_output_c(file_buff_script, NULL, tolower(value));
+					}
+				}
+				table_pos = (table_pos + 1) % 64;
+				ptr++;
+				length--;
+				break;
+
+			case HTML_RFC2397_TYPE:
+				/* collect the media type that precedes the ',' of a data: URL */
+				if (*ptr == '\'') {
+					if (!escape && (quoted==SINGLE_QUOTED)) {
+						/* Early end of data detected. Error */
+						ptr++;
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					}
+				} else if (*ptr == '"') {
+					if (!escape && (quoted==DOUBLE_QUOTED)) {
+						/* Early end of data detected. Error */
+						ptr++;
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						/* Early end of data detected. Error */
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							if (isspace(*ptr)) {
+								tag_val[tag_val_length++] = ' ';
+							} else {
+								tag_val[tag_val_length++] = '>';
+							}
+						}
+						state = HTML_SKIP_WS;
+						escape = FALSE;
+						quoted = NOT_QUOTED;
+						next_state = HTML_RFC2397_TYPE;
+						ptr++;
+					}
+				} else if (*ptr == ',') {
+					/* Beginning of data */
+					tag_val[tag_val_length] = '\0';
+					state = HTML_RFC2397_INIT;
+					escape = FALSE;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+
+				} else {
+					if (tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_RFC2397_INIT:
+				if (dirname) {
+					file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+					if (!file_tmp_o1) {
+						goto abort;
+					}
+					snprintf(filename, 1024, "%s/rfc2397", dirname);
+					tmp_file = cli_gentemp(filename);
+					if (!tmp_file) {
+						/* no name to open; reset the pointer so the cleanup
+						 * code does not free it a second time */
+						free(file_tmp_o1);
+						file_tmp_o1 = NULL;
+						goto abort;
+					}
+					cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
+					file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+					free(tmp_file);
+					if (file_tmp_o1->fd < 0) {
+						cli_dbgmsg("open failed: %s\n", filename);
+						free(file_tmp_o1);
+						file_tmp_o1 = NULL;
+						goto abort;
+					}
+					file_tmp_o1->length = 0;
+
+					html_output_str(file_tmp_o1, "From html-normalise\n", 20);
+					html_output_str(file_tmp_o1, "Content-type: ", 14);
+					/* NOTE(review): HTML_RFC2397_TYPE stores '\0' at
+					 * tag_val[tag_val_length] before entering this state, so
+					 * with a length of 0 *tag_val cannot be ';' and this
+					 * default looks unreachable -- confirm the intent */
+					if ((tag_val_length == 0) && (*tag_val == ';')) {
+						html_output_str(file_tmp_o1, "text/plain\n", 11);
+					}
+					html_output_str(file_tmp_o1, tag_val, tag_val_length);
+					html_output_c(file_tmp_o1, NULL, '\n');
+					if (strstr(tag_val, ";base64") != NULL) {
+						html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
+					}
+					html_output_c(file_tmp_o1, NULL, '\n');
+				} else {
+					file_tmp_o1 = NULL;
+				}
+				state = HTML_RFC2397_DATA;
+				binary = TRUE;
+				break;
+			case HTML_RFC2397_DATA:
+				if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_RFC2397_DATA;
+					ptr++;
+				} else if (*ptr == '%') {
+					length = 0;
+					value = 0;
+					state = HTML_ESCAPE_CHAR;
+					next_state = HTML_RFC2397_ESC;
+					ptr++;
+				} else if (*ptr == '\'') {
+					if (!escape && (quoted==SINGLE_QUOTED)) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else if (*ptr == '\"') {
+					if (!escape && (quoted==DOUBLE_QUOTED)) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else {
+					html_output_c(file_tmp_o1, NULL, *ptr);
+					ptr++;
+				}
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_RFC2397_FINISH:
+				if(file_tmp_o1) {
+					html_output_flush(file_tmp_o1);
+					close(file_tmp_o1->fd);
+					free(file_tmp_o1);
+					/* mark as released for the cleanup code */
+					file_tmp_o1 = NULL;
+				}
+				state = HTML_SKIP_WS;
+				escape = FALSE;
+				quoted = NOT_QUOTED;
+				next_state = HTML_TAG_ARG;
+				binary = FALSE;
+				break;
+			case HTML_RFC2397_ESC:
+				/* length is the number of characters HTML_ESCAPE_CHAR consumed after '%' */
+				if (length == 2) {
+					html_output_c(file_tmp_o1, NULL, value);
+				} else if (length == 1) {
+					html_output_c(file_tmp_o1, NULL, '%');
+					html_output_c(file_tmp_o1, NULL, value+'0');
+				} else {
+					html_output_c(file_tmp_o1, NULL, '%');
+				}
+				state = HTML_RFC2397_DATA;
+				break;
+			case HTML_ESCAPE_CHAR:
+				value *= 16;
+				length++;
+				if (isxdigit(*ptr)) {
+					if (isdigit(*ptr)) {
+						value += (*ptr - '0');
+					} else {
+						value += (tolower(*ptr) - 'a' + 10);
+					}
+				} else {
+					state = next_state;
+				}
+				if (length == 2) {
+					state = next_state;
+				}
+				ptr++;
+				break;
+			}
+		}
+		if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
+			/* end of line, append contents now, resume on next line */
+			html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+		ptrend = NULL;
+		free(line);
+		if(dconf_entconv)
+			ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+		else
+			ptr = line = cli_readline(stream_in, m_area, 8192);
+	}
+
+	if(dconf_entconv) {
+		/* handle "unfinished" entities */
+		size_t i;
+		unsigned char* normalized;
+		entity_val[entity_val_length] = '\0';
+		normalized = entity_norm(&conv, entity_val);
+		if(normalized) {
+			for(i=0; i < strlen(normalized); i++)
+				html_output_c(file_buff_o1, file_buff_o2, tolower(normalized[i]));
+			free(normalized);
+		}
+		else {
+			if(entity_val_length) {
+				html_output_c(file_buff_o1, file_buff_o2, '&');
+				for(i=0; i < entity_val_length; i++)
+					html_output_c(file_buff_o1, file_buff_o2, tolower(entity_val[i]));
+			}
+		}
+	}
+	retval = TRUE;
+abort:
+	/* line is still allocated when the parse loop was left through a goto;
+	 * it is NULL after a normal end of input or an early setup failure */
+	if (line)
+		free(line);
+	if (file_tmp_o1) {
+		/* input ended, or parsing failed, inside an RFC2397 data section */
+		html_output_flush(file_tmp_o1);
+		close(file_tmp_o1->fd);
+		free(file_tmp_o1);
+	}
+	if (in_form_action)
+		free(in_form_action);
+	if (in_ahref) /* tag not closed, force closing */
+		html_tag_contents_done(hrefs,in_ahref);
+
+	if(dconf_entconv)
+		entity_norm_done(&conv);
+	html_tag_arg_free(&tag_args);
+	if (!m_area) {
+		fclose(stream_in);
+	}
+	if (file_buff_o1) {
+		html_output_flush(file_buff_o1);
+		close(file_buff_o1->fd);
+		free(file_buff_o1);
+	}
+	if (file_buff_o2) {
+		html_output_flush(file_buff_o2);
+		close(file_buff_o2->fd);
+		free(file_buff_o2);
+	}
+	if (file_buff_script) {
+		html_output_flush(file_buff_script);
+		close(file_buff_script->fd);
+		free(file_buff_script);
+	}
+	return retval;
+}
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		snprintf(filename, 1024, "%s/nocomment.html", dirname);
+		file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+		if (!file_buff_o2->fd) {
+			cli_dbgmsg("open failed: %s\n", filename);
+			close(file_buff_o1->fd);
+			free(file_buff_o1);
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		snprintf(filename, 1024, "%s/script.html", dirname);
+		file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+		if (!file_buff_script->fd) {
+			cli_dbgmsg("open failed: %s\n", filename);
+			close(file_buff_o1->fd);
+			close(file_buff_o2->fd);
+			free(file_buff_o1);
+			free(file_buff_o2);
+			free(file_buff_script);
+			file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+			goto abort;
+		}
+
+		file_buff_o1->length = 0;
+		file_buff_o2->length = 0;
+		file_buff_script->length = 0;
+	} else {
+		file_buff_o1 = NULL;
+		file_buff_o2 = NULL;
+		file_buff_script = NULL;
+	}
+	
+	binary = FALSE;
+
+	if(dconf_entconv)
+		ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+	else   
+		ptr = line = cli_readline(stream_in, m_area, 8192);
+
+	while (line) {
+		if(href_contents_begin)
+			href_contents_begin=ptr;/*start of a new line, last line already appended to contents see below*/
+		while (*ptr && isspace(*ptr)) {
+			ptr++;
+		}
+		while (*ptr) {
+			if (!binary && *ptr == '\n') {
+				/* Convert it to a space and re-process */
+				*ptr = ' ';
+				continue;
+			}
+			if (!binary && *ptr == '\r') {
+				ptr++;
+				continue;
+			}
+			switch (state) {
+			case HTML_SPECIAL_CHAR:
+				cli_dbgmsg("Impossible, special_char can't occur here\n");
+				break;
+			case HTML_BAD_STATE:
+				/* An engine error has occurred */
+				cli_dbgmsg("HTML Engine Error\n");
+				goto abort;
+			case HTML_SKIP_LENGTH:
+				length--;
+				ptr++;
+				if (!length) {
+					state = next_state;
+				}
+				break;
+			case HTML_SKIP_WS:
+				if (isspace(*ptr)) {
+					ptr++;
+				} else {
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_TRIM_WS:
+				if (isspace(*ptr)) {
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, ' ');
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_NORM:
+				if (*ptr == '<') {
+					ptrend=ptr; /* for use by scanContents */
+					html_output_c(file_buff_o1, file_buff_o2, '<');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '<');
+					}
+					if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
+						/*append this text portion to the contents of <a>*/
+						html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+						href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/
+					}
+					ptr++;
+					state = HTML_SKIP_WS;
+					tag_length=0;
+					next_state = HTML_TAG;
+				} else if (isspace(*ptr)) {
+					state = HTML_TRIM_WS;
+					next_state = HTML_NORM;
+				} else if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_NORM;
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, tolower(*ptr));
+					}
+					ptr++;
+				}
+				break;
+			case HTML_TAG:
+				if ((tag_length == 0) && (*ptr == '!')) {
+					/* Comment */
+					html_output_c(file_buff_o1, NULL, '!');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '!');
+					}
+					/* Need to rewind in the no-comment output stream */
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+					state = HTML_COMMENT;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				} else if (*ptr == '>') {
+					html_output_c(file_buff_o1, file_buff_o2, '>');
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, '>');
+					}
+					ptr++;
+					tag[tag_length] = '\0';
+					state = HTML_SKIP_WS;
+					next_state = HTML_PROCESS_TAG;
+				} else if (!isspace(*ptr)) {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (in_script) {
+						html_output_c(file_buff_script, NULL, tolower(*ptr));
+					}
+					if (tag_length < HTML_STR_LENGTH) {
+						tag[tag_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}  else {
+					tag[tag_length] = '\0';
+					state = HTML_SKIP_WS;
+					tag_arg_length = 0;
+					next_state = HTML_TAG_ARG;
+				}
+				break;
+			case HTML_TAG_ARG:
+				if (*ptr == '=') {
+					html_output_c(file_buff_o1, file_buff_o2, '=');
+					tag_arg[tag_arg_length] = '\0';
+					ptr++;
+					state = HTML_SKIP_WS;
+					escape = FALSE;
+					quoted = NOT_QUOTED;
+					tag_val_length = 0;
+					next_state = HTML_TAG_ARG_VAL;
+				} else if (isspace(*ptr)) {
+					ptr++;
+					tag_arg[tag_arg_length] = '\0';
+					state = HTML_SKIP_WS;
+					next_state = HTML_TAG_ARG_EQUAL;
+				} else if (*ptr == '>') {
+					html_output_c(file_buff_o1, file_buff_o2, '>');
+					if (tag_arg_length > 0) {
+						tag_arg[tag_arg_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, NULL);
+					}
+					ptr++;
+					state = HTML_PROCESS_TAG;
+					next_state = HTML_BAD_STATE;
+				} else {
+					if (tag_arg_length == 0) {
+						/* Start of new tag - add space */
+						html_output_c(file_buff_o1, file_buff_o2,' ');
+					}
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (tag_arg_length < HTML_STR_LENGTH) {
+						tag_arg[tag_arg_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}
+				break;
+			case HTML_TAG_ARG_EQUAL:
+				if (*ptr == '=') {
+					html_output_c(file_buff_o1, file_buff_o2, '=');
+					ptr++;
+					state = HTML_SKIP_WS;
+					escape = FALSE;
+					quoted = NOT_QUOTED;
+					tag_val_length = 0;
+					next_state = HTML_TAG_ARG_VAL;
+				} else {
+					if (tag_arg_length > 0) {
+						tag_arg[tag_arg_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, NULL);
+					}
+					tag_arg_length=0;
+					state = HTML_TAG_ARG;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_TAG_ARG_VAL:
+				if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
+					/* RFC2397 inline data */
+
+					/* Rewind one byte so we don't recursuive */
+					if (file_buff_o1 && (file_buff_o1->length > 0)) {
+						file_buff_o1->length--;
+					}
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+					
+					if (quoted != NOT_QUOTED) {
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+					}
+					tag_val_length = 0;
+					state = HTML_RFC2397_TYPE;
+					next_state = HTML_TAG_ARG;
+				} else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
+					/* RFC2397 inline data */
+
+					/* Rewind one byte so we don't recursuive */
+					if (file_buff_o1 && (file_buff_o1->length > 0)) {
+						file_buff_o1->length--;
+					}
+					if (file_buff_o2 && (file_buff_o2->length > 0)) {
+						file_buff_o2->length--;
+					}
+					
+					if (quoted != NOT_QUOTED) {
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+					}
+
+					tag_val_length = 0;
+					state = HTML_RFC2397_TYPE;
+					next_state = HTML_TAG_ARG;
+				} else if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_TAG_ARG_VAL;
+					ptr++;
+				} else if (*ptr == '\'') {
+					if (tag_val_length == 0) {
+						quoted = SINGLE_QUOTED;
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					} else {
+						if (!escape && (quoted==SINGLE_QUOTED)) {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							tag_val[tag_val_length] = '\0';
+							html_tag_arg_add(&tag_args, tag_arg, tag_val);
+							ptr++;
+							state = HTML_SKIP_WS;
+							tag_arg_length=0;
+							next_state = HTML_TAG_ARG;
+						} else {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							ptr++;
+						}
+					}
+				} else if (*ptr == '"') {
+					if (tag_val_length == 0) {
+						quoted = DOUBLE_QUOTED;
+						html_output_c(file_buff_o1, file_buff_o2, '"');
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					} else {
+						if (!escape && (quoted==DOUBLE_QUOTED)) {					
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							tag_val[tag_val_length] = '\0';
+							html_tag_arg_add(&tag_args, tag_arg, tag_val);
+							ptr++;
+							state = HTML_SKIP_WS;
+							tag_arg_length=0;
+							next_state = HTML_TAG_ARG;
+						} else {
+							html_output_c(file_buff_o1, file_buff_o2, '"');
+							if (tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '"';
+							}
+							ptr++;
+						}
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						tag_val[tag_val_length] = '\0';
+						html_tag_arg_add(&tag_args, tag_arg, tag_val);
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						html_output_c(file_buff_o1, file_buff_o2, *ptr);
+						if (tag_val_length < HTML_STR_LENGTH) {
+							if (isspace(*ptr)) {
+								tag_val[tag_val_length++] = ' ';
+							} else {
+								tag_val[tag_val_length++] = '>';
+							}
+						}
+						state = HTML_SKIP_WS;
+						escape = FALSE;
+						quoted = NOT_QUOTED;
+						next_state = HTML_TAG_ARG_VAL;
+						ptr++;
+					}
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					if (tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = *ptr;
+					}
+					ptr++;
+				}
+				
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_COMMENT:
+				html_output_c(file_buff_o1, NULL, tolower(*ptr));
+				if (in_script) {
+					html_output_c(file_buff_script, NULL, tolower(*ptr));
+				}
+				if (*ptr == '>') {
+					state = HTML_SKIP_WS;
+					next_state = HTML_NORM;	
+				}
+				ptr++;
+				break;
+			case HTML_PROCESS_TAG:
+				
+				/* Default to no action for this tag */
+				state = HTML_SKIP_WS;
+				next_state = HTML_NORM;
+				if (tag[0] == '/') {
+					/* End tag */
+					state = HTML_SKIP_WS;
+					next_state = HTML_NORM;
+					if (strcmp(tag, "/script") == 0) {
+						in_script=FALSE;
+						html_output_c(file_buff_script, NULL, '\n');
+					}
+					if (hrefs && hrefs->scanContents && in_ahref) {
+						if(strcmp(tag,"/a") == 0) {
+							html_tag_contents_done(hrefs,in_ahref);
+							in_ahref=0;/* we are no longer inside an <a href>
+							nesting <a> tags not supported, and shouldn't be supported*/
+						}
+						href_contents_begin=ptr;
+					}
+					if (strcmp(tag, "/form") == 0)  {
+						if (in_form_action)
+							free(in_form_action);
+						in_form_action = NULL;
+					}
+				} else if (strcmp(tag, "script") == 0) {
+					arg_value = html_tag_arg_value(&tag_args, "language");
+					if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
+						html_tag_arg_set(&tag_args, "language", "javascript");
+						state = HTML_SKIP_WS;
+						next_state = HTML_JSDECODE;
+					} else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
+						html_tag_arg_set(&tag_args, "language", "vbscript");
+						state = HTML_SKIP_WS;
+						next_state = HTML_JSDECODE;
+					} else {
+						in_script = TRUE;
+					}
+					html_output_tag(file_buff_script, tag, &tag_args);
+				} else if (dconf_entconv && strcmp(tag, "meta") == 0) {
+					const unsigned char* http_equiv = html_tag_arg_value(&tag_args, "http-equiv");
+					const unsigned char* http_content = html_tag_arg_value(&tag_args, "content");
+					if(http_equiv && http_content && strcasecmp(http_equiv,"content-type") == 0) {
+						size_t len = strlen((const char*)http_content);
+						unsigned char* http_content2 = cli_malloc( len + 1);
+						unsigned char* charset;
+						size_t i;
+
+						if(!http_content2)
+							return CL_EMEM;
+						for(i = 0; i < len; i++)
+							http_content2[i] = tolower(http_content[i]);
+						http_content2[len] = '\0';
+						charset = (unsigned char*) strstr((char*)http_content2,"charset");
+						if(charset) {							
+							while(*charset && *charset != '=')
+								charset++;
+							if(*charset)
+								charset++;/* skip = */
+							len = strcspn((const char*)charset," \"'");
+							charset[len] = '\0';
+							if(len) {
+								process_encoding_set(&conv, charset, META);
+							}
+						}
+						free(http_content2);
+					}
+				} else if (hrefs) {
+					if(in_ahref && !href_contents_begin)
+						href_contents_begin=ptr;
+					if (strcmp(tag, "a") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "href");
+						if (arg_value && strlen(arg_value) > 0) {
+							if (hrefs->scanContents) {
+								unsigned char* arg_value_title = html_tag_arg_value(&tag_args,"title");
+								/*beginning of an <a> tag*/
+								if (in_ahref)
+									/*we encountered nested <a> tags, pretend previous closed*/
+									if (href_contents_begin) {
+										html_tag_contents_append(hrefs,in_ahref,
+											href_contents_begin,ptrend);
+										/*add pending contents between tags*/
+										html_tag_contents_done(hrefs,in_ahref);
+										in_ahref=0;
+										}
+								if (arg_value_title) {
+									/* title is a 'displayed link'*/
+									html_tag_arg_add(hrefs,"href_title",arg_value_title);
+									hrefs->contents[hrefs->count-1]=blobCreate();
+									html_tag_contents_append(hrefs,hrefs->count,arg_value,
+										arg_value+strlen(arg_value));
+									html_tag_contents_done(hrefs,hrefs->count);
+								}
+								if (in_form_action) {
+									/* form action is the real URL, and href is the 'displayed' */
+									html_tag_arg_add(hrefs,"form",arg_value);
+									hrefs->contents[hrefs->count-1] =  blobCreate();
+									html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+											in_form_action + strlen(in_form_action));
+									html_tag_contents_done(hrefs,hrefs->count);
+								}
+							}
+							html_tag_arg_add(hrefs, "href", arg_value);
+							if (hrefs->scanContents) {
+								in_ahref=hrefs->count; /* index of this tag (counted from 1) */
+								href_contents_begin=ptr;/* contents begin after <a ..> ends */
+								hrefs->contents[hrefs->count-1]=blobCreate();
+							}
+						}
+					} else if (strcmp(tag,"form") == 0 && hrefs->scanContents) {
+						const unsigned char* arg_action_value = html_tag_arg_value(&tag_args,"action");
+						if (arg_action_value) {
+							if(in_form_action) 
+								free(in_form_action);							
+							in_form_action = cli_strdup(arg_action_value);
+						}
+					} else if (strcmp(tag, "img") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "src");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "src", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* "contents" of an img tag, is the URL of its parent <a> tag */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+						arg_value = html_tag_arg_value(&tag_args, "dynsrc");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "dynsrc", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+					} else if (strcmp(tag, "iframe") == 0) {
+						arg_value = html_tag_arg_value(&tag_args, "src");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "iframe", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+										in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}
+					} else if (strcmp(tag,"area") == 0) {
+						arg_value = html_tag_arg_value(&tag_args,"href");
+						if (arg_value && strlen(arg_value) > 0) {
+							html_tag_arg_add(hrefs, "area", arg_value);
+							if(hrefs->scanContents && in_ahref)
+								/* see above */
+								html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+							if (in_form_action) {
+								/* form action is the real URL, and href is the 'displayed' */
+								html_tag_arg_add(hrefs,"form",arg_value);
+								hrefs->contents[hrefs->count-1] =  blobCreate();
+								html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+									in_form_action + strlen(in_form_action));
+								html_tag_contents_done(hrefs,hrefs->count);
+							}
+						}						
+					}
+					/* TODO:imagemaps can have urls too */
+				}
+				html_tag_arg_free(&tag_args);
+				break;
+			case HTML_CHAR_REF:
+				if (*ptr == '#') {
+					value = 0;
+					hex = FALSE;
+					state = HTML_CHAR_REF_DECODE;
+					ptr++;
+				} else {
+					if(dconf_entconv)
+						state = HTML_ENTITY_REF_DECODE;
+					else {
+						if(next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '&';
+						}
+						html_output_c(file_buff_o1, file_buff_o2, '&');
+
+						state = next_state;
+						next_state = HTML_BAD_STATE;
+					}
+				}
+				break;
+			case HTML_ENTITY_REF_DECODE:
+				if(*ptr == ';') {
+					size_t i;
+					unsigned char* normalized;
+					entity_val[entity_val_length] = '\0';
+					normalized = entity_norm(&conv, entity_val);
+					if(normalized) {
+						for(i=0; i < strlen(normalized); i++) {
+							const char c = tolower(normalized[i]);
+							html_output_c(file_buff_o1, file_buff_o2, c);
+							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = c;
+							}
+						}
+						free(normalized);
+					}
+					else {
+						html_output_c(file_buff_o1, file_buff_o2, '&');
+						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '&';
+						}						
+						for(i=0; i < entity_val_length; i++) {
+							const char c = tolower(entity_val[i]);
+							html_output_c(file_buff_o1, file_buff_o2, c);
+							if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = c;
+							}
+						}
+						if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = ';';
+						}
+						html_output_c(file_buff_o1, file_buff_o2, ';');
+					}
+					entity_val_length = 0;
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				}
+				else if ( (isalnum(*ptr) || *ptr=='_' || *ptr==':' || (*ptr=='-')) && entity_val_length < HTML_STR_LENGTH) {
+					entity_val[entity_val_length++] = *ptr++;
+				}
+				else {
+						/* entity too long, or not valid, dump it */
+						size_t i;
+						if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = '&';
+						}
+						html_output_c(file_buff_o1, file_buff_o2, '&');
+						for(i=0; i < entity_val_length; i++) {
+							const char c = tolower(entity_val[i]);
+							html_output_c(file_buff_o1, file_buff_o2, c);
+							if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+								tag_val[tag_val_length++] = c;
+							}
+						}
+
+						state = next_state;
+						next_state = HTML_BAD_STATE;
+						entity_val_length = 0;
+				}
+				break;
+			case HTML_CHAR_REF_DECODE:
+				if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
+					hex=TRUE;
+					ptr++;
+				} else if (*ptr == ';') {
+					if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = value; /* store encoded values too */
+					}
+					if(dconf_entconv) {
+
+						if(value < 0x80)
+							html_output_c(file_buff_o1, file_buff_o2, tolower(value));
+						else {
+							unsigned char buff[10];
+							snprintf((char*)buff,9,"&#%d;",value);
+							buff[9] = '\0';
+							html_output_str(file_buff_o1, buff, strlen(buff));
+							html_output_str(file_buff_o2, buff, strlen(buff));
+						}
+					} else
+							html_output_c(file_buff_o1, file_buff_o2, tolower(value&0xff));
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				} else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
+					if (hex) {
+						value *= 16;
+					} else {
+						value *= 10;
+					}
+					if (isdigit(*ptr)) {
+						value += (*ptr - '0');
+					} else {
+						value += (tolower(*ptr) - 'a' + 10);
+					}
+					ptr++;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, value);
+					state = next_state;
+					next_state = HTML_BAD_STATE;
+				}
+				break;
+			case HTML_JSDECODE:
+				/* Check for start marker */
+				if (strncmp(ptr, "#@~^", 4) == 0) {
+					ptr += 4;
+					state = HTML_JSDECODE_LENGTH;
+					next_state = HTML_BAD_STATE;
+				} else {
+					html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+					html_output_c(file_buff_script, NULL, tolower(*ptr));
+					ptr++;
+				}
+				break;
+			case HTML_JSDECODE_LENGTH:
+				if (strlen(ptr) < 8) {
+					state = HTML_NORM;
+					next_state = HTML_BAD_STATE;
+					break;
+				}
+				length = base64_chars[ptr[0]] << 2;
+				length += base64_chars[ptr[1]] >> 4;
+				length += (base64_chars[ptr[1]] & 0x0f) << 12;
+				length += (base64_chars[ptr[2]] >> 2) << 8;
+				length += (base64_chars[ptr[2]] & 0x03) << 22;
+				length += base64_chars[ptr[3]] << 16;
+				length += (base64_chars[ptr[4]] << 2) << 24;
+				length += (base64_chars[ptr[5]] >> 4) << 24;
+				table_pos = 0;
+				state = HTML_JSDECODE_DECRYPT;
+				next_state = HTML_BAD_STATE;
+				ptr += 8;
+				break;
+			case HTML_JSDECODE_DECRYPT:
+				if (length == 0) {
+					html_output_str(file_buff_script, "</script>\n", 10);
+					length = 12;
+					state = HTML_SKIP_LENGTH;
+					next_state = HTML_NORM;
+					break;
+				}
+				if (*ptr < 0x80) {
+					value = decrypt_tables[table_order[table_pos]][*ptr];
+					if (value == 0xFF) { /* special character */
+						ptr++;
+						length--;
+						switch (*ptr) {
+						case '\0':
+							/* Fixup for end of line */
+							ptr--;
+							break;
+						case 0x21:
+							html_output_c(file_buff_o1, file_buff_o2, 0x3c);
+							html_output_c(file_buff_script, NULL, 0x3c);
+							break;
+						case 0x23:
+							html_output_c(file_buff_o1, file_buff_o2, 0x0d);
+							html_output_c(file_buff_script, NULL, 0x0d);
+							break;
+						case 0x24:
+							html_output_c(file_buff_o1, file_buff_o2, 0x40);
+							html_output_c(file_buff_script, NULL, 0x40);
+							break;				
+						case 0x26:
+							html_output_c(file_buff_o1, file_buff_o2, 0x0a);
+							html_output_c(file_buff_script, NULL, 0x0a);
+							break;
+						case 0x2a:
+							html_output_c(file_buff_o1, file_buff_o2, 0x3e);
+							html_output_c(file_buff_script, NULL, 0x3e);
+							break;
+						}
+					} else {
+						html_output_c(file_buff_o1, file_buff_o2, value);
+						html_output_c(file_buff_script, NULL, tolower(value));
+					}
+				}
+				table_pos = (table_pos + 1) % 64;
+				ptr++;
+				length--;
+				break;
+				
+			case HTML_RFC2397_TYPE:
+				if (*ptr == '\'') {
+					if (!escape && (quoted==SINGLE_QUOTED)) {
+						/* Early end of data detected. Error */
+						ptr++;
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					}
+				} else if (*ptr == '"') {
+					if (!escape && (quoted==DOUBLE_QUOTED)) {
+						/* Early end of data detected. Error */
+						ptr++;
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							tag_val[tag_val_length++] = '"';
+						}
+						ptr++;
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						/* Early end of data detected. Error */
+						state = HTML_SKIP_WS;
+						tag_arg_length=0;
+						next_state = HTML_TAG_ARG;
+					} else {
+						if (tag_val_length < HTML_STR_LENGTH) {
+							if (isspace(*ptr)) {
+								tag_val[tag_val_length++] = ' ';
+							} else {
+								tag_val[tag_val_length++] = '>';
+							}
+						}
+						state = HTML_SKIP_WS;
+						escape = FALSE;
+						quoted = NOT_QUOTED;
+						next_state = HTML_RFC2397_TYPE;
+						ptr++;
+					}
+				} else if (*ptr == ',') {
+					/* Beginning of data */
+					tag_val[tag_val_length] = '\0';
+					state = HTML_RFC2397_INIT;
+					escape = FALSE;
+					next_state = HTML_BAD_STATE;
+					ptr++;
+				
+				} else {
+					if (tag_val_length < HTML_STR_LENGTH) {
+						tag_val[tag_val_length++] = tolower(*ptr);
+					}
+					ptr++;
+				}
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_RFC2397_INIT:
+				if (dirname) {
+					file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+					if (!file_tmp_o1) {
+						goto abort;
+					}
+					snprintf(filename, 1024, "%s/rfc2397", dirname);
+					tmp_file = cli_gentemp(filename);
+					cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
+					file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+					free(tmp_file);
+					if (!file_tmp_o1->fd) {
+						cli_dbgmsg("open failed: %s\n", filename);
+						free(file_tmp_o1);
+						goto abort;
+					}
+					file_tmp_o1->length = 0;
+				
+					html_output_str(file_tmp_o1, "From html-normalise\n", 20);
+					html_output_str(file_tmp_o1, "Content-type: ", 14);
+					if ((tag_val_length == 0) && (*tag_val == ';')) {
+						html_output_str(file_tmp_o1, "text/plain\n", 11);
+					}
+					html_output_str(file_tmp_o1, tag_val, tag_val_length);
+					html_output_c(file_tmp_o1, NULL, '\n');
+					if (strstr(tag_val, ";base64") != NULL) {
+						html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
+					}
+					html_output_c(file_tmp_o1, NULL, '\n');
+				} else {
+					file_tmp_o1 = NULL;
+				}
+				state = HTML_RFC2397_DATA;
+				binary = TRUE;
+				break;
+			case HTML_RFC2397_DATA:
+				if (*ptr == '&') {
+					state = HTML_CHAR_REF;
+					next_state = HTML_RFC2397_DATA;
+					ptr++;
+				} else if (*ptr == '%') {
+					length = 0;
+					value = 0;
+					state = HTML_ESCAPE_CHAR;
+					next_state = HTML_RFC2397_ESC;
+					ptr++;
+				} else if (*ptr == '\'') {
+					if (!escape && (quoted==SINGLE_QUOTED)) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else if (*ptr == '\"') {
+					if (!escape && (quoted==DOUBLE_QUOTED)) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else if (isspace(*ptr) || (*ptr == '>')) {
+					if (quoted == NOT_QUOTED) {
+						state = HTML_RFC2397_FINISH;
+						ptr++;
+					} else {
+						html_output_c(file_tmp_o1, NULL, *ptr);
+						ptr++;
+					}
+				} else {
+					html_output_c(file_tmp_o1, NULL, *ptr);
+					ptr++;
+				}
+				if (*ptr == '\\') {
+					escape = TRUE;
+				} else {
+					escape = FALSE;
+				}
+				break;
+			case HTML_RFC2397_FINISH:
+				if(file_tmp_o1) {
+					html_output_flush(file_tmp_o1);
+					close(file_tmp_o1->fd);
+					free(file_tmp_o1);
+				}
+				state = HTML_SKIP_WS;
+				escape = FALSE;
+				quoted = NOT_QUOTED;
+				next_state = HTML_TAG_ARG;
+				binary = FALSE;
+				break;
+			case HTML_RFC2397_ESC:
+				if (length == 2) {
+					html_output_c(file_tmp_o1, NULL, value);
+				} else if (length == 1) {
+					html_output_c(file_tmp_o1, NULL, '%');
+					html_output_c(file_tmp_o1, NULL, value+'0');
+				} else {
+					html_output_c(file_tmp_o1, NULL, '%');
+				}
+				state = HTML_RFC2397_DATA;
+				break;		
+			case HTML_ESCAPE_CHAR:
+				value *= 16;
+				length++;
+				if (isxdigit(*ptr)) {
+					if (isdigit(*ptr)) {
+						value += (*ptr - '0');
+					} else {
+						value += (tolower(*ptr) - 'a' + 10);
+					}
+				} else {
+					state = next_state;
+				}
+				if (length == 2) {
+					state = next_state;
+				}
+				ptr++;
+				break;	
+			}
+		}
+		if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
+			/* end of line, append contents now, resume on next line */
+			html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+		ptrend = NULL;
+		free(line);
+ 		if(dconf_entconv)
+ 			ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+ 		else
+ 			ptr = line = cli_readline(stream_in, m_area, 8192);
+	}
+	
+ 	if(dconf_entconv) {
+ 		/* handle "unfinished" entitites */
+		size_t i;
+		unsigned char* normalized;
+		entity_val[entity_val_length] = '\0';
+		normalized = entity_norm(&conv, entity_val);
+		if(normalized) {
+			for(i=0; i < strlen(normalized); i++)
+				html_output_c(file_buff_o1, file_buff_o2, tolower(normalized[i]));
+						free(normalized);
+		}
+		else {
+			if(entity_val_length) {
+				html_output_c(file_buff_o1, file_buff_o2, '&');
+				for(i=0; i < entity_val_length; i++)
+					html_output_c(file_buff_o1, file_buff_o2, tolower(entity_val[i]));
+			}
+		}
+	}
+	retval = TRUE;
+abort:
+	if (in_form_action)
+		free(in_form_action);
+	if (in_ahref) /* tag not closed, force closing */
+		html_tag_contents_done(hrefs,in_ahref);
+
+	if(dconf_entconv)
+		entity_norm_done(&conv);
+	html_tag_arg_free(&tag_args);
+	if (!m_area) {
+		fclose(stream_in);
+	}
+	if (file_buff_o1) {
+		html_output_flush(file_buff_o1);
+		close(file_buff_o1->fd);
+		free(file_buff_o1);
+	}
+	if (file_buff_o2) {
+		html_output_flush(file_buff_o2);
+		close(file_buff_o2->fd);
+		free(file_buff_o2);
+	}
+	if (file_buff_script) {
+		html_output_flush(file_buff_script);
+		close(file_buff_script->fd);
+		free(file_buff_script);
+	}
+	return retval;
+}
+
+/*
+ * Normalise an HTML document that is already held in memory.
+ *
+ * The buffer is described by an m_area_t starting at offset 0 and handed to
+ * cli_html_normalise() with -1 in place of a file descriptor.  The return
+ * value is whatever cli_html_normalise() returns.
+ */
+int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+	m_area_t mem_view;
+	int status;
+
+	mem_view.offset = 0;
+	mem_view.length = in_size;
+	mem_view.buffer = in_buff;
+
+	status = cli_html_normalise(-1, &mem_view, dirname, hrefs, dconf);
+	return status;
+}
+
+/*
+ * Normalise the HTML document open on fd.
+ *
+ * When mmap() is available the file is mapped read-only and processed as a
+ * memory area; if fstat() or mmap() fails, or mmap() support is compiled out,
+ * the descriptor itself is passed to cli_html_normalise().  The return value
+ * is whatever cli_html_normalise() returns.
+ */
+int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+#if HAVE_MMAP
+	m_area_t mapped;
+	struct stat sb;
+	int status;
+
+	if (fstat(fd, &sb) != 0) {
+		cli_dbgmsg("fstat HTML failed\n");
+		return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+	}
+
+	mapped.length = sb.st_size;
+	mapped.buffer = (unsigned char *) mmap(NULL, mapped.length, PROT_READ, MAP_PRIVATE, fd, 0);
+	mapped.offset = 0;
+	if (mapped.buffer == MAP_FAILED) {
+		/* could not map the file: fall back to stream based reading */
+		cli_dbgmsg("mmap HTML failed\n");
+		return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+	}
+
+	cli_dbgmsg("mmap'ed file\n");
+	status = cli_html_normalise(-1, &mapped, dirname, hrefs, dconf);
+	munmap(mapped.buffer, mapped.length);
+	return status;
+#else
+	return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+#endif
+}
+
+/*
+ * Decode a Script Encoder (screnc) section found in the file open on fd and
+ * write the decoded text to "<dirname>/screnc.html".
+ *
+ * The input is read line by line until the "#@~^" marker is seen.  The six
+ * characters after the marker encode the payload length; two further
+ * characters are skipped and the payload is then decoded through
+ * decrypt_tables/table_order, with 0xFF table entries introducing a
+ * two-character escape.
+ *
+ * Returns TRUE when a section was found and processed, FALSE otherwise
+ * (dup/fdopen/open failure, no marker, or input ending inside the length
+ * header).
+ */
+int html_screnc_decode(int fd, const char *dirname)
+{
+	int fd_tmp, table_pos=0, result, count, state, retval=FALSE;
+	unsigned char *line, tmpstr[6];
+	unsigned long length;
+	unsigned char *ptr;
+	char filename[1024];
+	FILE *stream_in;
+	file_buff_t file_buff;
+
+	lseek(fd, 0, SEEK_SET);
+	fd_tmp = dup(fd);
+	if (fd_tmp < 0) {
+		return FALSE;
+	}
+	stream_in = fdopen(fd_tmp, "r");
+	if (!stream_in) {
+		close(fd_tmp);
+		return FALSE;
+	}
+
+	snprintf(filename, sizeof(filename), "%s/screnc.html", dirname);
+	file_buff.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+	file_buff.length = 0;
+
+	/* open() reports failure with -1; 0 is a valid descriptor */
+	if (file_buff.fd < 0) {
+		cli_dbgmsg("open failed: %s\n", filename);
+		fclose(stream_in);
+		return FALSE;
+	}
+
+	/* Find the line that carries the start-of-encoding marker */
+	while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) {
+		ptr = (unsigned char *) strstr((const char *) line, "#@~^");
+		if (ptr) {
+			break;
+		}
+		free(line);
+	}
+	if (!line) {
+		goto abort;
+	}
+
+	/* Calculate the length of the encoded string */
+	ptr += 4;
+	count = 0;
+	do {
+		/*
+		 * At end of line continue on the next one.  Loop rather than
+		 * test once so that an empty line cannot make us store its
+		 * terminator and step past the end of the buffer.
+		 */
+		while (! *ptr) {
+			free(line);
+			ptr = line = cli_readline(stream_in, NULL, 8192);
+			if (!line) {
+				goto abort;
+			}
+		}
+		tmpstr[count++] = *ptr;
+		ptr++;
+	} while (count < 6);
+
+	length = base64_chars[tmpstr[0]] << 2;
+	length += base64_chars[tmpstr[1]] >> 4;
+	length += (base64_chars[tmpstr[1]] & 0x0f) << 12;
+	length += (base64_chars[tmpstr[2]] >> 2) << 8;
+	length += (base64_chars[tmpstr[2]] & 0x03) << 22;
+	length += base64_chars[tmpstr[3]] << 16;
+	length += (base64_chars[tmpstr[4]] << 2) << 24;
+	length += (base64_chars[tmpstr[5]] >> 4) << 24;
+
+	/* Move forward 2 bytes */
+	count = 2;
+	state = HTML_SKIP_LENGTH;
+
+	while (length && line) {
+		while (length && *ptr) {
+			/* line terminators are not part of the payload */
+			if ((*ptr == '\n') || (*ptr == '\r')) {
+				ptr++;
+				continue;
+			}
+			switch (state) {
+			case HTML_SKIP_LENGTH:
+				ptr++;
+				count--;
+				if (count == 0) {
+					state = HTML_NORM;
+				}
+				break;
+			case HTML_SPECIAL_CHAR:
+				/* second character of an escape selects the output byte */
+				switch (*ptr) {
+				case 0x21:
+					html_output_c(&file_buff, NULL, 0x3c);
+					break;
+				case 0x23:
+					html_output_c(&file_buff, NULL, 0x0d);
+					break;
+				case 0x24:
+					html_output_c(&file_buff, NULL, 0x40);
+					break;
+				case 0x26:
+					html_output_c(&file_buff, NULL, 0x0a);
+					break;
+				case 0x2a:
+					html_output_c(&file_buff, NULL, 0x3e);
+					break;
+				}
+				ptr++;
+				length--;
+				state = HTML_NORM;
+				break;
+			case HTML_NORM:
+				if (*ptr < 0x80) {
+					result = decrypt_tables[table_order[table_pos]][*ptr];
+					if (result == 0xFF) { /* special character */
+						state = HTML_SPECIAL_CHAR;
+					} else {
+						html_output_c(&file_buff, NULL, (char)result);
+					}
+				}
+				ptr++;
+				length--;
+				table_pos = (table_pos + 1) % 64;
+				break;
+			}
+		}
+		free(line);
+		if (length) {
+			ptr = line = cli_readline(stream_in, NULL, 8192);
+		}
+	}
+	retval = TRUE;
+
+abort:
+	fclose(stream_in);
+	html_output_flush(&file_buff);
+	close(file_buff.fd);
+	return retval;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,97 @@
+/*
+ * is_tar() -- figure out whether file is a tar archive.
+ *
+ * Stolen (by the author of the file utility!) from the public domain tar program:
+ * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
+ *
+ * @(#)list.c 1.18 9/23/86 Public Domain - gnu
+ * $Id: is_tar.c,v 1.2 2006/06/17 21:00:44 tkojm Exp $
+ *
+ * Comments changed and some code/comments reformatted
+ * for file command by Ian Darwin.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "is_tar.h"
+
+#include "others.h"
+
+#define	isodigit(c)	( ((c) >= '0') && ((c) <= '7') )
+
+static int from_oct(int digs, char *where);
+
+/*
+ * Decide whether buf holds the first record of a tar archive.
+ *
+ * The byte sum of the whole record is compared with the octal checksum
+ * stored in the header, counting the checksum field itself as blanks.
+ *
+ * Return
+ *	0 if the buffer is too short or the checksum is bad (i.e., probably
+ *	  not a tar archive),
+ *	1 for old UNIX tar file,
+ *	2 for Unix Std (POSIX) tar file.
+ */
+int is_tar(unsigned char *buf, unsigned int nbytes)
+{
+	union record *rec = (union record *)buf;
+	const char *bytes;
+	size_t idx;
+	int stored, computed = 0;
+
+	if (nbytes < sizeof(union record))
+		return 0;
+
+	stored = from_oct(8, rec->header.chksum);
+
+	/*
+	 * Mask each byte rather than reading through an unsigned char
+	 * pointer; the original authors did this for old compilers (e.g. V7).
+	 */
+	bytes = rec->charptr;
+	for (idx = 0; idx < sizeof(union record); idx++)
+		computed += 0xFF & bytes[idx];
+
+	/* Adjust checksum to count the "chksum" field as blanks. */
+	for (idx = 0; idx < sizeof(rec->header.chksum); idx++)
+		computed -= 0xFF & rec->header.chksum[idx];
+	computed += ' ' * sizeof rec->header.chksum;
+
+	if (computed != stored)
+		return 0;	/* Not a tar archive */
+
+	/* The magic string tells standard archives from old fashioned ones */
+	return (strcmp(rec->header.magic, TMAGIC) == 0) ? 2 : 1;
+}
+
+
+/*
+ * Quick and dirty octal conversion of a field at most digs characters wide.
+ *
+ * Leading whitespace is skipped, then octal digits are accumulated until the
+ * field is used up or a non-octal character is met.
+ *
+ * Result is -1 if the field is invalid (all blank, or the digits are followed
+ * inside the field by something other than NUL or whitespace).
+ */
+static int from_oct(int digs, char *where)
+{
+	int acc = 0;
+
+	/* Skip spaces; running out of field here means it was all blank */
+	for (; isspace((unsigned char)*where); where++) {
+		if (--digs <= 0)
+			return -1;
+	}
+
+	/* Scan til nonoctal */
+	for (; digs > 0 && isodigit(*where); digs--, where++)
+		acc = (acc << 3) | (*where - '0');
+
+	/* Field exhausted, or ended on NUL/space: the value is good */
+	if (digs <= 0 || *where == '\0' || isspace((unsigned char)*where))
+		return acc;
+
+	return -1;			/* Ended on non-space/nul */
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,490 @@
+/*
+ *  Copyright (C) 2006 Nigel Horne <njh at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ * Save the JavaScript embedded in an HTML file, then run the script, saving
+ * the output in a file that is to be scanned, then remove the script file
+ *
+ * FIXME:	Includes .c files here, which need to be separated out
+ * FIXME:	The js code probably only compiles on GCC.
+ * FIXME:	The js code needs re_compile_pattern, re_compile_fastmap,
+ *			re_search, which NetBSD, and probably other platforms
+ *			don't have
+ * TODO:	Test with real malware
+ * TODO:	Add mailfollowurls type feature
+ * TODO:	Check the NGS code for vulnerabilities, leaks etc.
+ * TODO:	Check the NGS code is thread safe
+ * TODO:	Test code such as
+ *	<script>
+ *		document.writeln("<script> function f() { ..the real worm code..
+ *			</script>"); f();
+ *	</script>
+ */
+static	char	const	rcsid[] = "$Id: jscript.c,v 1.11 2006/12/13 15:25:34 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "clamav.h"
+#include "others.h"
+
+#ifdef	CL_EXPERIMENTAL
+
+#if	HAVE_MMAP
+
+#include <memory.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <ctype.h>
+
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "jscript.h"
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+/* Maximum filenames under various systems - njh */
+#ifndef	NAME_MAX	/* e.g. Linux */
+# ifdef	MAXNAMELEN	/* e.g. Solaris */
+#   define	NAME_MAX	MAXNAMELEN
+# else
+#   ifdef	FILENAME_MAX	/* e.g. SCO */
+#     define	NAME_MAX	FILENAME_MAX
+#   else
+#     define	NAME_MAX	256
+#   endif
+# endif
+#endif
+
+#ifdef	CL_THREAD_SAFE
+#define	VM_TIMEOUT	5	/* In seconds: FIXME should be configurable */
+#endif
+
+#if	defined(VM_TIMEOUT) && (VM_TIMEOUT > 0)
+#include <pthread.h>
+#include <sys/time.h>
+#include <signal.h>
+#endif
+
+static	int	run_js(const char *filename, const char *dir);
+static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
+
+/*
+ * Extract the contents of every <script>...</script> section of the file
+ * open on desc, save each one to a temporary file created in dir (prefixed
+ * with a small "document" object definition) and hand it to run_js().
+ *
+ * Returns CL_EOPEN if fstat() fails, CL_CLEAN for an empty file, CL_EFORMAT
+ * if the file is too short to hold a script element, CL_EMEM if mmap()
+ * fails, CL_ETMPFILE if a temporary script file cannot be created.
+ * Otherwise returns CL_SUCCESS, or the result of run_js() for a script that
+ * was still open when the end of the file was reached.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+	struct stat statb;
+	off_t size;	/* total number of bytes in the file */
+	char *buf;	/* start of memory mapped area */
+	const char *p;
+	long bytesleft;
+	int created_output, done_header, rc;
+	FILE *fout;
+	char script_filename[NAME_MAX + 1];
+
+	cli_dbgmsg("in cli_scanjs(%s)\n", dir);
+
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = (size_t)statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+	if(size <= 17)	/* doesn't even include <script></script> */
+		return CL_EFORMAT;
+
+	p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(buf == MAP_FAILED)
+		return CL_EMEM;
+
+	/* off_t need not have the width of unsigned long: convert for %lu */
+	cli_dbgmsg("cli_scanjs: scanning %lu bytes\n", (unsigned long)size);
+
+	p = buf;
+	bytesleft = size;
+	created_output = done_header = 0;
+	fout = NULL;
+
+	while(p < &buf[size]) {
+		const char *q = cli_pmemstr(p, bytesleft, "<script", 7);
+
+		if(q == NULL)
+			/* TODO: full case independent search */
+			q = cli_pmemstr(p, bytesleft, "<SCRIPT", 7);
+
+		if(q == NULL)
+			break;
+
+		/*
+		 * TODO: check language is javascript
+		 * TODO: follow src if mail-follow-urls is set
+		 */
+
+		bytesleft -= (q - p);
+		p = q;
+
+		/* skip the remainder of the opening tag */
+		q = cli_pmemstr(p, bytesleft, ">", 1);
+		if(q == NULL)
+			break;
+
+		bytesleft -= (q - p);
+		p = q;
+
+		p++;
+		bytesleft--;
+
+		/* copy the script body, one character at a time */
+		while(bytesleft) {
+			char c;
+
+			if(*p == '<') {
+				p++;
+				if(--bytesleft == 0)
+					break;
+				if((*p == '!') && !done_header) {
+					/* "<!" before any output: skip to end of line */
+					while(bytesleft && (*p != '\n')) {
+						p++;
+						bytesleft--;
+					}
+					continue;
+				}
+				if((bytesleft >= 7) && (strncasecmp(p, "/script", 7) == 0)) {
+					/* closing tag: run what has been saved so far */
+					bytesleft -= 7;
+					p = &p[7];
+					while(bytesleft && (*p != '>')) {
+						p++;
+						bytesleft--;
+					}
+					if(fout) {
+						fclose(fout);
+						fout = NULL;
+						(void)run_js(script_filename, dir);
+
+						if(!cli_leavetemps_flag)
+							unlink(script_filename);
+					}
+					done_header = 0;
+					break;
+				}
+				/* an ordinary '<'; the following character is handled next time round */
+				c = '<';
+			} else {
+				/*c = tolower(*p);*/
+				c = *p;
+				p++;
+				bytesleft--;
+			}
+
+			if(!done_header) {
+				int fd;
+
+				snprintf(script_filename, sizeof(script_filename), "%s/jsXXXXXX", dir);
+#if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
+				fd = mkstemp(script_filename);
+				if(fd < 0)
+					fout = NULL;	/* errno was set by mkstemp */
+				else {
+					fout = fdopen(fd, "wb");
+					if(fout == NULL) {
+						/* keep fdopen's errno for the message below */
+						const int saved_errno = errno;
+
+						close(fd);
+						errno = saved_errno;
+					}
+				}
+#elif	defined(C_WINDOWS)
+				if(_mktemp(script_filename) == NULL) {
+					/* mktemp only allows 26 files */
+					char *name = cli_gentemp(dir);
+					if(name == NULL)
+						fout = NULL;
+					else {
+						strcpy(script_filename, name);
+						free(name);
+						fout = fopen(script_filename, "wb");
+					}
+				} else
+					fout = fopen(script_filename, "wb");
+#else
+				mktemp(script_filename);
+				fout = fopen(script_filename, "wb");
+#endif
+
+				if(fout == NULL) {
+					cli_errmsg("cli_scanjs: can't create temporary file %s: %s\n", script_filename, strerror(errno));
+					munmap(buf, size);
+					return CL_ETMPFILE;
+				}
+				cli_dbgmsg("Saving javascript to %s\n",
+					script_filename);
+
+				/*
+				 * Create a document object, on web pages it's
+				 *	used to send output to the browser
+				 * FIXME: will create a file even if the script
+				 *	is empty, e.g. src is somewhere else
+				 */
+				fputs("function createDoc() {\n", fout);
+				fputs("\tfunction write(text) {\n", fout);
+				/*
+				 * Use System.print rather than print so that
+				 *	a new line is not appended
+				 */
+				fputs("\t\tSystem.print(text);\n", fout);
+				fputs("\t}\n", fout);
+				fputs("}\n", fout);
+				fputs("document = new createDoc();\n", fout);
+
+				done_header = 1;
+				created_output = 1;
+			}
+			putc(c, fout);
+		}
+	}
+
+	munmap(buf, size);
+
+	rc = CL_SUCCESS;
+
+	if(!created_output)
+		cli_dbgmsg("No javascript was detected\n");
+	else if(fout) {
+		/* input ended inside a script: run what was collected */
+		fclose(fout);
+		rc = run_js(script_filename, dir);
+
+		if(!cli_leavetemps_flag)
+			unlink(script_filename);
+	}
+	return rc;
+}
+
+#include "compiler.c"
+#include "iostream.c"
+#include "js.c"
+#include "main.c"
+#include "debug.c"
+#include "crc32.c"
+
+static	FILE *fout;
+
+/*
+ * Output callback given to create_interp(): writes len bytes from buf to the
+ * file-scope stream fout as a single item and returns fwrite()'s item count.
+ * The context argument is not used.
+ */
+static	int
+write_to_fout(void *context, unsigned char *buf, unsigned int len)
+{
+	const size_t nitems = fwrite(buf, (size_t)len, 1, fout);
+
+	return (int)nitems;
+}
+
+#if	defined(VM_TIMEOUT) && (VM_TIMEOUT > 0)
+
+/* Arguments and result shared between run_js() and the js_thread() worker */
+struct args {
+	const char *filename;	/* script file to evaluate */
+	const char *dir;	/* directory in which the output file is created */
+	pthread_cond_t	*cond;	/* broadcast by js_thread() when it stops working */
+	int	result;		/* status written by js_thread(), returned by run_js() */
+};
+
+/*
+ * Thread body used by run_js(): evaluates args->filename with the JS
+ * interpreter, sending the interpreter's output to a temporary file created
+ * in args->dir.
+ *
+ * a points to a struct args.  args->result is set to CL_ETMPFILE if the
+ * output file cannot be created, to CL_EIO while the script is being
+ * evaluated (so that is what remains if the thread is cancelled) and to
+ * CL_SUCCESS once evaluation has finished.  args->cond is broadcast when the
+ * thread has finished its work.  Always returns NULL.
+ */
+static void *
+js_thread(void *a)
+{
+	JSInterpPtr interp;
+	char *outputfilename;
+	struct args *args = (struct args *)a;
+	const char *dir = args->dir;
+	const char *filename = args->filename;
+	int otype, rc;
+
+	cli_dbgmsg("run_js(%s)\n", filename);
+
+	outputfilename = cli_gentemp(dir);
+	if(outputfilename == NULL) {
+		/* store the result before waking the waiter */
+		args->result = CL_ETMPFILE;
+		pthread_cond_broadcast(args->cond);
+		return NULL;
+	}
+
+	fout = fopen(outputfilename, "wb");
+	if(fout == NULL) {
+		cli_warnmsg("Can't create %s\n", outputfilename);
+		free(outputfilename);
+		args->result = CL_ETMPFILE;
+		pthread_cond_broadcast(args->cond);
+		return NULL;
+	}
+
+	cli_dbgmsg("Redirecting JS VM stdout to %s\n", outputfilename);
+	free(outputfilename);
+
+	/*
+	 * Run NGS on the file
+	 */
+	interp = create_interp(write_to_fout);
+
+	args->result = CL_EIO;	/* TODO: CL_TIMEOUT */
+
+	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &otype);
+
+	if(!js_eval_file(interp, filename)) {
+		cli_warnmsg("JS failed: %s\n", js_error_message(interp));
+		/*rc = CL_EIO;*/
+	}
+
+	/*
+	 * If a pthread_cancel() is issued exactly here, js_destroy_interp()
+	 * wouldn't be called, leading to a memory leak
+	 */
+	/*
+	 * pthread functions return the error number directly (never a
+	 * negative value) and do not set errno
+	 */
+	rc = pthread_cond_broadcast(args->cond);
+	if(rc != 0)
+		cli_warnmsg("pthread_cond_broadcast: %s\n", strerror(rc));
+
+	js_destroy_interp(interp);
+
+	fclose(fout);
+
+	args->result = CL_SUCCESS;
+	return NULL;
+}
+
+/*
+ * Evaluate the script in filename on a separate thread, writing its output
+ * to a temporary file in dir, and stop it if it has not signalled completion
+ * within VM_TIMEOUT seconds.
+ *
+ * Returns the status left by js_thread() in args.result, or CL_EIO if the
+ * thread could not be started.
+ *
+ * NOTE(review): the worker does not hold the mutex when it broadcasts, so a
+ * script that finishes before the wait below begins is only noticed after
+ * the timeout - confirm whether a "done" flag should be added.
+ */
+static int
+run_js(const char *filename, const char *dir)
+{
+	struct args args;
+	pthread_t tid;
+	struct timespec ts;
+	struct timeval tp;
+	pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+	pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+	int rc;
+
+	args.filename = filename;
+	args.dir = dir;
+	args.cond = &cond;
+	/* defined value in case the thread is stopped before it stores one */
+	args.result = CL_EIO;
+
+	rc = pthread_create(&tid, NULL, js_thread, &args);
+	if(rc != 0) {
+		/* no thread exists, so there is nothing to wait for or join */
+		cli_warnmsg("run_js: pthread_create failed: %s\n", strerror(rc));
+		return CL_EIO;
+	}
+
+	gettimeofday(&tp, NULL);
+
+	ts.tv_sec = tp.tv_sec + VM_TIMEOUT;
+	ts.tv_nsec = tp.tv_usec * 1000;
+
+	pthread_mutex_lock(&mutex);
+	if(pthread_cond_timedwait(&cond, &mutex, &ts) == ETIMEDOUT) {
+		cli_warnmsg("Runaway javascript stopped after %d seconds\n",
+			VM_TIMEOUT);
+		/*pthread_kill(tid, SIGUSR1);*/
+		/* pthread_cancel returns an error number, never a negative value */
+		rc = pthread_cancel(tid);
+		if(rc != 0)
+			cli_warnmsg("pthread_cancel: %s\n", strerror(rc));
+	}
+	pthread_mutex_unlock(&mutex);
+	pthread_join(tid, NULL);
+
+	return args.result;
+}
+#else
+/*
+ * Evaluate the script in filename with the JS interpreter, sending the
+ * interpreter's output to a temporary file created in dir.
+ *
+ * Returns CL_ETMPFILE if the output file cannot be created, CL_SUCCESS
+ * otherwise (a script that fails to evaluate is only reported as a warning).
+ */
+static int
+run_js(const char *filename, const char *dir)
+{
+	char *outname;
+	JSInterpPtr vm;
+
+	cli_dbgmsg("run_js(%s)\n", filename);
+
+	if((outname = cli_gentemp(dir)) == NULL)
+		return CL_ETMPFILE;
+
+	if((fout = fopen(outname, "wb")) == NULL) {
+		cli_warnmsg("Can't create %s\n", outname);
+		free(outname);
+		return CL_ETMPFILE;
+	}
+
+	cli_dbgmsg("Redirecting JS VM stdout to %s\n", outname);
+	free(outname);
+
+	/* Run NGS on the file; write_to_fout() receives its output */
+	vm = create_interp(write_to_fout);
+	if(js_eval_file(vm, filename) == 0)
+		cli_warnmsg("JS failed: %s\n", js_error_message(vm));
+	js_destroy_interp(vm);
+
+	fclose(fout);
+	return CL_SUCCESS;
+}
+#endif
+
+/* Copied from pdf.c :-( */
+/*
+ * like cli_memstr - but returns the location of the match
+ * FIXME: need a case insensitive version
+ *
+ * Searches the first hs bytes of haystack for the ns bytes at needle.
+ * Returns a pointer to the first match, or NULL if there is none (which
+ * includes hs < ns).  haystack is returned at once when it is the same
+ * pointer as needle.
+ */
+static const char *
+cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
+{
+	const char *pt, *hay;
+	size_t n;
+
+	if(haystack == needle)
+		return haystack;
+
+	if(hs < ns)
+		return NULL;
+
+	if(memcmp(haystack, needle, ns) == 0)
+		return haystack;
+
+	pt = hay = haystack;
+	n = hs;	/* bytes remaining from hay to the end of the haystack */
+
+	while((pt = memchr(hay, needle[0], n)) != NULL) {
+		/*
+		 * Subtract the pointers directly; casting each pointer to
+		 * int first loses bits where pointers are wider than int
+		 */
+		n -= (size_t)(pt - hay);
+		if(n < ns)
+			break;
+
+		if(memcmp(pt, needle, ns) == 0)
+			return pt;
+
+		if(hay == pt) {
+			/* false start at hay itself: step over it */
+			n--;
+			hay++;
+		} else
+			hay = pt;
+	}
+
+	return NULL;
+}
+
+#else
+
+/*
+ * Fallback used when mmap() is not available: nothing is decoded, a warning
+ * is logged and the file is reported as CL_CLEAN.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+	(void)dir;
+	(void)desc;
+
+	cli_warnmsg("File not decoded - JS decoding needs mmap() (for now)\n");
+
+	return CL_CLEAN;
+}
+#endif	/*HAVE_MMAP*/
+
+#else	/*!CL_EXPERIMENTAL*/
+
+/*
+ * Fallback used when CL_EXPERIMENTAL is not defined: a warning is logged and
+ * CL_EFORMAT is returned.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+	(void)dir;
+	(void)desc;
+
+	cli_warnmsg("JS decoding files not yet supported\n");
+
+	return CL_EFORMAT;
+}
+
+#endif	/*CL_EXPERIMENTAL*/

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (C) 2004 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ * $Log: line.c,v $
+ * Revision 1.11  2007/02/12 20:46:08  njh
+ * Various tidy
+ *
+ * Revision 1.10  2006/04/09 19:59:27  kojm
+ * update GPL headers with new address for FSF
+ *
+ * Revision 1.9  2005/03/10 08:53:33  nigelhorne
+ * Tidy
+ *
+ * Revision 1.8  2005/03/01 11:38:11  nigelhorne
+ * Fix typo
+ *
+ * Revision 1.7  2004/12/08 20:07:23  nigelhorne
+ * Fix compilation error on Solaris
+ *
+ * Revision 1.6  2004/10/14 17:45:55  nigelhorne
+ * Try to reclaim some memory if it becomes low when decoding
+ *
+ * Revision 1.5  2004/09/30 08:58:56  nigelhorne
+ * Remove empty lines
+ *
+ * Revision 1.4  2004/09/21 14:55:26  nigelhorne
+ * Handle blank lines in text/plain messages
+ *
+ * Revision 1.3  2004/08/25 12:30:36  nigelhorne
+ * Use memcpy rather than strcpy
+ *
+ * Revision 1.2  2004/08/21 11:57:57  nigelhorne
+ * Use line.[ch]
+ *
+ * Revision 1.1  2004/08/20 11:58:20  nigelhorne
+ * First draft
+ *
+ */
+
+static	char	const	rcsid[] = "$Id: line.c,v 1.11 2007/02/12 20:46:08 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef	CL_DEBUG
+#define	NDEBUG	/* map CLAMAV debug onto standard */
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "line.h"
+#include "others.h"
+
+/*
+ * Allocate a new line holding a copy of the NUL terminated string data.
+ * Element 0 of the result is the link count, initialised to 1; the copied
+ * text follows it.  Returns NULL if the allocation fails.
+ */
+line_t *
+lineCreate(const char *data)
+{
+	const size_t len = strlen(data);
+	/* one extra element for the link count, one for the terminator */
+	line_t *line = (line_t *)cli_malloc(len + 2);
+
+	if(line != NULL) {
+		line[0] = (char)1;
+		memcpy(line + 1, data, len);
+		line[len + 1] = '\0';
+	}
+
+	return line;
+}
+
+/*
+ * Take another reference to line by incrementing its link count and
+ * returning the same pointer.  When the count has already reached 255 a
+ * fresh copy made with lineCreate() is returned instead.
+ */
+line_t *
+lineLink(line_t *line)
+{
+	assert(line != NULL);
+
+	if((unsigned char)line[0] != (unsigned char)255) {
+		++line[0];
+		/*printf("%d:\n\t'%s'\n", (int)line[0], &line[1]);*/
+		return line;
+	}
+
+	/* the count cannot go any higher: hand out a separate copy */
+	cli_dbgmsg("lineLink: linkcount too large (%s)\n", lineGetData(line));
+	return lineCreate(lineGetData(line));
+}
+
+/*
+ * Drop one reference to line.  Returns line while its link count is still
+ * non-zero; when the count reaches zero the line is freed and NULL is
+ * returned.
+ */
+line_t *
+lineUnlink(line_t *line)
+{
+	/*printf("%d:\n\t'%s'\n", (int)line[0], &line[1]);*/
+
+	line[0]--;
+	if(line[0] != 0)
+		return line;
+
+	free(line);
+	return NULL;
+}
+
+/*
+ * Return the text stored in line (everything after the link count element),
+ * or NULL if line is NULL.
+ */
+const char *
+lineGetData(const line_t *line)
+{
+	if(line == NULL)
+		return NULL;
+
+	return line + 1;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,335 @@
+/*
+ *  Copyright (C) 2006 Mark Pizzolato <clamav-devel at subscriptions.pizzolato.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+/*
+ * This is a problem, which from a purist point of view, best wants an 
+ * RW locking mechanism.
+ * On Posix platforms, we leverage advisory locks provided by fcntl().
+ * Windows doesn't have a native interprocess RW exclusion mechanism, 
+ * one could be constructed from the services available, but it is somewhat
+ * complicated.  Meanwhile, we observe that in ClamAV, it is extremely rare 
+ * that there will ever be an occasion when multiple processes will be 
+ * reading the ClamAV database from a given directory at the same time, and in
+ * none of those possible cases would it matter if they serialized their 
+ * accesses.  So, a simple mutual exclusion mechanism will suffice for both 
+ * the reader and writer locks on Windows.
+ */
+#ifdef	_MSC_VER
+#include <windows.h>
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef	HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <errno.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "lockdb.h"
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER;
+#else
+#define pthread_mutex_lock(arg)
+#define pthread_mutex_unlock(arg)
+#endif
+
+#ifdef C_WINDOWS /* FIXME */
+#define DONT_LOCK_DBDIRS
+#endif
+
+/* One entry per database directory lock file, kept on the dblocks list */
+struct dblock {
+	struct dblock *lock_link;	/* next entry in the list */
+	char lock_file[NAME_MAX];	/* lock name as built by cli_lockname() */
+#ifndef C_WINDOWS
+	int lock_fd;			/* open lock file, or -1 when not open */
+#else
+	HANDLE lock_fd;
+#endif
+	int lock_type;			/* -1: not locked, 0: read lock, 1: write lock */
+};
+
+static struct dblock *dblocks = NULL;
+
+static void cli_lockname(char *lock_file, size_t lock_file_size, const char *dbdirpath);
+static int cli_lockdb(const char *dbdirpath, int wait, int writelock);
+
+#ifdef DONT_LOCK_DBDIRS
+
+/* Locking is compiled out (DONT_LOCK_DBDIRS): always reports success. */
+int cli_readlockdb(const char *dbdirpath, int wait)
+{
+    (void)dbdirpath;
+    (void)wait;
+
+    return CL_SUCCESS;
+}
+
+/* Locking is compiled out (DONT_LOCK_DBDIRS): always reports success. */
+int cli_writelockdb(const char *dbdirpath, int wait)
+{
+    (void)dbdirpath;
+    (void)wait;
+
+    return CL_SUCCESS;
+}
+
+/* Locking is compiled out (DONT_LOCK_DBDIRS): always reports success. */
+int cli_unlockdb(const char *dbdirpath)
+{
+    (void)dbdirpath;
+
+    return CL_SUCCESS;
+}
+
+/* Locking is compiled out (DONT_LOCK_DBDIRS): there is nothing to free. */
+int cli_freelocks(void)
+{
+	const int status = CL_SUCCESS;
+
+	return status;
+}
+
+#else /* !DONT_LOCK_DBDIRS */
+
+/*
+ * Take a read lock on the database directory dbdirpath.  wait is passed
+ * through to cli_lockdb(), whose result is returned.
+ */
+int cli_readlockdb(const char *dbdirpath, int wait)
+{
+    const int writelock = 0;
+
+    return cli_lockdb(dbdirpath, wait, writelock);
+}
+
+/*
+ * Take a write lock on the database directory dbdirpath.  wait is passed
+ * through to cli_lockdb(), whose result is returned.
+ */
+int cli_writelockdb(const char *dbdirpath, int wait)
+{
+    const int writelock = 1;
+
+    return cli_lockdb(dbdirpath, wait, writelock);
+}
+
+/*
+ * Free every entry on the dblocks list that is not currently in use.
+ * Entries that still have both a lock type and an open descriptor are kept
+ * and become the new list.  Always returns CL_SUCCESS.
+ */
+int cli_freelocks(void)
+{
+	struct dblock *cur, *next, *kept = NULL;
+
+	pthread_mutex_lock(&lock_mutex);
+	cur = dblocks;
+	while(cur != NULL) {
+		next = cur->lock_link;
+		if(cur->lock_type == -1 || cur->lock_fd == -1) {
+			free(cur);
+		} else {
+			/* there might be some locks in use, eg: during a db reload,
+			 * a failure can lead to cl_free being called */
+			cur->lock_link = kept;
+			kept = cur;
+		}
+		cur = next;
+	}
+	dblocks = kept;
+	pthread_mutex_unlock(&lock_mutex);
+
+	return CL_SUCCESS;
+}
+
+
+/*
+ * Release the lock held on the database directory dbdirpath.
+ *
+ * Returns CL_ELOCKDB if the directory is not recorded as locked or if the
+ * lock cannot be released, CL_SUCCESS otherwise.  On non-Windows builds the
+ * lock file is closed and removed in both of the latter cases.
+ */
+int cli_unlockdb(const char *dbdirpath)
+{
+	char lock_file[NAME_MAX];
+	struct dblock *lock;
+	int failed;
+#ifndef C_WINDOWS
+	struct flock fl;
+#endif
+
+    cli_lockname(lock_file, sizeof(lock_file), dbdirpath);
+    pthread_mutex_lock(&lock_mutex);
+    for(lock=dblocks; lock; lock=lock->lock_link)
+	if(!strcmp(lock_file, lock->lock_file))
+	    break;
+    if((!lock) || (lock->lock_type == -1)) {
+	cli_errmsg("Database Directory: %s not locked\n", dbdirpath);
+	pthread_mutex_unlock(&lock_mutex);
+	return CL_ELOCKDB;
+    }
+#ifndef C_WINDOWS
+    memset(&fl, 0, sizeof(fl));
+    fl.l_type = F_UNLCK;
+    failed = (fcntl(lock->lock_fd, F_SETLK, &fl) == -1);
+#else
+    failed = !ReleaseMutex(lock->lock_fd);
+#endif
+    if(failed) {
+	cli_errmsg("Error Unlocking Database Directory %s\n", dbdirpath);
+#ifndef C_WINDOWS
+	close(lock->lock_fd);
+	lock->lock_fd=-1;
+	unlink(lock->lock_file);
+#endif
+	/* the lock record is shared: only let go of the mutex once it is updated */
+	pthread_mutex_unlock(&lock_mutex);
+	return CL_ELOCKDB;
+    }
+    lock->lock_type = -1;
+#ifndef C_WINDOWS
+    close(lock->lock_fd);
+    lock->lock_fd=-1;
+    unlink(lock->lock_file);
+#endif
+    pthread_mutex_unlock(&lock_mutex);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Acquire a lock on the database directory dbdirpath.
+ *
+ * wait      non-zero to block until the lock is granted
+ * writelock non-zero for a write lock, zero for a read lock
+ *
+ * A dblock entry for the directory is looked up on (or added to) the dblocks
+ * list, the lock file is opened if necessary and the lock is then requested.
+ *
+ * Returns CL_SUCCESS, CL_EMEM if no entry can be allocated, CL_ELOCKDB if
+ * the directory is already locked through this list or the lock is held
+ * elsewhere, CL_EIO if the lock file cannot be opened or locking fails for
+ * another reason.
+ */
+static int cli_lockdb(const char *dbdirpath, int wait, int writelock)
+{
+	char lock_file[NAME_MAX];
+	struct dblock *lock;
+#ifndef C_WINDOWS
+	struct flock fl;
+	mode_t old_mask;
+	unsigned int existing = 0;
+#else
+	DWORD LastError;
+	SECURITY_ATTRIBUTES saAttr;
+	SECURITY_DESCRIPTOR sdDesc;
+#endif
+
+    cli_lockname(lock_file, sizeof(lock_file), dbdirpath);
+    pthread_mutex_lock(&lock_mutex);
+    for(lock=dblocks; lock; lock=lock->lock_link)
+	if(!strcmp(lock_file, lock->lock_file))
+	    break;
+    if(!lock) {
+	/* first use of this directory: add an unlocked entry to the list */
+	lock = cli_calloc(1, sizeof(*lock));
+	if(!lock) {
+	    cli_errmsg("cli_lockdb(): Can't allocate lock structure to lock Database Directory: %s\n", dbdirpath);
+	    pthread_mutex_unlock(&lock_mutex);
+	    return CL_EMEM;
+	}
+	lock->lock_link = dblocks;
+	strcpy(lock->lock_file, lock_file);
+	lock->lock_fd = -1;
+	lock->lock_type = -1;
+	dblocks = lock;
+    }
+    if(lock->lock_type != -1) {
+	cli_dbgmsg("Database Directory: %s already %s locked\n", dbdirpath, (lock->lock_type? "write" : "read"));
+	pthread_mutex_unlock(&lock_mutex);
+	return CL_ELOCKDB;
+    }
+#ifndef C_WINDOWS
+    if(lock->lock_fd == -1) {
+	old_mask = umask(0);
+	if(-1 == (lock->lock_fd = open(lock->lock_file, O_RDWR|O_CREAT|O_TRUNC, S_IRWXU|S_IRWXG|S_IROTH))) {
+	    /* a reader may fall back to a lock file it can only read */
+	    if((writelock) ||
+	       (-1 == (lock->lock_fd = open(lock->lock_file, O_RDONLY)))) {
+		cli_dbgmsg("Can't %s Lock file for Database Directory: %s\n", (writelock ? "create" : "open"), dbdirpath);
+		umask(old_mask);
+		pthread_mutex_unlock(&lock_mutex);
+		return CL_EIO; /* or CL_EACCESS */
+	    } else {
+		existing = 1;
+	    }
+	}
+	umask(old_mask);
+    }
+#else
+    if(lock->lock_fd == -1) {
+	/* Create a security descriptor which allows any process to acquire the Mutex */
+	InitializeSecurityDescriptor(&sdDesc, SECURITY_DESCRIPTOR_REVISION);
+	SetSecurityDescriptorDacl(&sdDesc, TRUE, NULL, FALSE);
+	saAttr.nLength = sizeof(saAttr);
+	saAttr.bInheritHandle = FALSE;
+	saAttr.lpSecurityDescriptor = &sdDesc;
+	if(!(lock->lock_fd = CreateMutexA(&saAttr, TRUE, lock->lock_file))) {
+	    if((GetLastError() != ERROR_ACCESS_DENIED) || 
+	       (!(lock->lock_fd = OpenMutexA(MUTEX_MODIFY_STATE, FALSE, lock->lock_file)))) {
+		cli_dbgmsg("Can't Create Mutex Lock for Database Directory: %s\n", dbdirpath);
+		pthread_mutex_unlock(&lock_mutex);
+		return CL_EIO;
+	    }
+	    LastError = ERROR_ALREADY_EXISTS;
+	}
+	LastError = GetLastError();
+    } else {
+	LastError = ERROR_ALREADY_EXISTS;
+    }
+#endif
+    pthread_mutex_unlock(&lock_mutex);
+
+#ifndef C_WINDOWS
+    memset(&fl, 0, sizeof(fl));
+    fl.l_type = (writelock ? F_WRLCK : F_RDLCK);
+    if(fcntl(lock->lock_fd, ((wait) ? F_SETLKW : F_SETLK), &fl) == -1) {
+	/* close() and unlink() below may overwrite errno: keep fcntl's value */
+	const int lock_errno = errno;
+
+	close(lock->lock_fd);
+	lock->lock_fd = -1;
+	if(lock_errno != EACCES && lock_errno != EAGAIN) {
+	    /* not simply "held by someone else": report a hard error */
+	    if(!existing)
+		unlink(lock->lock_file);
+	    cli_errmsg("Can't acquire %s lock: %s\n", writelock ? "write" : "read", strerror(lock_errno));
+	    return CL_EIO;
+	}
+	return CL_ELOCKDB;
+    }
+#else
+    if(LastError == ERROR_ALREADY_EXISTS) {
+	if(WAIT_TIMEOUT == WaitForSingleObject(lock->lock_fd, ((wait) ? INFINITE : 0))) {
+	    lock->lock_type = -1;
+	    return CL_ELOCKDB;
+	}
+    }
+#endif
+    lock->lock_type = writelock;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Build the name of the lock object for the database directory dbdirpath in
+ * lock_file (a buffer of lock_file_size bytes).
+ *
+ * On non-Windows builds this is "<dbdirpath>/.dbLock"; on Windows it is
+ * "Global\ClamAVDB-<dbdirpath>".  The name is then tidied in place: "//" and
+ * "/./" sequences are collapsed, and on Windows backslashes become slashes,
+ * lower case letters are upper-cased and a trailing slash is removed.
+ */
+static void cli_lockname(char *lock_file, size_t lock_file_size, const char *dbdirpath)
+{
+	char *c;
+
+    /* terminate up front; snprintf is only given lock_file_size-1 bytes */
+    lock_file[lock_file_size-1] = '\0';
+#ifndef C_WINDOWS
+    snprintf(lock_file, lock_file_size-1, "%s/.dbLock", dbdirpath);
+    for (c=lock_file; *c; ++c) {
+#else
+    snprintf(lock_file, lock_file_size-1, "Global\\ClamAVDB-%s", dbdirpath);
+    /* start after the 16 character "Global\ClamAVDB-" prefix */
+    for (c=lock_file+16; *c; ++c) {
+#endif
+	switch (*c) {
+#ifdef C_WINDOWS
+	case '\\':
+	    *c = '/';
+	    /* no break: continue with the '/' handling below */
+#endif
+	case '/':
+	    if(c!=lock_file && *(c-1) == '/') { /* compress embedded // */
+		/* step back so the loop's ++c lands on the character that follows */
+		--c;
+		memmove(c, c+1,strlen(c+1)+1);
+            } else if(c > lock_file+1 && (*(c-2) == '/') && (*(c-1) == '.')) { /* compress embedded /./ */
+		c -= 2;
+		memmove(c, c+2,strlen(c+2)+1);
+            }
+	    break;
+#ifdef C_WINDOWS
+	default:
+	    if(islower(*c)) /* Normalize to upper case */
+		*c = toupper(*c);
+	    break;
+#endif
+	}
+    }
+#ifdef C_WINDOWS
+    if('/' == lock_file[strlen(lock_file)-1]) /* Remove trailing / */
+	lock_file[strlen(lock_file)-1] = '\0';
+#endif
+}
+
+#endif /* DONT_LOCK_DBDIRS */

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,985 @@
+/*
+ *  Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "matcher.h"
+#include "matcher-ac.h"
+#include "filetypes.h"
+#include "cltypes.h"
+#include "str.h"
+
+uint8_t cli_ac_mindepth = AC_DEFAULT_MIN_DEPTH;
+uint8_t cli_ac_maxdepth = AC_DEFAULT_MAX_DEPTH;
+
+/*
+ * Insert pattern into the trie of root.
+ *
+ * Only the leading static part of the pattern is used for the trie: at most
+ * root->ac_maxdepth elements, cut at the first element that carries a
+ * CLI_MATCH_WILDCARD flag.  The node reached after these elements is marked
+ * final and the pattern is linked into its list; a pattern identical to one
+ * already on the list (same body, prefix and alternatives) is chained through
+ * next_same instead.  pattern->depth is set to the number of trie levels used.
+ *
+ * Returns CL_SUCCESS, CL_EPATSHORT when the static part is shorter than
+ * root->ac_mindepth, or CL_EMEM on allocation failure.  On CL_EMEM the
+ * pattern has not been stored in root->ac_pattable and root->ac_patterns is
+ * unchanged; trie nodes created so far stay in the trie.
+ */
+int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
+{
+	struct cli_ac_node *pt, *next, **newtable;
+	struct cli_ac_patt *ph, **newpattable;
+	struct cli_ac_alt *a1, *a2;
+	uint8_t i, match;
+	uint16_t len = MIN(root->ac_maxdepth, pattern->length);
+
+
+    /* the trie only holds literal bytes: stop at the first wildcard */
+    for(i = 0; i < len; i++) {
+	if(pattern->pattern[i] & CLI_MATCH_WILDCARD) {
+	    len = i;
+	    break;
+	}
+    }
+
+    if(len < root->ac_mindepth)
+	return CL_EPATSHORT;
+
+    pt = root->ac_root;
+
+    for(i = 0; i < len; i++) {
+	if(!pt->trans) {
+	    pt->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+	    if(!pt->trans) {
+		cli_errmsg("cli_ac_addpatt: Can't allocate memory for pt->trans\n");
+		return CL_EMEM;
+	    }
+	}
+
+	next = pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)];
+
+	if(!next) {
+	    next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
+	    if(!next) {
+		cli_errmsg("cli_ac_addpatt: Can't allocate memory for AC node\n");
+		return CL_EMEM;
+	    }
+
+	    if(i != len - 1) {
+		next->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+		if(!next->trans) {
+		    cli_errmsg("cli_ac_addpatt: Can't allocate memory for next->trans\n");
+		    free(next);
+		    return CL_EMEM;
+		}
+	    } else {
+		next->leaf = 1;
+	    }
+
+	    /* register the node so that cli_ac_free() can release it */
+	    root->ac_nodes++;
+	    newtable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, root->ac_nodes * sizeof(struct cli_ac_node *));
+	    if(!newtable) {
+		root->ac_nodes--;
+		cli_errmsg("cli_ac_addpatt: Can't realloc ac_nodetable\n");
+		if(next->trans)
+		    free(next->trans);
+		free(next);
+		return CL_EMEM;
+	    }
+	    newtable[root->ac_nodes - 1] = next;
+	    root->ac_nodetable = newtable;
+
+	    pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)] = next;
+	    pt->leaf = 0;
+	}
+
+	pt = next;
+    }
+
+    /*
+     * Grow the pattern table through a temporary and bump the counter only
+     * on success, so that a failed reallocation leaves ac_pattable and
+     * ac_patterns consistent for cli_ac_free() (same scheme as ac_nodetable
+     * above).
+     */
+    newpattable = (struct cli_ac_patt **) cli_realloc(root->ac_pattable, (root->ac_patterns + 1) * sizeof(struct cli_ac_patt *));
+    if(!newpattable) {
+	cli_errmsg("cli_ac_addpatt: Can't realloc ac_pattable\n");
+	return CL_EMEM;
+    }
+    newpattable[root->ac_patterns] = pattern;
+    root->ac_pattable = newpattable;
+    root->ac_patterns++;
+
+    pt->final = 1;
+    pattern->depth = i;
+
+    /* look for an identical pattern already attached to this node */
+    ph = pt->list;
+    while(ph) {
+	if((ph->length == pattern->length) && (ph->prefix_length == pattern->prefix_length)) {
+	    if(!memcmp(ph->pattern, pattern->pattern, ph->length * sizeof(uint16_t)) && !memcmp(ph->prefix, pattern->prefix, ph->prefix_length * sizeof(uint16_t))) {
+		if(!ph->alt && !pattern->alt) {
+		    match = 1;
+		} else if(ph->alt == pattern->alt) {
+		    match = 1;
+		    for(i = 0; i < ph->alt; i++) {
+			a1 = ph->alttable[i];
+			a2 = pattern->alttable[i];
+
+			if(a1->num != a2->num) {
+			    match = 0;
+			    break;
+			}
+			if(a1->chmode != a2->chmode) {
+			    match = 0;
+			    break;
+			} else if(a1->chmode) {
+			    /* single-byte alternatives: one byte per choice */
+			    if(memcmp(a1->str, a2->str, a1->num)) {
+				match = 0;
+				break;
+			    }
+			} else {
+			    /* string alternatives: compare the chains pairwise */
+			    while(a1 && a2) {
+				if((a1->len != a2->len) || memcmp(a1->str, a2->str, a1->len))
+				    break;
+				a1 = a1->next;
+				a2 = a2->next;
+			    }
+			    if(a1 || a2) {
+				match = 0;
+				break;
+			    }
+			}
+		    }
+		} else {
+		    match = 0;
+		}
+
+		if(match) {
+		    pattern->next_same = ph->next_same;
+		    ph->next_same = pattern;
+		    return CL_SUCCESS;
+		}
+	    }
+	}
+	ph = ph->next;
+    }
+
+    pattern->next = pt->list;
+    pt->list = pattern;
+
+    return CL_SUCCESS;
+}
+
+/* Singly linked queue entry used by ac_maketrans() via bfs_enqueue()/bfs_dequeue() */
+struct bfs_list {
+    struct cli_ac_node *node;	/* queued trie node */
+    struct bfs_list *next;	/* next entry, NULL for the last one */
+};
+
+/*
+ * Append node n to the queue whose head is *bfs and whose tail is *last.
+ * Returns CL_SUCCESS, or CL_EMEM when the queue entry cannot be allocated.
+ */
+static int bfs_enqueue(struct bfs_list **bfs, struct bfs_list **last, struct cli_ac_node *n)
+{
+	struct bfs_list *entry = (struct bfs_list *) cli_malloc(sizeof(struct bfs_list));
+
+
+    if(entry == NULL) {
+	cli_errmsg("bfs_enqueue: Can't allocate memory for bfs_list\n");
+	return CL_EMEM;
+    }
+
+    entry->node = n;
+    entry->next = NULL;
+
+    if(*last == NULL)
+	*bfs = entry;		/* empty queue: the entry is also the head */
+    else
+	(*last)->next = entry;
+    *last = entry;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Remove the head entry of the queue (*bfs, *last), free it and return the
+ * node it carried.  Returns NULL when the queue is empty.
+ */
+static struct cli_ac_node *bfs_dequeue(struct bfs_list **bfs, struct bfs_list **last)
+{
+	struct bfs_list *head = *bfs;
+	struct cli_ac_node *node;
+
+
+    if(head == NULL)
+	return NULL;
+
+    node = head->node;
+    *bfs = head->next;
+    if(*last == head)		/* that was the only entry */
+	*last = NULL;
+    free(head);
+
+    return node;
+}
+
+/*
+ * Compute the fail links of every node reachable from root->ac_root.
+ *
+ * Missing transitions of the root are pointed back at the root itself.  The
+ * remaining nodes are visited breadth-first; for each child the fail link is
+ * found by following the parent's fail chain, and the pattern list of the
+ * fail target is appended to the child's own list (the child becomes final
+ * if the resulting list is not empty).
+ *
+ * Returns CL_SUCCESS, or the error returned by bfs_enqueue().  Queue entries
+ * still pending at that point are released before returning.
+ */
+static int ac_maketrans(struct cli_matcher *root)
+{
+	struct bfs_list *bfs = NULL, *bfs_last = NULL;
+	struct cli_ac_node *ac_root = root->ac_root, *child, *node, *fail;
+	struct cli_ac_patt *patt;
+	int i, ret;
+
+
+    for(i = 0; i < 256; i++) {
+	node = ac_root->trans[i];
+	if(!node) {
+	    ac_root->trans[i] = ac_root;
+	} else {
+	    node->fail = ac_root;
+	    if((ret = bfs_enqueue(&bfs, &bfs_last, node))) {
+		/* drop what is still queued instead of leaking it */
+		while(bfs_dequeue(&bfs, &bfs_last))
+		    ;
+		return ret;
+	    }
+	}
+    }
+
+    while((node = bfs_dequeue(&bfs, &bfs_last))) {
+	if(node->leaf)
+	    continue;
+
+	for(i = 0; i < 256; i++) {
+	    child = node->trans[i];
+	    if(child) {
+		/* terminates at the root, which has a transition for every byte */
+		fail = node->fail;
+		while(fail->leaf || !fail->trans[i])
+		    fail = fail->fail;
+
+		child->fail = fail->trans[i];
+
+		if(child->list) {
+		    patt = child->list;
+		    while(patt->next)
+			patt = patt->next;
+
+		    patt->next = child->fail->list;
+		} else {
+		    child->list = child->fail->list;
+		}
+
+		if(child->list)
+		    child->final = 1;
+
+		if((ret = bfs_enqueue(&bfs, &bfs_last, child)) != 0) {
+		    /* drop what is still queued instead of leaking it */
+		    while(bfs_dequeue(&bfs, &bfs_last))
+			;
+		    return ret;
+		}
+	    }
+	}
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Finalise the matcher of root by running ac_maketrans() on it.
+ * Returns CL_EMALFDB for a NULL root, CL_SUCCESS when the matcher was never
+ * initialised (nothing to do), otherwise the result of ac_maketrans().
+ */
+int cli_ac_buildtrie(struct cli_matcher *root)
+{
+    if(root == NULL)
+	return CL_EMALFDB;
+
+    if(root->ac_root != NULL)
+	return ac_maketrans(root);
+
+    cli_dbgmsg("cli_ac_buildtrie: AC pattern matcher is not initialised\n");
+    return CL_SUCCESS;
+}
+
+/*
+ * Allocate the root node and its 256-entry transition table for root and
+ * record the minimum/maximum trie depth.
+ * Returns CL_SUCCESS or CL_EMEM; on CL_EMEM root->ac_root is NULL.
+ */
+int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth)
+{
+
+    root->ac_root = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
+    if(!root->ac_root) {
+	cli_errmsg("cli_ac_init: Can't allocate memory for ac_root\n");
+	return CL_EMEM;
+    }
+
+    root->ac_root->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+    if(!root->ac_root->trans) {
+	cli_errmsg("cli_ac_init: Can't allocate memory for ac_root->trans\n");
+	free(root->ac_root);
+	/* do not leave a dangling pointer for cli_ac_free() and friends */
+	root->ac_root = NULL;
+	return CL_EMEM;
+    }
+
+    root->ac_mindepth = mindepth;
+    root->ac_maxdepth = maxdepth;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Release the alternatives of pattern p: every chain hanging off
+ * p->alttable[0 .. p->alt-1] and the table itself.  Does nothing when the
+ * pattern has no alternatives.
+ */
+static void ac_free_alt(struct cli_ac_patt *p)
+{
+	uint16_t idx;
+	struct cli_ac_alt *cur, *victim;
+
+
+    if(p->alt == 0)
+	return;
+
+    for(idx = 0; idx < p->alt; idx++) {
+	for(cur = p->alttable[idx]; cur != NULL; ) {
+	    victim = cur;
+	    cur = cur->next;	/* step first, victim is about to go away */
+	    if(victim->str != NULL)
+		free(victim->str);
+	    free(victim);
+	}
+    }
+    free(p->alttable);
+}
+
+/*
+ * Release everything the matcher of root owns: the registered patterns, the
+ * pattern table, the trie nodes, the node table and the root node.
+ */
+void cli_ac_free(struct cli_matcher *root)
+{
+	uint32_t idx;
+	struct cli_ac_patt *p;
+	struct cli_ac_node *node;
+
+
+    for(idx = 0; idx < root->ac_patterns; idx++) {
+	p = root->ac_pattable[idx];
+
+	/* with a prefix, pattern points into the prefix allocation */
+	if(p->prefix)
+	    free(p->prefix);
+	else
+	    free(p->pattern);
+	free(p->virname);
+	if(p->offset)
+	    free(p->offset);
+	if(p->alt)
+	    ac_free_alt(p);
+	free(p);
+    }
+    if(root->ac_pattable)
+	free(root->ac_pattable);
+
+    for(idx = 0; idx < root->ac_nodes; idx++) {
+	node = root->ac_nodetable[idx];
+	if(!node->leaf)
+	    free(node->trans);
+	free(node);
+    }
+
+    if(root->ac_nodetable)
+	free(root->ac_nodetable);
+
+    if(root->ac_root) {
+	free(root->ac_root->trans);
+	free(root->ac_root);
+    }
+}
+
+/*
+ * FIXME: the current support for string alternatives uses a brute-force
+ *        approach and doesn't perform any kind of verification or
+ *        backtracking. This may easily lead to false negatives, e.g. when
+ *        an alternative contains strings of different lengths and
+ *        more than one of them can match at the current position.
+ */
+
+/*
+ * AC_MATCH_CHAR(p, b): compare pattern element p with buffer byte b and make
+ * the ENCLOSING FUNCTION return 0 on a mismatch (or on an unknown wildcard).
+ *
+ * The macro is not self-contained; it reads and writes these names from the
+ * scope it is expanded in: wc, found, alt, altcnt, j, bp, buffer, length and
+ * pattern.  For a string alternative that matches, bp is advanced by
+ * alt->len - 1 so that the caller's own bp++ lands behind the matched
+ * string; altcnt is advanced after every matched alternative.
+ */
+#define AC_MATCH_CHAR(p,b)						\
+    switch(wc = p & CLI_MATCH_WILDCARD) {				\
+	case CLI_MATCH_CHAR:						\
+	    if((unsigned char) p != b)					\
+		return 0;						\
+	    break;							\
+									\
+	case CLI_MATCH_IGNORE:						\
+	    break;							\
+									\
+	case CLI_MATCH_ALTERNATIVE:					\
+	    found = 0;							\
+	    alt = pattern->alttable[altcnt];				\
+	    if(alt->chmode) {						\
+		for(j = 0; j < alt->num; j++) {				\
+		    if(alt->str[j] == b) {				\
+			found = 1;					\
+			break;						\
+		    }							\
+		}							\
+	    } else {							\
+		while(alt) {						\
+		    if(bp + alt->len <= length) {			\
+			if(!memcmp(&buffer[bp], alt->str, alt->len)) {	\
+			    found = 1;					\
+			    bp += alt->len - 1;				\
+			    break;					\
+			}						\
+		    }							\
+		    alt = alt->next;					\
+		}							\
+	    }								\
+	    if(!found)							\
+		return 0;						\
+	    altcnt++;							\
+	    break;							\
+									\
+	case CLI_MATCH_NIBBLE_HIGH:					\
+	    if((unsigned char) (p & 0x00f0) != (b & 0xf0))		\
+		return 0;						\
+	    break;							\
+									\
+	case CLI_MATCH_NIBBLE_LOW:					\
+	    if((unsigned char) (p & 0x000f) != (b & 0x0f))		\
+		return 0;						\
+	    break;							\
+									\
+	default:							\
+	    cli_errmsg("ac_findmatch: Unknown wildcard 0x%x\n", wc);	\
+	    return 0;							\
+    }
+
+/*
+ * Verify the part of pattern that the trie did not check.
+ *
+ *   buffer/length  data being scanned and its size
+ *   offset         position in buffer where the pattern body starts
+ *   end            receives the buffer position reached by the body check
+ *
+ * Elements pattern->depth .. pattern->length-1 are compared against the
+ * buffer first, then (if present) the prefix against the bytes right before
+ * offset.  Returns 1 on a match and 0 otherwise; *end is only written once
+ * the body comparison has completed.
+ *
+ * NOTE(review): the body loop also stops when bp reaches length, and the
+ * function then still returns 1 without having compared the remaining
+ * elements.  This looks reachable only when string alternatives advance bp
+ * faster than i -- confirm whether that is intended.
+ */
+inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *end)
+{
+	uint32_t bp;
+	uint16_t wc, i, j, altcnt = pattern->alt_pattern;
+	uint8_t found;
+	struct cli_ac_alt *alt;
+
+
+    /* the body must fit behind offset and the prefix in front of it */
+    if((offset + pattern->length > length) || (pattern->prefix_length > offset))
+	return 0;
+
+    bp = offset + pattern->depth;
+
+    for(i = pattern->depth; i < pattern->length && bp < length; i++) {
+	AC_MATCH_CHAR(pattern->pattern[i],buffer[bp]);
+	bp++;
+    }
+    *end = bp;
+
+    if(pattern->prefix) {
+	/* alternatives are numbered from the start of the prefix */
+	altcnt = 0;
+	bp = offset - pattern->prefix_length;
+
+	for(i = 0; i < pattern->prefix_length; i++) {
+	    AC_MATCH_CHAR(pattern->prefix[i],buffer[bp]);
+	    bp++;
+	}
+    }
+
+    return 1;
+}
+
+/*
+ * Prepare the per-scan state in data for partsigs partial signatures.
+ *
+ * data->offmatrix gets one (initially NULL) slot per partial signature, or
+ * is set to NULL when partsigs is 0.  tracklen is currently not used.
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG when data is NULL, or CL_EMEM.  After
+ * CL_EMEM data->partsigs is 0 and data->offmatrix is NULL, so the structure
+ * can still be passed to cli_ac_freedata().
+ */
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint8_t tracklen)
+{
+
+    if(!data) {
+	cli_errmsg("cli_ac_initdata: data == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    data->partsigs = partsigs;
+    data->offmatrix = NULL;
+
+    if(!partsigs)
+	return CL_SUCCESS;
+
+    data->offmatrix = (int32_t ***) cli_calloc(partsigs, sizeof(int32_t **));
+    if(!data->offmatrix) {
+	cli_errmsg("cli_ac_initdata: Can't allocate memory for data->offmatrix\n");
+	/* keep the count in step with the (missing) matrix */
+	data->partsigs = 0;
+	return CL_EMEM;
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Release the offset matrices held in data (the structure itself is not
+ * freed).  Safe to call with a NULL pointer, with partsigs == 0, with a
+ * NULL offmatrix, and more than once on the same structure.
+ */
+void cli_ac_freedata(struct cli_ac_data *data)
+{
+	uint32_t i;
+
+
+    /* offmatrix is only inspected when partsigs says it was set up */
+    if(data && data->partsigs && data->offmatrix) {
+	for(i = 0; i < data->partsigs; i++) {
+	    if(data->offmatrix[i]) {
+		/* row 0 owns the storage shared by all rows of this matrix */
+		free(data->offmatrix[i][0]);
+		free(data->offmatrix[i]);
+	    }
+	}
+	free(data->offmatrix);
+	data->offmatrix = NULL;
+    }
+}
+
+/*
+ * Append a (type, offset) record to the end of *list and bump the counter
+ * kept in the list head.  Nothing is added once the head's counter has
+ * reached MAX_EMBEDDED_OBJ.  Returns CL_SUCCESS or CL_EMEM.
+ */
+inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset)
+{
+	struct cli_matched_type *node, *tail;
+
+
+    if(*list != NULL && (*list)->cnt >= MAX_EMBEDDED_OBJ)
+	return CL_SUCCESS;
+
+    node = cli_calloc(1, sizeof(struct cli_matched_type));
+    if(node == NULL) {
+	cli_errmsg("cli_ac_addtype: Can't allocate memory for new type node\n");
+	return CL_EMEM;
+    }
+
+    node->type = type;
+    node->offset = offset;
+
+    if(*list == NULL) {
+	*list = node;
+    } else {
+	for(tail = *list; tail->next != NULL; tail = tail->next)
+	    ;
+	tail->next = node;
+    }
+
+    (*list)->cnt++;
+    return CL_SUCCESS;
+}
+
+/*
+ * Scan buffer[0 .. length-1] with the matcher of root.
+ *
+ *   virname   if not NULL, receives the name of the matched signature
+ *   mdata     per-scan state for partial (multi-part) signatures; required
+ *   otfrec    non-zero to honour file type signatures (pt->type != 0)
+ *   offset    position of buffer[0] within the scanned object
+ *   ftype/fd  passed to cli_validatesig() for signatures that carry an
+ *             offset or target restriction
+ *   ftoffset  if not NULL, collects embedded file types found via
+ *             ac_addtype()
+ *
+ * Returns CL_VIRUS on the first matching virus signature, CL_ENULLARG when
+ * mdata is NULL, CL_EMEM on allocation failure, otherwise the detected file
+ * type when otfrec is set or CL_CLEAN.  info.exeinfo.section, which
+ * cli_validatesig() may allocate, is released on every exit path.
+ */
+int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
+{
+	struct cli_ac_node *current;
+	struct cli_ac_patt *patt, *pt;
+        uint32_t i, bp, realoff, matchend;
+	uint16_t j;
+	int32_t **offmatrix;
+	uint8_t found;
+	struct cli_target_info info;
+	int type = CL_CLEAN;
+
+
+    if(!root->ac_root)
+	return CL_CLEAN;
+
+    if(!mdata) {
+	cli_errmsg("cli_ac_scanbuff: mdata == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    memset(&info, 0, sizeof(info));
+    current = root->ac_root;
+
+    for(i = 0; i < length; i++)  {
+
+	/* follow fail links until a transition for this byte exists */
+	while(current->leaf || !current->trans[buffer[i]])
+	    current = current->fail;
+
+	current = current->trans[buffer[i]];
+
+	if(current->final) {
+	    patt = current->list;
+	    while(patt) {
+		/* bp: where this pattern's body would start in buffer */
+		bp = i + 1 - patt->depth;
+		if(ac_findmatch(buffer, bp, length, patt, &matchend)) {
+		    /* the match holds for every identical pattern chained on next_same */
+		    pt = patt;
+		    while(pt) {
+			realoff = offset + bp - pt->prefix_length;
+
+			if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
+			    if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
+				pt = pt->next_same;
+				continue;
+			    }
+			}
+
+			if(pt->sigid) { /* it's a partial signature */
+
+			    /* a later part only counts once the previous part has been seen */
+			    if(pt->partno != 1 && (!mdata->offmatrix[pt->sigid - 1] || !mdata->offmatrix[pt->sigid - 1][pt->partno - 2][0])) {
+				pt = pt->next_same;
+				continue;
+			    }
+
+			    if(!mdata->offmatrix[pt->sigid - 1]) {
+				mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *));
+				if(!mdata->offmatrix[pt->sigid - 1]) {
+				    cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
+				    if(info.exeinfo.section)
+					free(info.exeinfo.section);
+				    return CL_EMEM;
+				}
+
+				/* one block for all rows; column 0 of a row is its write index */
+				mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+				if(!mdata->offmatrix[pt->sigid - 1][0]) {
+				    cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1);
+				    free(mdata->offmatrix[pt->sigid - 1]);
+				    mdata->offmatrix[pt->sigid - 1] = NULL;
+				    if(info.exeinfo.section)
+					free(info.exeinfo.section);
+				    return CL_EMEM;
+				}
+				memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+				mdata->offmatrix[pt->sigid - 1][0][0] = 0;
+				for(j = 1; j < pt->parts; j++) {
+				    mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1);
+				    mdata->offmatrix[pt->sigid - 1][j][0] = 0;
+				}
+			    }
+			    offmatrix = mdata->offmatrix[pt->sigid - 1];
+
+			    if(pt->partno != 1) {
+				/* is any recorded end of the previous part within [mindist, maxdist]? */
+				found = 0;
+				for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) {
+				    found = 1;
+				    if(pt->maxdist)
+					if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist)
+					    found = 0;
+
+				    if(found && pt->mindist)
+					if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist)
+					    found = 0;
+
+				    if(found)
+					break;
+				}
+			    }
+
+			    if(pt->partno == 1 || (found && (pt->partno != pt->parts))) {
+				/* record where this part ended, cycling through the row */
+				offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN;
+				offmatrix[pt->partno - 1][0]++;
+				offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = offset + matchend;
+
+				if(pt->partno == 1) /* save realoff for the first part */
+				    offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
+			    } else if(found && pt->partno == pt->parts) {
+				if(pt->type) {
+				    if(otfrec) {
+					if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
+					    cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
+					    type = pt->type;
+					    if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
+						/* FIXME: we don't know which offset of the first part is the correct one */
+						for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
+						    if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
+							if(info.exeinfo.section)
+							    free(info.exeinfo.section);
+							return CL_EMEM;
+						    }
+						}
+					    }
+
+					    /* start over for this signature */
+					    memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+					    for(j = 0; j < pt->parts; j++)
+						offmatrix[j][0] = 0;
+					}
+				    }
+
+				} else { /* !pt->type */
+				    if(virname)
+					*virname = pt->virname;
+
+				    if(info.exeinfo.section)
+					free(info.exeinfo.section);
+
+				    return CL_VIRUS;
+				}
+			    }
+
+			} else { /* old type signature */
+			    if(pt->type) {
+				if(otfrec) {
+				    if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
+					cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
+					type = pt->type;
+					if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE)))  {
+
+					    if(ac_addtype(ftoffset, type, realoff)) {
+						if(info.exeinfo.section)
+						    free(info.exeinfo.section);
+						return CL_EMEM;
+					    }
+					}
+				    }
+				}
+			    } else {
+				if(virname)
+				    *virname = pt->virname;
+
+				if(info.exeinfo.section)
+				    free(info.exeinfo.section);
+				return CL_VIRUS;
+			    }
+			}
+			pt = pt->next_same;
+		    }
+		}
+		patt = patt->next;
+	    }
+	}
+    }
+
+    if(info.exeinfo.section)
+	free(info.exeinfo.section);
+
+    return otfrec ? type : CL_CLEAN;
+}
+
+/* FIXME: clean up the code */
+/*
+ * Parse the hexadecimal signature hexsig and add it to the matcher of root.
+ *
+ *   virname          signature name; a trailing " (Clam)" part is cut off
+ *   hexsig           hex string, may contain "(aa|bb|...)" alternatives
+ *   sigid, parts,
+ *   partno, mindist,
+ *   maxdist          partial-signature bookkeeping, stored as given
+ *   type             file type for file type signatures, stored as given
+ *   offset           optional offset specification, duplicated if not NULL
+ *   target           target type, stored as given
+ *
+ * Every "(...)" group is moved into new->alttable and replaced by "()" in
+ * the string handed to cli_hex2ui().  If the first root->ac_maxdepth
+ * elements contain a wildcard or are all zero, the longest static run of the
+ * signature is searched for and everything before it becomes the prefix.
+ *
+ * Returns CL_SUCCESS, CL_EPATSHORT, CL_EMALFDB, CL_EMEM or the error from
+ * cli_ac_addpatt().  Everything allocated here is released on failure.
+ */
+int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target)
+{
+	struct cli_ac_patt *new;
+	char *pt, *hex = NULL;
+	uint16_t i, j, ppos = 0, pend;
+	uint8_t wprefix = 0, zprefix = 1, namelen, plen = 0;
+	struct cli_ac_alt *newalt, *altpt, **newtable;
+	int ret, error = CL_SUCCESS;
+
+
+    if(strlen(hexsig) / 2 < root->ac_mindepth)
+	return CL_EPATSHORT;
+
+    if((new = (struct cli_ac_patt *) cli_calloc(1, sizeof(struct cli_ac_patt))) == NULL)
+	return CL_EMEM;
+
+    new->type = type;
+    new->sigid = sigid;
+    new->parts = parts;
+    new->partno = partno;
+    new->mindist = mindist;
+    new->maxdist = maxdist;
+    new->target = target;
+
+    if(strchr(hexsig, '(')) {
+	    char *hexcpy, *hexnew, *start, *h, *c;
+
+	if(!(hexcpy = cli_strdup(hexsig))) {
+	    free(new);
+	    return CL_EMEM;
+	}
+
+	/* hexnew never gets longer than hexsig: "(...)" shrinks to "()" */
+	if(!(hexnew = (char *) cli_calloc(strlen(hexsig) + 1, 1))) {
+	    free(hexcpy);
+	    free(new);
+	    return CL_EMEM;
+	}
+
+	start = pt = hexcpy;
+	while((pt = strchr(start, '('))) {
+	    *pt++ = 0;
+
+	    if(!start) {
+		error = CL_EMALFDB;
+		break;
+	    }
+
+	    strcat(hexnew, start);
+	    strcat(hexnew, "()");
+
+	    if(!(start = strchr(pt, ')'))) {
+		error = CL_EMALFDB;
+		break;
+	    }
+	    *start++ = 0;
+
+	    newalt = (struct cli_ac_alt *) cli_calloc(1, sizeof(struct cli_ac_alt));
+	    if(!newalt) {
+		cli_errmsg("cli_ac_addsig: Can't allocate newalt\n");
+		error = CL_EMEM;
+		break;
+	    }
+
+	    new->alt++;
+	    newtable = (struct cli_ac_alt **) cli_realloc(new->alttable, new->alt * sizeof(struct cli_ac_alt *));
+	    if(!newtable) {
+		new->alt--;
+		free(newalt);
+		cli_errmsg("cli_ac_addsig: Can't realloc new->alttable\n");
+		error = CL_EMEM;
+		break;
+	    }
+	    newtable[new->alt - 1] = newalt;
+	    new->alttable = newtable;
+
+	    /* number of choices = number of '|' separators + 1 */
+	    for(i = 0; i < strlen(pt); i++)
+		if(pt[i] == '|')
+		    newalt->num++;
+
+            if(!newalt->num) {
+                error = CL_EMALFDB;
+                break;
+            } else
+                newalt->num++;
+
+	    /* "aa|bb|cc": every choice is a single byte -> character mode */
+	    if(3 * newalt->num - 1 == (uint16_t) strlen(pt)) {
+		newalt->chmode = 1;
+		newalt->str = (unsigned char *) cli_malloc(newalt->num);
+		if(!newalt->str) {
+		    cli_errmsg("cli_ac_addsig: Can't allocate newalt->str\n");
+		    error = CL_EMEM;
+		    break;
+		}
+	    }
+
+	    for(i = 0; i < newalt->num; i++) {
+		if(!(h = cli_strtok(pt, i, "|"))) {
+		    error = CL_EMALFDB;
+		    break;
+		}
+
+		if(!(c = cli_hex2str(h))) {
+		    free(h);
+		    error = CL_EMALFDB;
+		    break;
+		}
+
+		if(newalt->chmode) {
+		    newalt->str[i] = *c;
+		    free(c);
+		} else {
+		    if(i) {
+			/* further string choices are chained behind the first */
+			altpt = newalt;
+			while(altpt->next)
+			    altpt = altpt->next;
+
+			altpt->next = (struct cli_ac_alt *) cli_calloc(1, sizeof(struct cli_ac_alt));
+			if(!altpt->next) {
+			    cli_errmsg("cli_ac_addsig: Can't allocate altpt->next\n");
+			    error = CL_EMEM;
+			    free(c);
+			    free(h);
+			    break;
+			}
+
+			altpt->next->str = (unsigned char *) c;
+			altpt->next->len = strlen(h) / 2;
+		    } else {
+			newalt->str = (unsigned char *) c;
+			newalt->len = strlen(h) / 2;
+		    }
+		}
+
+		free(h);
+	    }
+
+	    if(error)
+		break;
+	}
+
+	if(start)
+	    strcat(hexnew, start);
+
+	hex = hexnew;
+	free(hexcpy);
+
+	if(error) {
+	    /*
+	     * hex must go even when no alternative has been registered yet
+	     * (new->alt == 0); ac_free_alt() is a no-op in that case.
+	     */
+	    free(hex);
+	    ac_free_alt(new);
+	    free(new);
+	    return error;
+	}
+    }
+
+    if((new->pattern = cli_hex2ui(new->alt ? hex : hexsig)) == NULL) {
+	if(new->alt) {
+	    free(hex);
+	    ac_free_alt(new);
+	}
+	free(new);
+	return CL_EMALFDB;
+    }
+    new->length = strlen(new->alt ? hex : hexsig) / 2;
+    if(new->alt)
+	free(hex);
+
+    /* does the part that would go into the trie contain a wildcard, or only zeros? */
+    for(i = 0; i < root->ac_maxdepth && i < new->length; i++) {
+	if(new->pattern[i] & CLI_MATCH_WILDCARD) {
+	    wprefix = 1;
+	    break;
+	}
+	if(zprefix && new->pattern[i])
+	    zprefix = 0;
+    }
+
+    if(wprefix || zprefix) {
+	/* look for the longest static run (up to ac_maxdepth) to use instead */
+	pend = new->length - root->ac_mindepth + 1;
+	for(i = 0; i < pend; i++) {
+	    for(j = i; j < i + root->ac_maxdepth && j < new->length; j++) {
+		if(new->pattern[j] & CLI_MATCH_WILDCARD) {
+		    break;
+		} else {
+		    if(j - i + 1 >= plen) {
+			plen = j - i + 1;
+			ppos = i;
+		    }
+		}
+		if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
+		    break;
+	    }
+	    if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
+		break;
+	}
+
+	if(plen < root->ac_mindepth) {
+	    cli_errmsg("cli_ac_addsig: Can't find a static subpattern of length %u\n", root->ac_mindepth);
+	    ac_free_alt(new);
+	    free(new->pattern);
+	    free(new);
+	    return CL_EMALFDB;
+	}
+
+	/* prefix keeps the allocation; pattern now points into it */
+	new->prefix = new->pattern;
+	new->prefix_length = ppos;
+	new->pattern = &new->prefix[ppos];
+	new->length -= ppos;
+
+	/* alternatives consumed by the prefix: the body starts counting after them */
+	for(i = 0; i < new->prefix_length; i++)
+	    if((new->prefix[i] & CLI_MATCH_WILDCARD) == CLI_MATCH_ALTERNATIVE)
+		new->alt_pattern++;
+    }
+
+    if(new->length > root->maxpatlen)
+	root->maxpatlen = new->length;
+
+    if((pt = strstr(virname, " (Clam)")))
+	namelen = strlen(virname) - strlen(pt);
+    else
+	namelen = strlen(virname);
+
+    if(!namelen) {
+	cli_errmsg("cli_ac_addsig: No virus name\n");
+	if(new->prefix)
+	    free(new->prefix);
+	else
+	    free(new->pattern);
+	ac_free_alt(new);
+	free(new);
+	return CL_EMALFDB;
+    }
+
+    if((new->virname = cli_calloc(namelen + 1, sizeof(char))) == NULL) {
+	if(new->prefix)
+	    free(new->prefix);
+	else
+	    free(new->pattern);
+	ac_free_alt(new);
+	free(new);
+	return CL_EMEM;
+    }
+    /* the buffer is zero-filled and one byte larger, so the copy stays terminated */
+    strncpy(new->virname, virname, namelen);
+
+    if(offset) {
+	new->offset = cli_strdup(offset);
+	if(!new->offset) {
+	    if(new->prefix)
+		free(new->prefix);
+	    else
+		free(new->pattern);
+	    ac_free_alt(new);
+	    free(new->virname);
+	    free(new);
+	    return CL_EMEM;
+	}
+    }
+
+    if((ret = cli_ac_addpatt(root, new))) {
+	if(new->prefix)
+	    free(new->prefix);
+	else
+	    free(new->pattern);
+	free(new->virname);
+	ac_free_alt(new);
+	if(new->offset)
+	    free(new->offset);
+	free(new);
+	return ret;
+    }
+
+    return CL_SUCCESS;
+}
+
+/* Set the global depth limits cli_ac_mindepth and cli_ac_maxdepth */
+void cli_ac_setdepth(uint8_t mindepth, uint8_t maxdepth)
+{
+    cli_ac_maxdepth = maxdepth;
+    cli_ac_mindepth = mindepth;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,241 @@
+/*
+ *  Copyright (C) 2004 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+
+#include "clamav.h"
+#include "memory.h"
+#include "others.h"
+#include "cltypes.h"
+#include "matcher.h"
+#include "matcher-bm.h"
+#include "filetypes.h"
+
+#define BM_MIN_LENGTH	3
+#define BM_BLOCK_SIZE	3
+/*
+ * Hash of a three-byte block; the largest value, HASH(255, 255, 255), is
+ * 63495.  The arguments are parenthesised so that expressions such as
+ * HASH(x + 1, y, z) expand as intended.
+ */
+#define HASH(a,b,c) (211 * (a) + 37 * (b) + (c))
+
+/*
+ * Add pattern to the bm_shift/bm_suffix tables of root.
+ *
+ * Returns CL_EPATSHORT when the pattern is shorter than BM_MIN_LENGTH,
+ * CL_SUCCESS otherwise.  The pattern may be modified: when an unused
+ * bm_suffix slot is found for a block further into the pattern, the bytes in
+ * front of that block become pattern->prefix and pattern->pattern/length are
+ * adjusted accordingly.
+ */
+int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
+{
+	uint16_t idx, i;
+	const unsigned char *pt = pattern->pattern;
+	struct cli_bm_patt *prev, *next = NULL;
+
+
+    if(pattern->length < BM_MIN_LENGTH) {
+	cli_errmsg("Signature for %s is too short\n", pattern->virname);
+	return CL_EPATSHORT;
+    }
+
+#if BM_MIN_LENGTH == BM_BLOCK_SIZE
+    /* try to load balance bm_suffix (at the cost of bm_shift) */
+    for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) {
+	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
+	if(!root->bm_suffix[idx]) {
+	    if(i) {
+		/* anchor at block i; pattern keeps pointing into the prefix allocation */
+		pattern->prefix = pattern->pattern;
+		pattern->prefix_length = i;
+		pattern->pattern = &pattern->pattern[i];
+		pattern->length -= i;
+		pt = pattern->pattern;
+	    }
+	    break;
+	}
+    }
+#endif
+
+    /* idx is recomputed here from the (possibly moved) start of the pattern */
+    for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) {
+	idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
+	root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
+    }
+
+    /* find the insertion point: stop at the first entry whose first byte is <= ours */
+    prev = next = root->bm_suffix[idx];
+    while(next) {
+	if(pt[0] >= next->pattern[0])
+	    break;
+	prev = next;
+	next = next->next;
+    }
+
+    if(next == root->bm_suffix[idx]) {
+	/* new head (or first entry): the head carries the list's cnt */
+	pattern->next = root->bm_suffix[idx];
+	if(root->bm_suffix[idx])
+	    pattern->cnt = root->bm_suffix[idx]->cnt;
+	root->bm_suffix[idx] = pattern;
+    } else {
+	pattern->next = prev->next;
+	prev->next = pattern;
+    }
+    root->bm_suffix[idx]->cnt++;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Allocate the bm_shift and bm_suffix tables of root, one entry per possible
+ * HASH() value.  Every shift starts at BM_MIN_LENGTH - BM_BLOCK_SIZE + 1 and
+ * every suffix slot at NULL.
+ * Returns CL_SUCCESS or CL_EMEM; after CL_EMEM root->bm_shift is NULL.
+ */
+int cli_bm_init(struct cli_matcher *root)
+{
+	uint16_t i, size = HASH(255, 255, 255) + 1;
+
+
+    if(!(root->bm_shift = (uint8_t *) cli_malloc(size * sizeof(uint8_t))))
+	return CL_EMEM;
+
+    if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
+	free(root->bm_shift);
+	/* do not leave a dangling pointer for cli_bm_free()/cli_bm_scanbuff() */
+	root->bm_shift = NULL;
+	return CL_EMEM;
+    }
+
+    for(i = 0; i < size; i++)
+	root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Release the bm_shift table, every pattern linked into bm_suffix and the
+ * bm_suffix table itself.
+ */
+void cli_bm_free(struct cli_matcher *root)
+{
+	struct cli_bm_patt *cur, *victim;
+	uint16_t slot, size = HASH(255, 255, 255) + 1;
+
+
+    if(root->bm_shift)
+	free(root->bm_shift);
+
+    if(!root->bm_suffix)
+	return;
+
+    for(slot = 0; slot < size; slot++) {
+	for(cur = root->bm_suffix[slot]; cur; ) {
+	    victim = cur;
+	    cur = cur->next;	/* step first, victim is about to go away */
+
+	    /* with a prefix, pattern points into the prefix allocation */
+	    if(victim->prefix)
+		free(victim->prefix);
+	    else
+		free(victim->pattern);
+	    if(victim->virname)
+		free(victim->virname);
+	    if(victim->offset)
+		free(victim->offset);
+	    free(victim);
+	}
+    }
+    free(root->bm_suffix);
+}
+
+/*
+ * Scan buffer[0 .. length-1] against the bm_shift/bm_suffix tables of root.
+ *
+ *   virname   if not NULL, receives the name of the matched signature
+ *   offset    position of buffer[0] within the scanned object
+ *   ftype/fd  passed to cli_validatesig() for signatures that carry an
+ *             offset or target restriction
+ *
+ * Returns CL_VIRUS on the first match, CL_CLEAN otherwise (also when the
+ * tables are not initialised or the buffer is shorter than BM_MIN_LENGTH).
+ */
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, cli_file_t ftype, int fd)
+{
+	uint32_t i, j, off;
+	uint8_t found, pchain, shift;
+	uint16_t idx, idxchk;
+	struct cli_bm_patt *p;
+	const unsigned char *bp, *pt;
+	unsigned char prefix;
+	struct cli_target_info info;
+
+
+    if(!root->bm_shift)
+	return CL_CLEAN;
+
+    if(length < BM_MIN_LENGTH)
+	return CL_CLEAN;
+
+    memset(&info, 0, sizeof(info));
+
+    for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
+	idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
+	shift = root->bm_shift[idx];
+
+	/* shift 0: some pattern starts with this block, check the candidates */
+	if(shift == 0) {
+	    prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
+	    p = root->bm_suffix[idx];
+	    pchain = 0;
+	    while(p) {
+		/* entries with the same first byte are adjacent: stop once past them */
+		if(p->pattern[0] != prefix) {
+		    if(pchain)
+			break;
+		    p = p->next;
+		    continue;
+		} else pchain = 1;
+
+		off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+		bp = buffer + off;
+
+		/* the pattern must fit behind off and its prefix in front of it */
+		if((off + p->length > length) || (p->prefix_length > off)) {
+		    p = p->next;
+		    continue;
+		}
+
+		/* cheap pre-check of the last and the middle byte */
+		idxchk = MIN(p->length, length - off) - 1;
+		if(idxchk) {
+		    if((bp[idxchk] != p->pattern[idxchk]) ||  (bp[idxchk / 2] != p->pattern[idxchk / 2])) {
+			p = p->next;
+			continue;
+		    }
+		}
+
+		/* compare prefix and pattern in one go, starting at the prefix */
+		if(p->prefix_length) {
+		    off -= p->prefix_length;
+		    bp -= p->prefix_length;
+		    pt = p->prefix;
+		} else {
+		    pt = p->pattern;
+		}
+
+		found = 1;
+		for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) {
+		    if(bp[j] != pt[j]) {
+			found = 0;
+			break;
+		    }
+		}
+
+		if(found && p->length + p->prefix_length == j) {
+
+		    if(p->target || p->offset) {
+			off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+			if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) {
+			    p = p->next;
+			    continue;
+			}
+		    }
+
+		    if(virname)
+			*virname = p->virname;
+
+		    if(info.exeinfo.section)
+			free(info.exeinfo.section);
+
+		    return CL_VIRUS;
+		}
+
+		p = p->next;
+	    }
+
+	    /* no candidate matched: move on by one byte */
+	    shift = 1;
+	}
+
+	i += shift;
+    }
+
+    if(info.exeinfo.section)
+	free(info.exeinfo.section);
+
+    return CL_CLEAN;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,439 @@
+/*
+ *  Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "matcher-ac.h"
+#include "matcher-bm.h"
+#include "md5.h"
+#include "filetypes.h"
+#include "matcher.h"
+#include "pe.h"
+#include "elf.h"
+#include "execs.h"
+#include "special.h"
+#include "str.h"
+#include "cltypes.h"
+
+/*
+ * Maps an index into engine->root[] to the file type that root's signatures
+ * target: the scanners below pick engine->root[i] when targettab[i] equals
+ * the file type being scanned.  Index 0 is never searched; root[0] is used
+ * directly as the generic root.
+ */
+static cli_file_t targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
+
+/*
+ * Scan an in-memory buffer against the engine's signatures.
+ *
+ * The root selected through targettab[] for ftype (if any) is consulted
+ * first, then the generic root engine->root[0].  For each root the BM matcher
+ * runs first unless the root is marked ac_only; the AC matcher runs whenever
+ * BM did not report CL_VIRUS.
+ *
+ * Returns CL_ENULLARG when engine is NULL, a non-zero cli_ac_initdata()
+ * result unchanged, CL_VIRUS as soon as either root reports it, and otherwise
+ * whatever the scan of the generic root returned.
+ */
+int cli_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cl_engine *engine, cli_file_t ftype)
+{
+	struct cli_matcher *generic, *typed = NULL;
+	struct cli_ac_data acdata;
+	unsigned int idx;
+	int rc;
+
+
+    if(engine == NULL) {
+	cli_errmsg("cli_scanbuff: engine == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    generic = engine->root[0]; /* generic signatures */
+
+    /* find the root holding the signatures specific to this file type */
+    idx = 1;
+    while(ftype && idx < CL_TARGET_TABLE_SIZE) {
+	if(targettab[idx] == ftype) {
+	    typed = engine->root[idx];
+	    break;
+	}
+	idx++;
+    }
+
+    if(typed != NULL) {
+	rc = cli_ac_initdata(&acdata, typed->ac_partsigs, AC_DEFAULT_TRACKLEN);
+	if(rc)
+	    return rc;
+
+	if(typed->ac_only) {
+	    rc = cli_ac_scanbuff(buffer, length, virname, typed, &acdata, 0, 0, ftype, -1, NULL);
+	} else {
+	    rc = cli_bm_scanbuff(buffer, length, virname, typed, 0, ftype, -1);
+	    if(rc != CL_VIRUS)
+		rc = cli_ac_scanbuff(buffer, length, virname, typed, &acdata, 0, 0, ftype, -1, NULL);
+	}
+
+	cli_ac_freedata(&acdata);
+
+	if(rc == CL_VIRUS)
+	    return rc;
+    }
+
+    rc = cli_ac_initdata(&acdata, generic->ac_partsigs, AC_DEFAULT_TRACKLEN);
+    if(rc)
+	return rc;
+
+    if(generic->ac_only) {
+	rc = cli_ac_scanbuff(buffer, length, virname, generic, &acdata, 0, 0, ftype, -1, NULL);
+    } else {
+	rc = cli_bm_scanbuff(buffer, length, virname, generic, 0, ftype, -1);
+	if(rc != CL_VIRUS)
+	    rc = cli_ac_scanbuff(buffer, length, virname, generic, &acdata, 0, 0, ftype, -1, NULL);
+    }
+
+    cli_ac_freedata(&acdata);
+
+    return rc;
+}
+
+/*
+ * Look up a 16-byte MD5 digest in the engine's MD5 hash list.
+ *
+ * The first digest byte selects the bucket in engine->md5_hlist; the chain
+ * hanging off that bucket is then searched for an exact 16-byte match.
+ *
+ * Returns the matching node, or NULL when the bucket is empty or no node in
+ * it carries this digest.
+ */
+struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine)
+{
+	struct cli_md5_node *node;
+
+
+    for(node = engine->md5_hlist[md5[0] & 0xff]; node != NULL; node = node->next) {
+	if(memcmp(node->md5, md5, 16) == 0)
+	    return node;
+    }
+
+    return NULL;
+}
+
+/*
+ * Translate a signature offset specification into a file offset.
+ *
+ * Forms handled (each may be followed by ",<maxshift>"):
+ *	<n>		plain number, returned as is
+ *	EP+<n>, EP-<n>	relative to info->exeinfo.ep
+ *	S<x>+<n>	relative to the raw offset of section x
+ *	SL+<n>		relative to the raw offset of the last section
+ *	EOF-<n>		relative to the file size
+ *
+ * offstr:	the offset specification
+ * info:	per-file cache; info->status is 0 before the executable header
+ *		has been parsed, 1 after it was parsed successfully and -1
+ *		after a failure.  info->fsize caches the file size.
+ * fd:		descriptor of the file (its position is saved and restored
+ *		around the header parse)
+ * ftype:	selects the header parser: cli_peheader for CL_TYPE_MSEXE,
+ *		cli_elfheader for CL_TYPE_ELF
+ * ret:		set to 0 on success and to -1 when no offset can be computed
+ * maxshift:	written only when offstr contains a ','
+ *
+ * Returns the computed offset, or 0 with *ret == -1 on failure.
+ */
+off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret, unsigned int *maxshift)
+{
+	int (*einfo)(int, struct cli_exe_info *) = NULL;
+	unsigned int n, val;
+	const char *pt;
+	off_t pos, offset;
+
+
+    *ret = 0;
+
+    /* EP and section based offsets need the executable header */
+    if(!strncmp(offstr, "EP", 2) || offstr[0] == 'S') {
+
+	if(info->status == -1) {
+	    /* an earlier attempt to parse the header already failed */
+	    *ret = -1;
+	    return 0;
+
+	} else if(!info->status) {
+
+	    if(ftype == CL_TYPE_MSEXE)
+		einfo = cli_peheader;
+	    else if(ftype == CL_TYPE_ELF)
+		einfo = cli_elfheader;
+
+	    if(einfo) {
+		if((pos = lseek(fd, 0, SEEK_CUR)) == -1) {
+		    cli_dbgmsg("Invalid descriptor\n");
+		    info->status = *ret = -1;
+		    return 0;
+		}
+
+		lseek(fd, 0, SEEK_SET);
+		if(einfo(fd, &info->exeinfo)) {
+		    lseek(fd, pos, SEEK_SET);
+		    info->status = *ret = -1;
+		    return 0;
+		}
+		lseek(fd, pos, SEEK_SET);
+		info->status = 1;
+	    }
+	}
+    }
+
+    if((pt = strchr(offstr, ',')))
+	*maxshift = atoi(++pt);
+
+    /* the <ctype.h> functions are only defined for unsigned char values */
+    if(isdigit((unsigned char) offstr[0])) {
+	return atoi(offstr);
+
+    } else if(info->status == 1 && (!strncmp(offstr, "EP+", 3) || !strncmp(offstr, "EP-", 3))) {
+
+	if(offstr[2] == '+')
+	    return info->exeinfo.ep + atoi(offstr + 3);
+	else
+	    return info->exeinfo.ep - atoi(offstr + 3);
+
+    } else if(info->status == 1 && offstr[0] == 'S') {
+
+	/*
+	 * Guard against an empty section table before indexing the last
+	 * entry; with no sections "SL+n" falls through to the S<x>+<n>
+	 * parser below, which rejects it.
+	 */
+	if(!strncmp(offstr, "SL", 2) && info->exeinfo.nsections && info->exeinfo.section[info->exeinfo.nsections - 1].rsz) {
+
+	    if(sscanf(offstr, "SL+%u", &val) != 1) {
+		*ret = -1;
+		return 0;
+	    }
+
+	    offset = val + info->exeinfo.section[info->exeinfo.nsections - 1].raw;
+
+	} else {
+
+	    if(sscanf(offstr, "S%u+%u", &n, &val) != 2) {
+		*ret = -1;
+		return 0;
+	    }
+
+	    /* unknown section, or a section with a zero rsz */
+	    if(n >= info->exeinfo.nsections || !info->exeinfo.section[n].rsz) {
+		*ret = -1;
+		return 0;
+	    }
+
+	    offset = val + info->exeinfo.section[n].raw;
+	}
+
+	return offset;
+
+    } else if(!strncmp(offstr, "EOF-", 4)) {
+	    struct stat sb;
+
+	/* look the size up once and cache it in info */
+	if(!info->fsize) {
+	    if(fstat(fd, &sb) == -1) {
+		info->status = *ret = -1;
+		return 0;
+	    }
+	    info->fsize = sb.st_size;
+	}
+
+	return info->fsize - atoi(offstr + 4);
+    }
+
+    *ret = -1;
+    return 0;
+}
+
+/*
+ * Decide whether a detection on the file behind fd is a known false positive.
+ *
+ * The file's MD5 is computed with cli_md5digest() and looked up in the
+ * engine's MD5 list; an entry only counts when it is flagged as a
+ * false-positive entry (fp) and its recorded size equals the file size.
+ *
+ * Returns 1 when the detection should be discarded, 0 otherwise.  Callers
+ * treat any non-zero result as "false positive", so every failure path
+ * (no MD5 list, digest failure, fstat() failure) returns 0: a detection is
+ * never suppressed unless the false-positive entry could be fully verified.
+ */
+static int cli_checkfp(int fd, const struct cl_engine *engine)
+{
+	struct cli_md5_node *md5_node;
+	unsigned char *digest;
+	struct stat sb;
+	int is_fp = 0;
+
+
+    if(!engine->md5_hlist)
+	return 0;
+
+    if(!(digest = cli_md5digest(fd))) {
+	cli_errmsg("cli_checkfp(): Can't generate MD5 checksum\n");
+	return 0;
+    }
+
+    if((md5_node = cli_vermd5(digest, engine)) && md5_node->fp) {
+	if(fstat(fd, &sb)) {
+	    /* size cannot be verified: keep the detection */
+	    cli_errmsg("cli_checkfp(): fstat() failed\n");
+	} else if((unsigned int) sb.st_size != md5_node->size) {
+	    cli_warnmsg("Detected false positive MD5 match. Please report.\n");
+	} else {
+	    cli_dbgmsg("Eliminated false positive match (fp sig: %s)\n", md5_node->virname);
+	    is_fp = 1;
+	}
+    }
+
+    /* single release point so no path can leak the digest */
+    free(digest);
+
+    return is_fp;
+}
+
+/*
+ * Check that a signature matched at the position its offset specification
+ * demands.
+ *
+ * ftype:	file type, forwarded to cli_caloff()
+ * offstr:	offset specification of the signature, may be NULL
+ * fileoff:	file offset at which the signature matched
+ * info:	per-file cache used by cli_caloff()
+ * desc:	file descriptor, or -1 when none is available
+ * virname:	signature name, used in debug messages only
+ *
+ * Returns 1 when the match is acceptable (also when offstr is NULL or desc is
+ * -1, in which case nothing is checked) and 0 when the offset cannot be
+ * computed or the match lies outside [offset, offset + maxshift].
+ */
+int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
+{
+	off_t offset;
+	int ret;
+	unsigned int maxshift = 0;
+
+
+    if(offstr && desc != -1) {
+	offset = cli_caloff(offstr, info, desc, ftype, &ret, &maxshift);
+
+	if(ret == -1) {
+	    cli_dbgmsg("cli_validatesig: Can't calculate offset for signature %s\n", virname);
+	    return 0;
+	}
+
+	/*
+	 * off_t need not have the width of unsigned long, so the values are
+	 * converted explicitly to match the %lu conversions.
+	 */
+	if(maxshift) {
+	    if((fileoff < offset) || (fileoff > offset + (off_t) maxshift)) {
+		cli_dbgmsg("Signature offset: %lu, expected: [%lu..%lu] (%s)\n", (unsigned long) fileoff, (unsigned long) offset, (unsigned long) (offset + (off_t) maxshift), virname);
+		return 0;
+	    }
+	} else if(fileoff != offset) {
+	    cli_dbgmsg("Signature offset: %lu, expected: %lu (%s)\n", (unsigned long) fileoff, (unsigned long) offset, virname);
+	    return 0;
+	}
+    }
+
+    return 1;
+}
+
+/*
+ * Scan the contents of a file descriptor against the engine's signatures.
+ *
+ * The file is read in SCANBUFF-sized blocks into a buffer that reserves
+ * maxpatlen bytes in front of the read area.  After every full block the last
+ * maxpatlen bytes are copied to the front so that patterns straddling a block
+ * boundary are still seen by the matchers.
+ *
+ * desc:	descriptor to read from
+ * ctx:		scan context; ctx->engine supplies the signature roots,
+ *		ctx->virname receives the name of a match and ctx->scanned
+ *		(if set) is advanced by the amount of data read
+ * otfrec:	when non-zero, the highest result >= CL_TYPENO returned by the
+ *		generic scan is remembered and returned if nothing is found
+ * ftype:	file type; selects the type-specific root through targettab[]
+ * ftonly:	when non-zero only the type-specific root is used and the MD5
+ *		check is skipped
+ * ftoffset:	forwarded to cli_ac_scanbuff()
+ *
+ * Returns CL_ENULLARG, CL_EMEM, a cli_ac_initdata() error or CL_EIO on
+ * failure, CL_VIRUS on a match that cli_checkfp() does not discard, and
+ * otherwise CL_CLEAN or (with otfrec) the recognised type.
+ */
+int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset)
+{
+	unsigned char *buffer, *buff, *endbl, *upt;
+	int ret = CL_CLEAN, type = CL_CLEAN, i, bytes;
+	uint32_t buffersize, length, maxpatlen, shift = 0, offset = 0;
+	struct cli_ac_data gdata, tdata;
+	cli_md5_ctx md5ctx;
+	unsigned char digest[16];
+	struct cli_md5_node *md5_node;
+	struct cli_matcher *groot = NULL, *troot = NULL;
+
+
+    if(!ctx->engine) {
+	cli_errmsg("cli_scandesc: engine == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    if(!ftonly)
+	groot = ctx->engine->root[0]; /* generic signatures */
+
+    if(ftype) {
+	for(i = 1; i < CL_TARGET_TABLE_SIZE; i++) {
+	    if(targettab[i] == ftype) {
+		troot = ctx->engine->root[i];
+		break;
+	    }
+	}
+    }
+
+    if(ftonly) {
+	if(!troot)
+	    return CL_CLEAN;
+
+	maxpatlen = troot->maxpatlen;
+    } else {
+	if(troot)
+	    maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
+	else
+	    maxpatlen = groot->maxpatlen;
+    }
+
+    /* prepare the buffer */
+    buffersize = maxpatlen + SCANBUFF;
+    if(!(buffer = (unsigned char *) cli_calloc(buffersize, sizeof(unsigned char)))) {
+	cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%u)\n", buffersize);
+	return CL_EMEM;
+    }
+
+    /* from here on every early return has to release what was set up */
+    if(!ftonly && (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN))) {
+	free(buffer);
+	return ret;
+    }
+
+    if(troot) {
+	if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN))) {
+	    if(!ftonly)
+		cli_ac_freedata(&gdata);
+	    free(buffer);
+	    return ret;
+	}
+    }
+
+    if(!ftonly && ctx->engine->md5_hlist)
+	cli_md5_init(&md5ctx);
+
+    buff = buffer;
+    buff += maxpatlen; /* pointer to read data block */
+    endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block
+					  * length of maxpatlen
+					  */
+
+    /* the first block is scanned from the read area, later ones from the
+     * start of the buffer so that the carried-over tail is included
+     */
+    upt = buff;
+    while((bytes = cli_readn(desc, buff + shift, SCANBUFF - shift)) > 0) {
+
+	if(ctx->scanned)
+	    *ctx->scanned += bytes / CL_COUNT_PRECISION;
+
+	length = shift + bytes;
+	if(upt == buffer)
+	    length += maxpatlen;
+
+	if(troot) {
+	    if(troot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS)
+		ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, otfrec, offset, ftype, desc, ftoffset);
+
+	    if(ret == CL_VIRUS) {
+		free(buffer);
+		if(!ftonly)
+		    cli_ac_freedata(&gdata);
+		cli_ac_freedata(&tdata);
+
+		/* rewind so that cli_checkfp() digests the whole file */
+		lseek(desc, 0, SEEK_SET);
+		if(cli_checkfp(desc, ctx->engine))
+		    return CL_CLEAN;
+		else
+		    return CL_VIRUS;
+	    }
+	}
+
+	if(!ftonly) {
+	    if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
+		ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
+
+	    if(ret == CL_VIRUS) {
+		free(buffer);
+		cli_ac_freedata(&gdata);
+		if(troot)
+		    cli_ac_freedata(&tdata);
+		lseek(desc, 0, SEEK_SET);
+		if(cli_checkfp(desc, ctx->engine))
+		    return CL_CLEAN;
+		else
+		    return CL_VIRUS;
+
+	    } else if(otfrec && ret >= CL_TYPENO) {
+		/* remember the highest type code reported so far */
+		if(ret > type)
+		    type = ret;
+	    }
+
+	    if(ctx->engine->md5_hlist)
+		cli_md5_update(&md5ctx, buff + shift, bytes);
+	}
+
+	if(bytes + shift == SCANBUFF) {
+	    /* block is full: carry its tail over to the front */
+	    memmove(buffer, endbl, maxpatlen);
+	    offset += SCANBUFF;
+
+	    if(upt == buff) {
+		upt = buffer;
+		offset -= maxpatlen;
+	    }
+
+	    shift = 0;
+
+	} else {
+	    /* short read: keep filling the same block */
+	    shift += bytes;
+	}
+
+    }
+
+    free(buffer);
+    if(!ftonly)
+	cli_ac_freedata(&gdata);
+    if(troot)
+	cli_ac_freedata(&tdata);
+
+    /* nothing matched by pattern: try the MD5 of everything that was read */
+    if(!ftonly && ctx->engine->md5_hlist) {
+	cli_md5_final(digest, &md5ctx);
+
+	if((md5_node = cli_vermd5(digest, ctx->engine)) && !md5_node->fp) {
+		struct stat sb;
+
+	    if(fstat(desc, &sb))
+		return CL_EIO;
+
+	    if((unsigned int) sb.st_size != md5_node->size) {
+		cli_warnmsg("Detected false positive MD5 match. Please report.\n");
+	    } else {
+		if(ctx->virname)
+		    *ctx->virname = md5_node->virname;
+
+		return CL_VIRUS;
+	    }
+	}
+    }
+
+    return otfrec ? type : CL_CLEAN;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,5220 @@
+/*
+ *  Copyright (C) 2002-2006 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+static	char	const	rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $";
+
+#ifdef	_MSC_VER
+#include <winsock.h>	/* only needed in CL_EXPERIMENTAL */
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef	CL_DEBUG
+#define	NDEBUG	/* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef	_REENTRANT
+#define	_REENTRANT	/* for Solaris 2.8 */
+#endif
+#endif
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#ifdef	HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include <ctype.h>
+#include <time.h>
+#include <fcntl.h>
+#ifdef	HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include "clamav.h"
+#ifndef	C_WINDOWS
+#include <dirent.h>
+#endif
+#include <limits.h>
+#include <signal.h>
+
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <stddef.h>
+#endif
+
+#ifdef	CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "others.h"
+#include "str.h"
+#include "filetypes.h"
+#include "mbox.h"
+#include "dconf.h"
+
+#define DCONF_PHISHING mctx->ctx->dconf->phishing
+
+#ifdef	CL_DEBUG
+
+#if	defined(C_LINUX) || defined(C_CYGWIN)
+#include <features.h>
+#endif
+
+#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
+#define HAVE_BACKTRACE
+#endif
+#endif
+
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#include <syslog.h>
+
+static	void	sigsegv(int sig);
+static	void	print_trace(int use_syslog);
+
+/*#define	SAVE_TMP	/* Save the file being worked on in tmp */
+#endif
+
+#if	defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
+#undef strtok_r
+#undef __strtok_r
+#define strtok_r(a,b,c)	strtok(a,b)
+#endif
+
+#ifdef	HAVE_STDBOOL_H
+#ifdef	C_BEOS
+#include "SupportDefs.h"
+#else
+#include <stdbool.h>
+#endif
+#else
+#ifdef	FALSE
+typedef	unsigned	char	bool;
+#else
+typedef enum	{ FALSE = 0, TRUE = 1 } bool;
+#endif
+#endif
+
+/*
+ * Status codes used by the mbox parsing helpers: the return type of
+ * parseEmailBody(), and passed by pointer to do_multipart() and checkURLs().
+ */
+typedef	enum {
+	FAIL,
+	OK,
+	OK_ATTACHMENTS_NOT_SAVED,
+	VIRUS,
+	MAXREC,
+	MAXFILES
+} mbox_status;
+
+#ifndef isblank
+#define isblank(c)	(((c) == ' ') || ((c) == '\t'))
+#endif
+
+#define	SAVE_TO_DISC	/* multipart/message are saved in a temporary file */
+
+#define	FOLLOWURLS	5	/*
+				 * Maximum number of URLs scanned in a message
+				 * part. Helps to prevent Dialer.gen-45 and
+				 * Trojan.WinREG.Zapchast which are often
+				 * dispatched by emails which point to it. If
+				 * not defined, don't check any URLs
+				 * It is also used to indicate the number of
+				 * 301/302 redirects we wish to follow
+				 */
+
+#include "htmlnorm.h"
+
+#include "phishcheck.h"
+
+#ifndef	C_WINDOWS
+#include <netdb.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#ifndef	C_BEOS
+#include <net/if.h>
+#include <arpa/inet.h>
+#endif
+#endif
+
+#ifndef	C_WINDOWS
+#define	closesocket(s)	close(s)
+#define	SOCKET	int
+#endif
+
+#include <fcntl.h>
+#ifndef	C_WINDOWS
+#include <sys/time.h>
+#endif
+
+#ifndef HAVE_IN_PORT_T
+typedef	unsigned	short	in_port_t;
+#endif
+
+#ifndef HAVE_IN_ADDR_T
+typedef	unsigned	int	in_addr_t;
+#endif
+
+#if	(!defined(EALREADY)) && (defined(WSAEALREADY))
+#define EALREADY	WSAEALREADY
+#endif
+#if	(!defined(EINPROGRESS)) && (defined(WSAEINPROGRESS))
+#define EINPROGRESS	WSAEINPROGRESS
+#endif
+#if	(!defined(EISCONN)) && (defined(WSAEISCONN))
+#define EISCONN	WSAEISCONN
+#endif
+
+/* Needs HAVE_STRCASESTR test in configure */
+#ifndef	C_LINUX
+#define	strcasestr(h, n)	strstr(h, n)	/* This will cause isBounceMessage() to match too much */
+#endif
+
+/*
+ * Define this to handle messages covered by section 7.3.2 of RFC1341.
+ *	This is experimental code so it is up to YOU to (1) ensure it's secure
+ * (2) periodically trim the directory of old files
+ *
+ * If you use the load balancing feature of clamav-milter to run clamd on
+ * more than one machine you must make sure that .../partial is on a shared
+ * network filesystem
+ */
+#ifndef	C_WINDOWS	/* TODO: when opendir() is done */
+#define	PARTIAL_DIR
+#endif
+
+/*#define	NEW_WORLD*/
+
+/*#define	SCAN_UNENCODED_BOUNCES	*//*
+					 * Slows things down a lot and only catches unencoded copies
+					 * of EICAR within bounces, which don't matter
+					 */
+
+/*
+ * State handed to the mbox helpers that take an mbox_ctx pointer
+ * (parseEmailBody(), saveTextPart(), do_multipart(), checkURLs(), ...).
+ */
+typedef	struct	mbox_ctx {
+	const	char	*dir;	/* presumably the directory extracted parts are written to - confirm */
+	unsigned	int	files;	/* number of files extracted */
+	const	table_t	*rfc821Table;	/* header lookup table, see initialiseTables() */
+	const	table_t	*subtypeTable;	/* MIME subtype lookup table, see initialiseTables() */
+	cli_ctx	*ctx;	/* scan context of the caller */
+} mbox_ctx;
+
+static	int	cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx);
+static	message	*parseEmailFile(FILE *fin, const table_t *rfc821Table, const char *firstLine, const char *dir);
+static	message	*parseEmailHeaders(message *m, const table_t *rfc821Table);
+static	int	parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
+static	mbox_status	parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
+static	int	boundaryStart(const char *line, const char *boundary);
+static	int	boundaryEnd(const char *line, const char *boundary);
+static	int	initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
+static	int	getTextPart(message *const messages[], size_t size);
+static	size_t	strip(char *buf, int len);
+static	int	parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
+static	int	saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
+static	char	*rfc2047(const char *in);
+static	char	*rfc822comments(const char *in, char *out);
+#ifdef	PARTIAL_DIR
+static	int	rfc1341(message *m, const char *dir);
+#endif
+static	bool	usefulHeader(int commandNumber, const char *cmd);
+static	char	*getline_from_mbox(char *buffer, size_t len, FILE *fin);
+static	bool	isBounceStart(const char *line);
+static	bool	exportBinhexMessage(mbox_ctx *mctx, message *m);
+static	int	exportBounceMessage(mbox_ctx *ctx, text *start);
+static	message	*do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
+static	int	count_quotes(const char *buf);
+static	bool	next_is_folded_header(const text *t);
+static	bool	newline_in_header(const char *line);
+
+static	blob	*getHrefs(message *m, tag_arguments_t *hrefs);
+static	void	hrefs_done(blob *b, tag_arguments_t *hrefs);
+static	void	checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
+static	void	do_checkURLs(const char *dir, tag_arguments_t *hrefs);
+
+#if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+/*
+ * Argument bundle for getURL(); in the CL_THREAD_SAFE build getURL() takes
+ * it as a void pointer.
+ */
+struct arg {
+	char *url;
+	const char *dir;
+	char *filename;
+	int	depth;	/* NOTE(review): looks like the remaining redirect depth (see FOLLOWURLS) - confirm */
+};
+#define	URL_TIMEOUT	5	/* Allow 5 seconds to connect */
+#ifdef	CL_THREAD_SAFE
+static	void	*getURL(void *a);
+#else
+static	void	*getURL(struct arg *arg);
+#endif
+static	int	nonblock_connect(const char *url, SOCKET sock, const struct sockaddr *addr);
+static	int	connect_error(const char *url, SOCKET sock);
+static	int	my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len);
+
+#define NONBLOCK_SELECT_MAX_FAILURES	3
+#define NONBLOCK_MAX_ATTEMPTS	10
+
+#endif
+
+/* Maximum line length according to RFC821 */
+#define	RFC2821LENGTH	1000
+
+/* Hashcodes for our hash tables */
+#define	CONTENT_TYPE			1
+#define	CONTENT_TRANSFER_ENCODING	2
+#define	CONTENT_DISPOSITION		3
+
+/* Mime sub types */
+#define	PLAIN		1
+#define	ENRICHED	2
+#define	HTML		3
+#define	RICHTEXT	4
+#define	MIXED		5
+#define	ALTERNATIVE	6	/* RFC1521*/
+#define	DIGEST		7
+#define	SIGNED		8
+#define	PARALLEL	9
+#define	RELATED		10	/* RFC2387 */
+#define	REPORT		11	/* RFC1892 */
+#define	APPLEDOUBLE	12	/* Handling of this is only noddy for now */
+#define	FAX		MIXED	/*
+				 * RFC3458
+				 * Drafts stated to treat is as mixed if it is
+				 * not known.  This disappeared in the final
+				 * version (except when talking about
+				 * voice-message), but it is good enough for us
+				 * since we do no validation of coversheet
+				 * presence etc. (which also has disappeared
+				 * in the final version)
+				 */
+#define	ENCRYPTED	13	/*
+				 * e.g. RFC2015
+				 * Content-Type: multipart/encrypted;
+				 * boundary="nextPart1383049.XCRrrar2yq";
+				 * protocol="application/pgp-encrypted"
+				 */
+#define	X_BFILE		RELATED	/*
+				 * BeOS, expect two parts: the file and its
+				 * attributes. The attributes part comes as
+				 *	Content-Type: application/x-be_attribute
+				 *		name="foo"
+				 * I can't find where it is defined, any
+				 * pointers would be appreciated. For now
+				 * we treat it as multipart/related
+				 */
+#define	KNOWBOT		14	/* Unknown and undocumented format? */
+
+/*
+ * Static key/value lists: rfc821headers maps header names to the
+ * CONTENT_* codes and mimeSubtypes maps MIME subtype names to the subtype
+ * codes defined above.  Each list is terminated by an entry whose key is
+ * NULL.
+ */
+static	const	struct tableinit {
+	const	char	*key;
+	int	value;
+} rfc821headers[] = {
+	/* TODO: make these regular expressions */
+	{	"Content-Type",			CONTENT_TYPE		},
+	{	"Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING	},
+	{	"Content-Disposition",		CONTENT_DISPOSITION	},
+	{	NULL,				0			}
+}, mimeSubtypes[] = {	/* see RFC2045 */
+		/* subtypes of Text */
+	{	"plain",	PLAIN		},
+	{	"enriched",	ENRICHED	},
+	{	"html",		HTML		},
+	{	"richtext",	RICHTEXT	},
+		/* subtypes of Multipart */
+	{	"mixed",	MIXED		},
+	{	"alternative",	ALTERNATIVE	},
+	{	"digest",	DIGEST		},
+	{	"signed",	SIGNED		},
+	{	"parallel",	PARALLEL	},
+	{	"related",	RELATED		},
+	{	"report",	REPORT		},
+	{	"appledouble",	APPLEDOUBLE	},
+	{	"fax-message",	FAX		},
+	{	"encrypted",	ENCRYPTED	},
+	{	"x-bfile",	X_BFILE		},	/* BeOS */
+	{	"knowbot",		KNOWBOT		},	/* ??? */
+	{	"knowbot-metadata",	KNOWBOT		},	/* ??? */
+	{	"knowbot-code",		KNOWBOT		},	/* ??? */
+	{	"knowbot-state",	KNOWBOT		},	/* ??? */
+	{	NULL,		0		}
+};
+
+#ifdef	CL_THREAD_SAFE
+static	pthread_mutex_t	tables_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+#ifndef	O_BINARY
+#define	O_BINARY	0
+#endif
+
+#ifdef	NEW_WORLD
+
+#include "matcher.h"
+
+#undef	PARTIAL_DIR
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#else	/*HAVE_MMAP*/
+#undef	NEW_WORLD
+#endif
+#endif
+
+#ifdef	NEW_WORLD
+/*
+ * Files larger than this are scanned with the old method, should be
+ *	StreamMaxLength, I guess
+ * If NW_MAX_FILE_SIZE is not defined, all files go through the
+ *	new method. This definition is for machines very tight on RAM, or
+ *	with large StreamMaxLength values
+ */
+#define	MAX_ALLOCATION	134217728	/* see libclamav/others.c */
+#define	NW_MAX_FILE_SIZE	MAX_ALLOCATION
+
+/*
+ * One encoded region found in the mail buffer by cli_mbox(); the regions
+ * form a singly linked list that is decoded in order.
+ */
+struct scanlist {
+	const	char	*start;	/* first byte after the encoding keyword that was found */
+	size_t	size;	/* number of bytes from start up to the next marker or the end of the buffer */
+	encoding_type	decoder;	/* only BASE64 and QUOTEDPRINTABLE for now */
+	struct	scanlist *next;	/* next region, NULL at the end of the list */
+};
+
+/*
+ * File-scope list behind create_map(), add_to_map(), find_in_map() and
+ * free_map().
+ * NOTE(review): presumably each node records where one keyword occurs in the
+ * buffer, with map as the head and tail as the last node - confirm against
+ * those helpers.
+ */
+static struct map {
+	const	char	*offset;	/* sorted */
+	const	char	*word;
+	struct	map	*next;
+} *map, *tail;
+
+static	int	save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len);
+static	void	create_map(const char *begin, const char *end);
+static	void	add_to_map(const char *offset, const char *word);
+static	const	char	*find_in_map(const char *offset, const char *word);
+static	void	free_map(void);
+
+/*
+ * This could be the future. Instead of parsing and decoding it just decodes.
+ *
+ * USE IT AT YOUR PERIL, a large number of viruses are not detected with this
+ * method, possibly because the decoded files must be exact and not have
+ * extra data at the start or end, which this code will produce.
+ *
+ * Currently only supports base64 and quoted-printable
+ *
+ * You may also see a lot of warnings. For the moment it falls back to old
+ *	world mode if it doesn't know what to do - that'll be removed.
+ * The code is untidy...
+ *
+ * FIXME: Some mailbox scans are slower with this method. I suspect that it's
+ * because the scan can proceed to the end of the file rather than the end
+ * of the attachment, which can mean that later emails are scanned many times
+ *
+ * FIXME: quoted printable doesn't know when to stop, so size related virus
+ *	matching breaks
+ *
+ * TODO: Fall through to cli_parse_mbox() too often
+ *
+ * TODO: Add support for systems without mmap()
+ *
+ * TODO: partial_dir fall through
+ *
+ * FIXME: Some EICAR gets through
+ */
+int
+cli_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+	char *start, *ptr, *line;
+	const char *last, *p, *q;
+	size_t size;
+	struct stat statb;
+	message *m;
+	fileblob *fb;
+	int ret = CL_CLEAN;
+	int wasAlloced;
+	struct scanlist *scanlist, *scanelem;
+
+	if(dir == NULL) {
+		cli_warnmsg("cli_mbox called with NULL dir\n");
+		return CL_ENULLARG;
+	}
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+#ifdef	NW_MAX_FILE_SIZE
+	if(size > NW_MAX_FILE_SIZE)
+		return cli_parse_mbox(dir, desc, ctx);
+#endif
+
+	/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/
+#ifdef	PARTIAL_DIR
+	cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n");
+#endif
+
+	start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(start == MAP_FAILED)
+		return CL_EMEM;
+
+	cli_dbgmsg("mmap'ed mbox\n");
+
+	ptr = cli_malloc(size);
+	if(ptr) {
+		memcpy(ptr, start, size);
+		munmap(start, size);
+		start = ptr;
+		wasAlloced = 1;
+	} else
+		wasAlloced = 0;
+
+	/* last points to the last *valid* address in the array */
+	last = &start[size - 1];
+
+	create_map(start, last);
+
+	scanelem = scanlist = NULL;
+	q = start;
+	/*
+	 * FIXME: mismatch of const char * and char * here and in later calls
+	 *	to find_in_map()
+	 */
+	while((p = find_in_map(q, "base64")) != NULL) {
+		cli_dbgmsg("Found base64\n");
+		if(scanelem) {
+			scanelem->next = cli_malloc(sizeof(struct scanlist));
+			scanelem = scanelem->next;
+		} else
+			scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
+		scanelem->next = NULL;
+		scanelem->decoder = BASE64;
+		q = scanelem->start = &p[6];
+		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
+		   ((p = find_in_map(q, "base64")) != NULL) ||
+		   ((p = find_in_map(q, "quoted-printable")) != NULL)) {
+			scanelem->size = (size_t)(p - q);
+			q = p;
+		} else {
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
+			break;
+		}
+		cli_dbgmsg("base64: last %u q %u\n", (unsigned int)last, (unsigned int)q);
+		assert(scanelem->size <= size);
+	}
+
+	q = start;
+	while((p = find_in_map(q, "quoted-printable")) != NULL) {
+		if(p != q)
+			switch(p[-1]) {
+				case ' ':
+				case ':':
+				case '=':	/* wrong but allow it */
+					break;
+				default:
+					q = &p[16];
+					cli_dbgmsg("Ignore quoted-printable false positive\n");
+					continue;	/* false positive */
+			}
+
+		cli_dbgmsg("Found quoted-printable\n");
+#ifdef	notdef
+		/*
+		 * The problem with quoted printable is recognising when to stop
+		 * parsing
+		 */
+		if(scanelem) {
+			scanelem->next = cli_malloc(sizeof(struct scanlist));
+			scanelem = scanelem->next;
+		} else
+			scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
+		scanelem->next = NULL;
+		scanelem->decoder = QUOTEDPRINTABLE;
+		q = scanelem->start = &p[16];
+		cli_dbgmsg("qp: last %u q %u\n", (unsigned int)last, (unsigned int)q);
+		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
+		   ((p = find_in_map(q, "quoted-printable")) != NULL) ||
+		   ((p = find_in_map(q, "base64")) != NULL)) {
+			scanelem->size = (size_t)(p - q);
+			q = p;
+			cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
+		} else {
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
+			break;
+		}
+		assert(scanelem->size <= size);
+#else
+		if(wasAlloced)
+			free(start);
+		else
+			munmap(start, size);
+
+		free_map();
+		return cli_parse_mbox(dir, desc, ctx);
+#endif
+	}
+
+	if(scanlist == NULL) {
+		const struct tableinit *tableinit;
+		bool anyHeadersFound = FALSE;
+		bool hasuuencode = FALSE;
+		cli_file_t type;
+
+		/* FIXME: message: There could of course be no decoder needed... */
+		for(tableinit = rfc821headers; tableinit->key; tableinit++)
+			if(find_in_map(start, tableinit->key)) {
+				anyHeadersFound = TRUE;
+				break;
+			}
+
+		if((!anyHeadersFound) &&
+		   ((p = find_in_map(start, "\nbegin ")) != NULL) &&
+		   (isuuencodebegin(++p)))
+			/* uuencoded part */
+			hasuuencode = TRUE;
+		else {
+			cli_dbgmsg("Nothing encoded, looking for a text part to save\n");
+			ret = save_text(ctx, dir, start, size);
+			if(wasAlloced)
+				free(start);
+			else
+				munmap(start, size);
+
+			free_map();
+			if(ret != CL_EFORMAT)
+				return ret;
+			ret = CL_CLEAN;
+		}
+
+		free_map();
+
+		type = cli_filetype(start, size);
+
+		if((type == CL_TYPE_UNKNOWN_TEXT) &&
+		   (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
+			type = CL_TYPE_MAIL;
+
+		if(wasAlloced)
+			free(start);
+		else
+			munmap(start, size);
+
+		if(anyHeadersFound || hasuuencode) {
+			/* TODO: reduce the number of falls through here */
+			if(hasuuencode)
+				/* TODO: fast track visa */
+				cli_warnmsg("New world - fall back to old uudecoder\n");
+			else
+				cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type);
+			if(type == CL_TYPE_MAIL)
+				return cli_parse_mbox(dir, desc, ctx);
+			cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
+			return CL_CLEAN;
+		}
+
+#if	0	/* I don't believe this is needed any more */
+		/*
+		 * The message could be a plain text phish
+		 * FIXME: Can't get to the option whether we are looking for
+		 *	phishes or not, so assume we are, this slows things a
+		 *	lot
+		 * Should be
+		 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
+		 */
+		if(type == CL_TYPE_MAIL)
+			return cli_parse_mbox(dir, desc, ctx);
+#endif
+		cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n",
+			type);
+		return CL_CLEAN;
+	}
+#if	0
+	if(wasAlloced) {
+		const char *max = NULL;
+
+		for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
+			const char *end = &scanelem->start[scanelem->size];
+
+			if(end > max)
+				max = end;
+		}
+
+		if(max < last)
+			printf("could free %d bytes\n", (int)(last - max));
+	}
+#endif
+
+	for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
+		if(scanelem->decoder == BASE64) {
+			const char *b64start = scanelem->start;
+			size_t b64size = scanelem->size;
+
+			cli_dbgmsg("b64size = %lu\n", b64size);
+			while((*b64start != '\n') && (*b64start != '\r')) {
+				b64start++;
+				b64size--;
+			}
+			/*
+			 * Look for the end of the headers
+			 */
+			while(b64start < last) {
+				if(*b64start == ';') {
+					b64start++;
+					b64size--;
+				} else if((memcmp(b64start, "\n\n", 2) == 0) ||
+					  (memcmp(b64start, "\r\r", 2) == 0)) {
+					b64start += 2;
+					b64size -= 2;
+					break;
+				} else if(memcmp(b64start, "\r\n\r\n", 4) == 0) {
+					b64start += 4;
+					b64size -= 4;
+					break;
+				} else if(memcmp(b64start, "\n \n", 3) == 0) {
+					/*
+					 * Some viruses are broken and have
+					 * one space character at the end of
+					 * the headers
+					 */
+					b64start += 3;
+					b64size -= 3;
+					break;
+				} else if(memcmp(b64start, "\r\n \r\n", 5) == 0) {
+					/*
+					 * Some viruses are broken and have
+					 * one space character at the end of
+					 * the headers
+					 */
+					b64start += 5;
+					b64size -= 5;
+					break;
+				}
+				b64start++;
+				b64size--;
+			}
+
+			if(b64size > 0L)
+				while((!isalnum(*b64start)) && (*b64start != '/')) {
+					if(b64size-- == 0L)
+						break;
+					b64start++;
+				}
+
+			if(b64size > 0L) {
+				int lastline;
+				char *tmpfilename;
+				unsigned char *uptr;
+
+				cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", b64size);
+				if((fb = fileblobCreate()) == NULL) {
+					free_map();
+					if(wasAlloced)
+						free(start);
+					else
+						munmap(start, size);
+
+					return CL_EMEM;
+				}
+
+				tmpfilename = cli_gentemp(dir);
+				if(tmpfilename == NULL) {
+					free_map();
+					if(wasAlloced)
+						free(start);
+					else
+						munmap(start, size);
+					fileblobDestroy(fb);
+
+					return CL_EMEM;
+				}
+				fileblobSetFilename(fb, dir, tmpfilename);
+				free(tmpfilename);
+
+				line = NULL;
+
+				m = messageCreate();
+				if(m == NULL) {
+					free_map();
+					if(wasAlloced)
+						free(start);
+					else
+						munmap(start, size);
+					fileblobDestroy(fb);
+
+					return CL_EMEM;
+				}
+				messageSetEncoding(m, "base64");
+
+				messageSetCTX(m, ctx);
+				fileblobSetCTX(fb, ctx);
+
+				lastline = 0;
+				do {
+					int length = 0, datalen;
+					char *newline, *equal;
+					unsigned char *bigbuf, *data;
+					unsigned char smallbuf[1024];
+					const char *cptr;
+
+					/*printf("%ld: ", b64size); fflush(stdout);*/
+
+					for(cptr = b64start; b64size && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
+						length++;
+						--b64size;
+					}
+
+					/*printf("%d: ", length); fflush(stdout);*/
+
+					newline = cli_realloc(line, length + 1);
+					if(newline == NULL)
+						break;
+					line = newline;
+
+					memcpy(line, b64start, length);
+					line[length] = '\0';
+
+					equal = strchr(line, '=');
+					if(equal) {
+						lastline++;
+						*equal = '\0';
+					}
+					/*puts(line);*/
+
+#if	0
+					if(messageAddStr(m, line) < 0)
+						break;
+#endif
+					if(length >= (int)sizeof(smallbuf)) {
+						datalen = length + 2;
+						data = bigbuf = cli_malloc(datalen);
+						if(data == NULL)
+							break;
+					} else {
+						bigbuf = NULL;
+						data = smallbuf;
+						datalen = sizeof(data) - 1;
+					}
+					uptr = decodeLine(m, BASE64, line, data, datalen);
+
+					if(uptr == NULL) {
+						if(bigbuf)
+							free(bigbuf);
+						break;
+					}
+					/*cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));*/
+					datalen = fileblobAddData(fb, data, (size_t)(uptr - data));
+					if(bigbuf)
+						free(bigbuf);
+
+					if(datalen < 0)
+						break;
+					if(fileblobContainsVirus(fb))
+						break;
+
+					if((b64size > 0) && (*cptr == '\r')) {
+						b64start = ++cptr;
+						--b64size;
+					}
+					if((b64size > 0) && (*cptr == '\n')) {
+						b64start = ++cptr;
+						--b64size;
+					}
+					if(lastline)
+						break;
+				} while(b64size > 0L);
+
+				if(m->base64chars) {
+					unsigned char data[4];
+
+					uptr = base64Flush(m, data);
+					if(uptr) {
+						/*cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));*/
+						(void)fileblobAddData(fb, data, (size_t)(uptr - data));
+					}
+				}
+				if(fb)
+					fileblobDestroy(fb);
+				else
+					ret = -1;
+
+				messageDestroy(m);
+				free(line);
+			}
+		} else if(scanelem->decoder == QUOTEDPRINTABLE) {
+			const char *quotedstart = scanelem->start;
+			size_t quotedsize = scanelem->size;
+
+			cli_dbgmsg("quotedsize = %lu\n", quotedsize);
+			while(*quotedstart != '\n') {
+				quotedstart++;
+				quotedsize--;
+			}
+			/*
+			 * Look for the end of the headers
+			 */
+			while(quotedstart < last) {
+				if(*quotedstart == ';') {
+					quotedstart++;
+					quotedsize--;
+				} else if((*quotedstart == '\n') || (*quotedstart == '\r')) {
+					quotedstart++;
+					quotedsize--;
+					if((*quotedstart == '\n') || (*quotedstart == '\r')) {
+						quotedstart++;
+						quotedsize--;
+						break;
+					}
+				}
+				quotedstart++;
+				quotedsize--;
+			}
+
+			while(!isalnum(*quotedstart)) {
+				quotedstart++;
+				quotedsize--;
+			}
+
+			if(quotedsize > 0L) {
+				cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", quotedsize);
+
+				m = messageCreate();
+				if(m == NULL) {
+					free_map();
+					if(wasAlloced)
+						free(start);
+					else
+						munmap(start, size);
+
+					return CL_EMEM;
+				}
+				messageSetEncoding(m, "quoted-printable");
+				messageSetCTX(m, ctx);
+
+				line = NULL;
+
+				do {
+					int length = 0;
+					char *newline;
+					const char *cptr;
+
+					/*printf("%ld: ", quotedsize); fflush(stdout);*/
+
+					for(cptr = quotedstart; quotedsize && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
+						length++;
+						--quotedsize;
+					}
+
+					/*printf("%d: ", length); fflush(stdout);*/
+
+					newline = cli_realloc(line, length + 1);
+					if(newline == NULL)
+						break;
+					line = newline;
+
+					memcpy(line, quotedstart, length);
+					line[length] = '\0';
+
+					/*puts(line);*/
+
+					if(messageAddStr(m, line) < 0)
+						break;
+
+					if((quotedsize > 0) && (*cptr == '\r')) {
+						quotedstart = ++cptr;
+						--quotedsize;
+					}
+					if((quotedsize > 0) && (*cptr == '\n')) {
+						quotedstart = ++cptr;
+						--quotedsize;
+					}
+				} while(quotedsize > 0L);
+
+				free(line);
+				fb = messageToFileblob(m, dir, 1);
+				messageDestroy(m);
+
+				if(fb)
+					fileblobDestroy(fb);
+				else
+					ret = -1;
+			}
+		}
+	}
+	scanelem = scanlist;
+
+	/*
+	 * There could be a phish in the plain text part, so save that
+	 * FIXME: Can't get to the option whether we are looking for
+	 *	phishes or not, so assume we are, this slows things a
+	 *	lot
+	 * Should be
+	 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
+	 */
+	ret = save_text(ctx, dir, start, size);
+
+	free_map();
+
+	while(scanelem) {
+		struct scanlist *n = scanelem->next;
+
+		free(scanelem);
+		scanelem = n;
+	}
+
+	if(wasAlloced)
+		free(start);
+	else
+		munmap(start, size);
+
+	/*
+	 * FIXME: Need to run cl_scandir() here and return that value
+	 */
+	cli_dbgmsg("cli_mbox: ret = %d\n", ret);
+	if(ret != CL_EFORMAT)
+		return ret;
+
+	cli_warnmsg("New world - don't know what to do - fall back to old world\n");
+	/* Fall back for now */
+	lseek(desc, 0L, SEEK_SET);
+	return cli_parse_mbox(dir, desc, ctx);
+}
+
+/*
+ * Save a text part - it could contain phish or jscript
+ *
+ * start/len describe the whole mail buffer. The keyword map built by
+ * create_map() is consulted to find the blank line that ends the headers
+ * and, when the mail also holds encoded parts, the marker ("\nFrom ",
+ * "base64" or "quoted-printable") that ends the plain text. The text is
+ * scanned with cli_scanbuff() and then written to a temporary file in dir.
+ *
+ * Returns:
+ *	CL_SUCCESS	the text was written out
+ *	CL_VIRUS	cli_scanbuff() flagged the text
+ *	CL_EFORMAT	no blank line was found, or the text is under 5 bytes
+ *	CL_EMEM		fileblobCreate() failed
+ *	CL_ETMPFILE	cli_gentemp() failed
+ */
+static int
+save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
+{
+	const char *p;
+
+	if((p = find_in_map(start, "\n\n")) || (p = find_in_map(start, "\r\n\r\n"))) {
+		const char *q;
+		fileblob *fb;
+		char *tmpfilename;
+
+		/*
+		 * The map only ever holds the spelling "quoted-printable"
+		 * (see create_map), so that is the word to look for
+		 */
+		if(((q = find_in_map(start, "base64")) == NULL) &&
+		   ((q = find_in_map(start, "quoted-printable")) == NULL)) {
+			cli_dbgmsg("It's all plain text!\n");
+			/* step over the blank line that ended the headers */
+			if(*p == '\r')
+				p += 4;
+			else
+				p += 2;
+			len -= (p - start);
+		} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
+		   ((q = find_in_map(p, "base64")) == NULL) &&
+		   ((q = find_in_map(p, "quoted-printable")) == NULL)) {
+			cli_dbgmsg("Can't find end of plain text - assume it's all\n");
+			/*
+			 * p has moved past start, so only the remainder of
+			 * the buffer may be read from it
+			 */
+			len -= (size_t)(p - start);
+		} else
+			len = (size_t)(q - p);
+
+		if(len < 5) {
+			cli_dbgmsg("save_text: Too small\n");
+			return CL_EFORMAT;
+		}
+		if(ctx->scanned)
+			*ctx->scanned += len / CL_COUNT_PRECISION;
+
+		/*
+		 * This doesn't work, cli_scanbuff isn't designed to be used
+		 *	in this way. It gets the "filetype" wrong and then
+		 *	doesn't scan correctly
+		 */
+		if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS) {
+			cli_dbgmsg("save_text: found %s\n", *ctx->virname);
+			return CL_VIRUS;
+		}
+
+		fb = fileblobCreate();
+		if(fb == NULL)
+			return CL_EMEM;
+
+		tmpfilename = cli_gentemp(dir);
+
+		if(tmpfilename == NULL) {
+			fileblobDestroy(fb);
+			return CL_ETMPFILE;
+		}
+		/* len is a size_t: widen it explicitly to match the format */
+		cli_dbgmsg("save plain bit to %s, %lu bytes\n",
+			tmpfilename, (unsigned long)len);
+
+		fileblobSetFilename(fb, dir, tmpfilename);
+		free(tmpfilename);
+
+		(void)fileblobAddData(fb, (const unsigned char *)p, len);
+		fileblobDestroy(fb);
+		return CL_SUCCESS;
+	}
+	cli_dbgmsg("No text part found to save\n");
+	return CL_EFORMAT;
+}
+
+/*
+ * Walk every position in [begin, end) and record, via add_to_map(), the
+ * first keyword of the table below that matches there (case-insensitively).
+ * Any map left over from an earlier call is released first, with a warning.
+ */
+static void
+create_map(const char *begin, const char *end)
+{
+	const struct keyword {
+		const char *text;
+		int length;
+	} keywords[] = {
+		{	"base64",		6	},
+		{	"quoted-printable",	16	},
+		{	"\nbegin ",		7	},
+		{	"\nFrom ",		6	},
+		{	"\n\n",			2	},
+		{	"\r\n\r\n",		4	},
+		{	NULL,			0	}
+	};
+	const char *pos;
+
+	if(map != NULL) {
+		cli_warnmsg("create_map called without free_map\n");
+		free_map();
+	}
+
+	for(pos = begin; pos < end; pos++) {
+		const struct keyword *kw = keywords;
+
+		while(kw->text != NULL) {
+			/* only compare when the keyword still fits before end */
+			if(((end - pos) >= kw->length) &&
+			   (strncasecmp(pos, kw->text, kw->length) == 0)) {
+				add_to_map(pos, kw->text);
+				break;
+			}
+			kw++;
+		}
+	}
+}
+
+/*
+ * Append an entry recording that 'word' was seen at 'offset' to the end of
+ * the map list.
+ *
+ * To sort map, assume 'offset' is presented in sorted order
+ *
+ * If the entry cannot be allocated it is dropped and the list is left
+ * exactly as it was, so later lookups simply will not see this offset.
+ */
+static void
+add_to_map(const char *offset, const char *word)
+{
+	struct map *item = cli_malloc(sizeof(struct map));
+
+	if(item == NULL)
+		/* out of memory: never link or dereference a NULL node */
+		return;
+
+	item->offset = offset;
+	item->word = word;
+	item->next = NULL;
+
+	if(map) {
+		tail->next = item;
+		tail = item;
+	} else
+		map = tail = item;
+}
+
+/*
+ * Return the offset of the first map entry located at or after 'offset'
+ * whose word equals 'word' (compared case-insensitively), or NULL when the
+ * map holds no such entry.
+ */
+static const char *
+find_in_map(const char *offset, const char *word)
+{
+	const struct map *entry = map;
+
+	while(entry != NULL) {
+		if((entry->offset >= offset) &&
+		   (strcasecmp(word, entry->word) == 0))
+			return entry->offset;
+		entry = entry->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * Release every entry of the map list and leave map set to NULL.
+ */
+static void
+free_map(void)
+{
+	struct map *node, *following;
+
+	for(node = map; node != NULL; node = following) {
+		/* fetch the successor before the node is released */
+		following = node->next;
+		free(node);
+	}
+	map = NULL;
+}
+
+#else	/*!NEW_WORLD*/
+/*
+ * Entry point when NEW_WORLD is not defined: reject a NULL dir with
+ * CL_ENULLARG, otherwise hand everything to cli_parse_mbox().
+ */
+int
+cli_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+	if(dir != NULL)
+		return cli_parse_mbox(dir, desc, ctx);
+
+	cli_warnmsg("cli_mbox called with NULL dir\n");
+	return CL_ENULLARG;
+}
+#endif
+
+/*
+ * Parse the mail, or UNIX style mbox, readable from descriptor desc and
+ * hand each message body to parseEmailBody(), with dir recorded in the
+ * mbox_ctx that is passed along.
+ *
+ * The descriptor is dup()ed and the duplicate is wrapped in a stdio stream,
+ * so the fclose() calls below close the duplicate rather than desc itself.
+ *
+ * If the first line starts with "From " the input is treated as an mbox
+ * which may hold several messages; otherwise it is parsed as one message by
+ * parseEmailFile().
+ *
+ * Returns:
+ *	CL_EOPEN	fdopen() on the duplicated descriptor failed
+ *	CL_CLEAN	nothing could be read from the stream
+ *	CL_EMEM		initialiseTables() or messageCreate() failed
+ *	CL_VIRUS	parseEmailBody() returned VIRUS, or the phishing
+ *			check at the end of the function fired
+ *	CL_EFORMAT, CL_EMAXREC, CL_EMAXFILES
+ *			mapped from FAIL, MAXREC and MAXFILES returned by
+ *			parseEmailBody() for the last (or only) message
+ *	CL_SUCCESS	otherwise
+ *
+ * TODO: when signal handling is added, need to remove temp files when a
+ *	signal is received
+ * TODO: add option to scan in memory not via temp files, perhaps with a
+ * named pipe or memory mapped file, though this won't work on big e-mails
+ * containing many levels of encapsulated messages - it'd just take too much
+ * RAM
+ * TODO: parse .msg format files
+ * TODO: fully handle AppleDouble format, see
+ *	http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
+ * TODO: ensure parseEmailHeaders is always called before parseEmailBody
+ * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
+ * TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
+ *	e.g. \0Content-Type: application/binary;
+ */
+static int
+cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+	int retcode, i;
+	message *body;
+	FILE *fd;
+	char buffer[RFC2821LENGTH + 1];
+	mbox_ctx mctx;
+#ifdef HAVE_BACKTRACE
+	void (*segv)(int);
+#endif
+	static table_t *rfc821, *subtype;	/* filled in by the first call, kept for later calls */
+#ifdef	SAVE_TMP
+	char tmpfilename[16];
+	int tmpfd;
+#endif
+
+#ifdef	NEW_WORLD
+	cli_dbgmsg("fall back to old world\n");
+#else
+	cli_dbgmsg("in mbox()\n");
+#endif
+
+	i = dup(desc);	/* the stream below owns this copy, not desc */
+	if((fd = fdopen(i, "rb")) == NULL) {
+		cli_errmsg("Can't open descriptor %d\n", desc);
+		close(i);
+		return CL_EOPEN;
+	}
+	rewind(fd);	/* bug 240 */
+#ifdef	SAVE_TMP
+	/*
+	 * Copy the incoming mail for debugging, so that if it falls over
+	 * we have a copy of the offending email. This is debugging code
+	 * that you shouldn't of course install in a live environment. I am
+	 * not interested in hearing about security issues with this section
+	 * of the parser.
+	 */
+	strcpy(tmpfilename, "/tmp/mboxXXXXXX");
+	tmpfd = mkstemp(tmpfilename);
+	if(tmpfd < 0) {
+		perror(tmpfilename);
+		cli_errmsg("Can't make debugging file\n");
+	} else {
+		FILE *tmpfp = fdopen(tmpfd, "w");
+
+		if(tmpfp) {
+			while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
+				fputs(buffer, tmpfp);
+			fclose(tmpfp);
+			rewind(fd);
+		} else
+			cli_errmsg("Can't fdopen debugging file\n");
+	}
+#endif
+	if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
+		/* empty message */
+		fclose(fd);
+#ifdef	SAVE_TMP
+		unlink(tmpfilename);
+#endif
+		return CL_CLEAN;
+	}
+#ifdef	CL_THREAD_SAFE
+	pthread_mutex_lock(&tables_mutex);
+#endif
+	if(rfc821 == NULL) {
+		assert(subtype == NULL);
+
+		if(initialiseTables(&rfc821, &subtype) < 0) {
+			rfc821 = NULL;
+			subtype = NULL;
+#ifdef	CL_THREAD_SAFE
+			pthread_mutex_unlock(&tables_mutex);
+#endif
+			fclose(fd);
+#ifdef	SAVE_TMP
+			unlink(tmpfilename);
+#endif
+			return CL_EMEM;
+		}
+	}
+#ifdef	CL_THREAD_SAFE
+	pthread_mutex_unlock(&tables_mutex);
+#endif
+
+#ifdef HAVE_BACKTRACE
+	segv = signal(SIGSEGV, sigsegv);	/* previous handler is restored before returning */
+#endif
+
+	retcode = CL_SUCCESS;
+	body = NULL;
+
+	mctx.dir = dir;
+	mctx.rfc821Table = rfc821;
+	mctx.subtypeTable = subtype;
+	mctx.ctx = ctx;
+	mctx.files = 0;
+
+	/*
+	 * Is it a UNIX style mbox with more than one
+	 * mail message, or just a single mail message?
+	 *
+	 * TODO: It would be better if we called cli_scandir here rather than
+	 * in cli_scanmail. Then we could improve the way mailboxes with more
+	 * than one message is handled, e.g. giving a better indication of
+	 * which message within the mailbox is infected
+	 */
+	/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
+	if(strncmp(buffer, "From ", 5) == 0) {
+		/*
+		 * Have been asked to check a UNIX style mbox file, which
+		 * may contain more than one e-mail message to decode
+		 *
+		 * It would be far better for scanners.c to do this splitting
+		 * and do this
+		 *	FOR EACH mail in the mailbox
+		 *	DO
+		 *		pass this mail to cli_mbox --
+		 *		scan this file
+		 *		IF this file has a virus quit
+		 *		THEN
+		 *			return CL_VIRUS
+		 *		FI
+		 *	END
+		 * This would remove a problem with this code that it can
+		 * fill up the tmp directory before it starts scanning
+		 */
+		bool lastLineWasEmpty;
+		int messagenumber;
+		message *m = messageCreate();
+
+		if(m == NULL) {
+			fclose(fd);
+#ifdef HAVE_BACKTRACE
+			signal(SIGSEGV, segv);
+#endif
+#ifdef	SAVE_TMP
+			unlink(tmpfilename);
+#endif
+			return CL_EMEM;
+		}
+
+		lastLineWasEmpty = FALSE;
+		messagenumber = 1;
+		messageSetCTX(m, ctx);
+
+		do {
+			cli_chomp(buffer);
+			/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
+			if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
+				cli_dbgmsg("Deal with message number %d\n", messagenumber++);
+				/*
+				 * End of a message in the mail box
+				 */
+				body = parseEmailHeaders(m, rfc821);
+				if(body == NULL) {
+					messageReset(m);
+					continue;
+				}
+				messageSetCTX(body, ctx);
+				messageDestroy(m);
+				if(messageGetBody(body)) {
+					mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
+					if(rc == FAIL) {
+						messageReset(body);
+						m = body;	/* the emptied body collects the next message */
+						continue;
+					} else if(rc == VIRUS) {
+						cli_dbgmsg("Message number %d is infected\n",
+							messagenumber);
+						retcode = CL_VIRUS;
+						m = NULL;	/* m was destroyed above, skip the messageDestroy(m) after the loop */
+						break;
+					}
+				}
+				/*
+				 * Starting a new message, throw away all the
+				 * information about the old one. It would
+				 * be best to be able to scan this message
+				 * now, but cli_scanfile needs arguments
+				 * that haven't been passed here so it can't be
+				 * called
+				 */
+				m = body;
+				messageReset(body);
+				messageSetCTX(body, ctx);
+
+				cli_dbgmsg("Finished processing message\n");
+			} else
+				lastLineWasEmpty = (bool)(buffer[0] == '\0');
+
+			if(isuuencodebegin(buffer)) {
+				/*
+				 * Fast track visa to uudecode.
+				 * TODO: binhex, yenc
+				 */
+				if(uudecodeFile(m, buffer, dir, fd) < 0)
+					if(messageAddStr(m, buffer) < 0)
+						break;
+			} else
+				/* at this point, the \n has been removed */
+				if(messageAddStr(m, buffer) < 0)
+					break;
+		} while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
+
+		fclose(fd);
+
+		if(retcode == CL_SUCCESS) {
+			cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
+			body = parseEmailHeaders(m, rfc821);	/* the last message has no following "From " line */
+		}
+		if(m)
+			messageDestroy(m);
+	} else {
+		/*
+		 * It's a single message, parse the headers then the body
+		 */
+		if(strncmp(buffer, "P I ", 4) == 0)
+			/*
+			 * CommuniGate Pro format: ignore headers until
+			 * blank line
+			 */
+			while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
+				(strchr("\r\n", buffer[0]) == NULL))
+					;
+		/*
+		 * Ignore any blank lines at the top of the message
+		 */
+		while(strchr("\r\n", buffer[0]) &&
+		     (getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
+			;
+
+		buffer[sizeof(buffer) - 1] = '\0';	/* parseEmailFile() strcpy()s this line, so force termination */
+
+		body = parseEmailFile(fd, rfc821, buffer, dir);
+		fclose(fd);
+	}
+
+	if(body) {
+		/*
+		 * Write out the last entry in the mailbox
+		 */
+		if((retcode == CL_SUCCESS) && messageGetBody(body)) {
+			messageSetCTX(body, ctx);
+			switch(parseEmailBody(body, NULL, &mctx, 0)) {
+				case FAIL:
+					/*
+					 * beware: cli_magic_scandesc(),
+					 * changes this into CL_CLEAN, so only
+					 * use it to inform the higher levels
+					 * that we couldn't decode it because
+					 * it isn't an mbox, not to signal
+					 * decoding errors on what *is* a valid
+					 * mbox
+					 */
+					retcode = CL_EFORMAT;
+					break;
+				case MAXREC:
+					retcode = CL_EMAXREC;
+					break;
+				case MAXFILES:
+					retcode = CL_EMAXFILES;
+					break;
+				case VIRUS:
+					retcode = CL_VIRUS;
+					break;
+			}
+		}
+
+		/*
+		 * Tidy up and quit
+		 */
+		messageDestroy(body);
+	}
+
+	if((retcode == CL_CLEAN) && ctx->found_possibly_unwanted && (*ctx->virname == NULL)) {
+		*ctx->virname = "Phishing.Heuristics.Email";
+		ctx->found_possibly_unwanted = 0;
+		retcode = CL_VIRUS;
+	}
+
+	cli_dbgmsg("cli_mbox returning %d\n", retcode);
+
+#ifdef HAVE_BACKTRACE
+	signal(SIGSEGV, segv);
+#endif
+
+#ifdef	SAVE_TMP
+	unlink(tmpfilename);
+#endif
+	return retcode;
+}
+
+/*
+ * Read in an email message from fin, parse it, and return the message
+ *
+ * firstLine is a line that has already been read from fin; it is copied
+ * into the local buffer with strcpy(), so it must fit in
+ * RFC2821LENGTH + 1 bytes including its terminator. It is processed before
+ * any further line is fetched with getline_from_mbox().
+ *
+ * While inHeader is set, Content-Transfer-Encoding, Content-Disposition
+ * and Content-Type lines are collected in fullline, with continuation lines
+ * appended, and passed to parseEmailHeader(); any other header only feeds
+ * anyHeadersFound through usefulHeader(). Once the headers have ended, a
+ * line accepted by isuuencodebegin() is handed to uudecodeFile() and all
+ * other lines are added to the returned message with messageAddStr().
+ *
+ * dir is only passed on to uudecodeFile().
+ *
+ * Returns the message created here, or NULL if messageCreate() failed or
+ * no headers were found (the message is destroyed in the latter case).
+ *
+ * FIXME: files full of new lines and nothing else are
+ * handled ungracefully...
+ */
+static message *
+parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir)
+{
+	bool inHeader = TRUE;
+	bool bodyIsEmpty = TRUE;
+	bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
+	message *ret;
+	bool anyHeadersFound = FALSE;
+	int commandNumber = -1;
+	char *fullline = NULL, *boundary = NULL;
+	size_t fulllinelength = 0;	/* set to strlen + 1 when fullline is first duplicated */
+	char buffer[RFC2821LENGTH + 1];
+
+	cli_dbgmsg("parseEmailFile\n");
+
+	ret = messageCreate();
+	if(ret == NULL)
+		return NULL;
+
+	strcpy(buffer, firstLine);
+	do {
+		const char *line;	/* NULL stands for an empty line */
+
+		(void)cli_chomp(buffer);
+
+		if(buffer[0] == '\0')
+			line = NULL;
+		else
+			line = buffer;
+
+		/*
+		 * Don't blank lines which are only spaces from headers,
+		 * otherwise they'll be treated as the end of header marker
+		 */
+		if(lastWasBlank) {
+			lastWasBlank = FALSE;
+			if(boundaryStart(buffer, boundary)) {
+				cli_dbgmsg("Found a header line with space that should be blank\n");
+				inHeader = FALSE;
+			}
+		}
+		if(inHeader) {
+			cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
+				buffer ? buffer : "", fullline);
+			/*
+			 * Ensure wide characters are handled where
+			 * sizeof(char) > 1
+			 */
+			if(line && isspace(line[0] & 0xFF)) {
+				char copy[sizeof(buffer)];
+
+				strcpy(copy, buffer);
+				strstrip(copy);
+				if(copy[0] == '\0') {
+					/*
+					 * The header line contains only white
+					 * space. This is not the end of the
+					 * headers according to RFC2822, but
+					 * some MUAs will handle it as though
+					 * it were, and virus writers exploit
+					 * this bug. We can't just break from
+					 * the loop here since that would allow
+					 * other exploits such as inserting a
+					 * white space line before the
+					 * content-type line. So we just have
+					 * to make a best guess. Sigh.
+					 */
+					if(fullline) {
+						if(parseEmailHeader(ret, fullline, rfc821) < 0)
+							continue;
+
+						free(fullline);
+						fullline = NULL;
+					}
+					if(boundary ||
+					   ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
+						lastWasBlank = TRUE;
+						continue;
+					}
+				}
+			}
+			if((line == NULL) && (fullline == NULL)) {	/* empty line */
+				/*
+				 * A blank line signifies the end of
+				 * the header and the start of the text
+				 */
+				if(!anyHeadersFound)
+					/* Ignore the junk at the top */
+					continue;
+
+				cli_dbgmsg("End of header information\n");
+				inHeader = FALSE;
+				bodyIsEmpty = TRUE;
+			} else {
+				char *ptr;
+				int lookahead;
+
+				if(fullline == NULL) {
+					char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
+
+					/*
+					 * Continuation of line we're ignoring?
+					 */
+					if(isblank(line[0]))
+						continue;
+
+					/*
+					 * Is this a header we're interested in?
+					 */
+					if((strchr(line, ':') == NULL) ||
+					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
+						if(strncmp(line, "From ", 5) == 0)
+							anyHeadersFound = TRUE;
+						continue;
+					}
+
+					ptr = rfc822comments(cmd, out);
+					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
+
+					switch(commandNumber) {
+						case CONTENT_TRANSFER_ENCODING:
+						case CONTENT_DISPOSITION:
+						case CONTENT_TYPE:
+							anyHeadersFound = TRUE;
+							break;
+						default:
+							if(!anyHeadersFound)
+								anyHeadersFound = usefulHeader(commandNumber, cmd);
+							continue;
+					}
+					fullline = cli_strdup(line);
+					fulllinelength = strlen(line) + 1;
+				} else if(line != NULL) {
+					fulllinelength += strlen(line);
+					ptr = cli_realloc(fullline, fulllinelength);
+					if(ptr == NULL)
+						continue;
+					fullline = ptr;
+					strcat(fullline, line);
+				}
+
+				assert(fullline != NULL);
+
+				lookahead = getc(fin);	/* peek at the first character of the next line */
+				if(lookahead != EOF) {
+					ungetc(lookahead, fin);
+
+					/*
+					 * Section B.2 of RFC822 says TAB or
+					 * SPACE means a continuation of the
+					 * previous entry.
+					 *
+					 * Add all the arguments on the line
+					 */
+					if(isblank(lookahead))
+						continue;
+				}
+
+				/*
+				 * Handle broken headers, where the next
+				 * line isn't indented by whitespace
+				 */
+				if(fullline[fulllinelength - 2] == ';')
+					/* Add arguments to this line */
+					continue;
+
+				if(line && (count_quotes(fullline) & 1))	/* odd count: a quoted string is still open */
+					continue;
+
+				ptr = rfc822comments(fullline, NULL);
+				if(ptr) {
+					free(fullline);
+					fullline = ptr;
+				}
+
+				if(parseEmailHeader(ret, fullline, rfc821) < 0)
+					continue;
+
+				free(fullline);
+				fullline = NULL;
+			}
+		} else if(line && isuuencodebegin(line)) {
+			/*
+			 * Fast track visa to uudecode.
+			 * TODO: binhex, yenc
+			 */
+			bodyIsEmpty = FALSE;
+			if(uudecodeFile(ret, line, dir, fin) < 0)
+				if(messageAddStr(ret, line) < 0)
+					break;
+		} else {
+			if(line == NULL) {
+				/*
+				 * Although this would save time and RAM, some
+				 * phish signatures have been built which need
+				 * the blank lines
+				 */
+				if(lastBodyLineWasBlank &&
+				  (messageGetMimeType(ret) != TEXT)) {
+					cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
+					continue;
+				}
+				lastBodyLineWasBlank = TRUE;
+			} else {
+				if(bodyIsEmpty) {
+					/*
+					 * Broken message: new line in the
+					 * middle of the headers, so the first
+					 * line of the body is in fact
+					 * the last lines of the header
+					 */
+					if(newline_in_header(line))
+						continue;
+					bodyIsEmpty = FALSE;
+				}
+				lastBodyLineWasBlank = FALSE;
+			}
+
+			if(messageAddStr(ret, line) < 0)
+				break;
+		}
+	} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
+
+	if(boundary)
+		free(boundary);
+
+	if(fullline) {
+		if(*fullline) switch(commandNumber) {
+			case CONTENT_TRANSFER_ENCODING:
+			case CONTENT_DISPOSITION:
+			case CONTENT_TYPE:
+				cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
+		}
+		free(fullline);
+	}
+
+	if(!anyHeadersFound) {
+		/*
+		 * False positive in believing we have an e-mail when we don't
+		 */
+		messageDestroy(ret);
+		cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
+		return NULL;
+	}
+
+	cli_dbgmsg("parseEmailFile: return\n");
+
+	return ret;
+}
+
+/*
+ * The given message contains a raw e-mail.
+ *
+ * Returns the message's body with the correct arguments set, empties the
+ * given message's contents (note that it isn't destroyed)
+ *
+ * Header lines are taken from m's text list. Content-Transfer-Encoding,
+ * Content-Disposition and Content-Type lines are collected in fullline
+ * (folded continuation lines are appended) and passed to
+ * parseEmailHeader(); any other header only feeds anyHeadersFound through
+ * usefulHeader(). The first real body line and everything after it is
+ * moved into the new message with messageMoveText().
+ *
+ * Returns NULL if m is NULL, if the new message cannot be created, or if
+ * no headers were found; m is left for the caller to reset or destroy.
+ *
+ * TODO: remove the duplication with parseEmailFile
+ */
+static message *
+parseEmailHeaders(message *m, const table_t *rfc821)
+{
+	bool inHeader = TRUE;
+	bool bodyIsEmpty = TRUE;
+	text *t;
+	message *ret;
+	bool anyHeadersFound = FALSE;
+	int commandNumber = -1;
+	char *fullline = NULL;
+	size_t fulllinelength = 0;
+
+	cli_dbgmsg("parseEmailHeaders\n");
+
+	if(m == NULL)
+		return NULL;
+
+	ret = messageCreate();
+	if(ret == NULL)
+		/* out of memory: same handling as parseEmailFile */
+		return NULL;
+
+	for(t = messageGetBody(m); t; t = t->t_next) {
+		const char *line;	/* NULL stands for an empty line */
+
+		if(t->t_line)
+			line = lineGetData(t->t_line);
+		else
+			line = NULL;
+
+		if(inHeader) {
+			cli_dbgmsg("parseEmailHeaders: check '%s'\n",
+				line ? line : "");
+			if(line == NULL) {
+				/*
+				 * A blank line signifies the end of
+				 * the header and the start of the text
+				 */
+				cli_dbgmsg("End of header information\n");
+				if(!anyHeadersFound) {
+					cli_dbgmsg("Nothing interesting in the header\n");
+					break;
+				}
+				inHeader = FALSE;
+				bodyIsEmpty = TRUE;
+			} else {
+				char *ptr;
+
+				if(fullline == NULL) {
+					char cmd[RFC2821LENGTH + 1];
+
+					/*
+					 * Continuation of line we're ignoring?
+					 */
+					if(isblank(line[0]))
+						continue;
+
+					/*
+					 * Is this a header we're interested in?
+					 */
+					if((strchr(line, ':') == NULL) ||
+					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
+						if(strncmp(line, "From ", 5) == 0)
+							anyHeadersFound = TRUE;
+						continue;
+					}
+
+					ptr = rfc822comments(cmd, NULL);
+					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
+					if(ptr)
+						free(ptr);
+
+					switch(commandNumber) {
+						case CONTENT_TRANSFER_ENCODING:
+						case CONTENT_DISPOSITION:
+						case CONTENT_TYPE:
+							anyHeadersFound = TRUE;
+							break;
+						default:
+							if(!anyHeadersFound)
+								anyHeadersFound = usefulHeader(commandNumber, cmd);
+							continue;
+					}
+					fullline = cli_strdup(line);
+					if(fullline == NULL)
+						/*
+						 * Out of memory: skip this
+						 * header rather than trip
+						 * the assert below
+						 */
+						continue;
+					fulllinelength = strlen(line) + 1;
+				} else if(line) {
+					fulllinelength += strlen(line);
+					ptr = cli_realloc(fullline, fulllinelength);
+					if(ptr == NULL)
+						continue;
+					fullline = ptr;
+					strcat(fullline, line);
+				}
+				assert(fullline != NULL);
+
+				if(next_is_folded_header(t))
+					/* Add arguments to this line */
+					continue;
+
+				lineUnlink(t->t_line);
+				t->t_line = NULL;
+
+				/* an odd count means a quoted string is still open */
+				if(count_quotes(fullline) & 1)
+					continue;
+
+				ptr = rfc822comments(fullline, NULL);
+				if(ptr) {
+					free(fullline);
+					fullline = ptr;
+				}
+
+				if(parseEmailHeader(ret, fullline, rfc821) < 0)
+					continue;
+
+				free(fullline);
+				fullline = NULL;
+			}
+		} else {
+			if(bodyIsEmpty) {
+				if(line == NULL)
+					/* throw away leading blank lines */
+					continue;
+				/*
+				 * Broken message: new line in the
+				 * middle of the headers, so the first
+				 * line of the body is in fact
+				 * the last lines of the header
+				 */
+				if(newline_in_header(line))
+					continue;
+				bodyIsEmpty = FALSE;
+			}
+			/*if(t->t_line && isuuencodebegin(t->t_line))
+				puts("FIXME: add fast visa here");*/
+			cli_dbgmsg("parseEmailHeaders: inished with headers, moving body\n");
+			messageMoveText(ret, t, m);
+			break;
+		}
+	}
+
+	if(fullline) {
+		if(*fullline) switch(commandNumber) {
+			case CONTENT_TRANSFER_ENCODING:
+			case CONTENT_DISPOSITION:
+			case CONTENT_TYPE:
+				cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
+		}
+		free(fullline);
+	}
+
+	if(!anyHeadersFound) {
+		/*
+		 * False positive in believing we have an e-mail when we don't
+		 */
+		messageDestroy(ret);
+		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
+		return NULL;
+	}
+
+	cli_dbgmsg("parseEmailHeaders: return\n");
+
+	return ret;
+}
+
+/*
+ * Handle a header line of an email message
+ *
+ * line is split at the first of ':', '=' or ' ' (tried in that order) that
+ * occurs anywhere in it; the part before the separator is the header name
+ * and the rest is handed to parseMimeHeader() as its argument.
+ *
+ * Returns the result of parseMimeHeader(), or -1 if line holds none of the
+ * separators, if the name is empty after stripping, if there is no
+ * argument, or if a working copy of line cannot be allocated.
+ */
+static int
+parseEmailHeader(message *m, const char *line, const table_t *rfc821)
+{
+	int ret;
+#ifdef CL_THREAD_SAFE
+	char *strptr;
+#endif
+	const char *separater;
+	char *cmd, *copy, tokenseparater[2];
+
+	cli_dbgmsg("parseEmailHeader '%s'\n", line);
+
+	/*
+	 * In RFC822 the separater between the key a value is a colon,
+	 * e.g.	Content-Transfer-Encoding: base64
+	 * However some MUA's are lapse about this and virus writers exploit
+	 * this hole, so we need to check all known possiblities
+	 */
+	for(separater = ":= "; *separater; separater++)
+		if(strchr(line, *separater) != NULL)
+			break;
+
+	if(*separater == '\0')
+		return -1;
+
+	copy = rfc2047(line);
+	if(copy == NULL) {
+		/* an RFC checker would return -1 here */
+		copy = cli_strdup(line);
+		if(copy == NULL)
+			/*
+			 * Out of memory: a NULL string must not reach
+			 * strtok()/strtok_r(), which would then resume from
+			 * stale or uninitialised state
+			 */
+			return -1;
+	}
+
+	tokenseparater[0] = *separater;
+	tokenseparater[1] = '\0';
+
+	ret = -1;
+
+#ifdef	CL_THREAD_SAFE
+	cmd = strtok_r(copy, tokenseparater, &strptr);
+#else
+	cmd = strtok(copy, tokenseparater);
+#endif
+
+	if(cmd && (strstrip(cmd) > 0)) {
+		/* an empty delimiter set yields the whole remainder as one token */
+#ifdef	CL_THREAD_SAFE
+		char *arg = strtok_r(NULL, "", &strptr);
+#else
+		char *arg = strtok(NULL, "");
+#endif
+
+		if(arg)
+			/*
+			 * Found a header such as
+			 * Content-Type: multipart/mixed;
+			 * set arg to be
+			 * "multipart/mixed" and cmd to
+			 * be "Content-Type"
+			 */
+			ret = parseMimeHeader(m, cmd, rfc821, arg);
+	}
+	free(copy);
+	return ret;
+}
+
+/*
+ * This is a recursive routine.
+ * FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION
+ *
+ * This function parses the body of mainMessage and saves its attachments in dir
+ *
+ * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
+ * any headers. First time of calling it'll be
+ * the whole message. Later it'll be parts of a multipart message
+ * textIn is the plain text message being built up so far
+ */
+static mbox_status
+parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
+{
+	mbox_status rc;
+	text *aText = textIn;
+	message *mainMessage = messageIn;
+	fileblob *fb;
+	bool infected = FALSE;
+	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
+	const struct cl_limits *limits = mctx->ctx->limits;
+
+	cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
+		mctx->files);
+
+	if(limits) {
+		if(limits->maxmailrec) {
+			const cli_ctx *ctx = mctx->ctx;	/* needed for BLOCKMAX :-( */
+
+			/*
+			 * This is approximate
+			 */
+			if(recursion_level > limits->maxmailrec) {
+
+				cli_warnmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
+				if(BLOCKMAX) {
+					if(ctx->virname)
+						*ctx->virname = "MIME.RecursionLimit";
+					return VIRUS;
+				} else
+					return MAXREC;
+			}
+		}
+		if(limits->maxfiles && (mctx->files >= limits->maxfiles)) {
+			/*
+			 * FIXME: This is only approx - it may have already
+			 * been exceeded
+			 */
+			cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", limits->maxfiles);
+			return MAXFILES;
+		}
+	}
+
+	rc = OK;
+
+	/* Anything left to be parsed? */
+	if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
+		mime_type mimeType;
+		int subtype, inhead, htmltextPart, inMimeHead, i;
+		const char *mimeSubtype;
+		char *protocol, *boundary;
+		const text *t_line;
+		/*bool isAlternative;*/
+		message *aMessage;
+		int multiparts = 0;
+		message **messages = NULL;	/* parts of a multipart message */
+
+		cli_dbgmsg("Parsing mail file\n");
+
+		mimeType = messageGetMimeType(mainMessage);
+		mimeSubtype = messageGetMimeSubtype(mainMessage);
+
+		/* pre-process */
+		subtype = tableFind(mctx->subtypeTable, mimeSubtype);
+		if((mimeType == TEXT) && (subtype == PLAIN)) {
+			/*
+			 * This is effectively no encoding, notice that we
+			 * don't check that charset is us-ascii
+			 */
+			cli_dbgmsg("text/plain: Assume no attachements\n");
+			mimeType = NOMIME;
+			messageSetMimeSubtype(mainMessage, "");
+		} else if((mimeType == MESSAGE) &&
+			  (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
+			/*
+			 * RFC1892/RFC3462: section 2 text/rfc822-headers
+			 * incorrectly sent as message/rfc822-headers
+			 *
+			 * Parse as text/plain, i.e. no mime
+			 */
+			cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
+			mimeType = NOMIME;
+			messageSetMimeSubtype(mainMessage, "");
+		} else
+			cli_dbgmsg("mimeType = %d\n", (int)mimeType);
+
+		switch(mimeType) {
+		case NOMIME:
+			cli_dbgmsg("Not a mime encoded message\n");
+			aText = textAddMessage(aText, mainMessage);
+			if(!doPhishingScan)
+				break;
+			/*
+			 * Fall through: some phishing mails claim they are
+			 * text/plain, when they are in fact html
+			 */
+		case TEXT:
+			/* text/plain has been preprocessed as no encoding */
+			if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) || doPhishingScan) {
+				/*
+				 * It would be better to save and scan the
+				 * file and only checkURLs if it's found to be
+				 * clean
+				 */
+				checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
+				/*
+				 * There might be html sent without subtype
+				 * html too, so scan them for phishing
+				 */
+				if(rc == VIRUS)
+					infected = TRUE;
+			}
+			break;
+		case MULTIPART:
+			cli_dbgmsg("Content-type 'multipart' handler\n");
+			boundary = messageFindArgument(mainMessage, "boundary");
+
+			if(boundary == NULL) {
+				cli_warnmsg("Multipart/%s MIME message contains no boundary header\n",
+					mimeSubtype);
+				/* Broken e-mail message */
+				mimeType = NOMIME;
+				/*
+				 * The break means that we will still
+				 * check if the file contains a uuencoded file
+				 */
+				break;
+			}
+
+			/* Perhaps it should assume mixed? */
+			if(mimeSubtype[0] == '\0') {
+				cli_warnmsg("Multipart has no subtype assuming alternative\n");
+				mimeSubtype = "alternative";
+				messageSetMimeSubtype(mainMessage, "alternative");
+			}
+
+			/*
+			 * Get to the start of the first message
+			 */
+			t_line = messageGetBody(mainMessage);
+
+			if(t_line == NULL) {
+				cli_warnmsg("Multipart MIME message has no body\n");
+				free((char *)boundary);
+				mimeType = NOMIME;
+				break;
+			}
+
+			do
+				if(t_line->t_line) {
+					if(boundaryStart(lineGetData(t_line->t_line), boundary))
+						break;
+					/*
+					 * Found a binhex file before
+					 *	the first multipart
+					 * TODO: check yEnc
+					 */
+					if(binhexBegin(mainMessage) == t_line) {
+						if(exportBinhexMessage(mctx, mainMessage)) {
+							/* virus found */
+							rc = VIRUS;
+							infected = TRUE;
+							break;
+						}
+					} else if(t_line->t_next &&
+						 (encodingLine(mainMessage) == t_line->t_next)) {
+						/*
+						 * We look for the next line
+						 * since later on we'll skip
+						 * over the important line when
+						 * we think it's a blank line
+						 * at the top of the message -
+						 * which it would have been in
+						 * an RFC compliant world
+						 */
+						cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
+							lineGetData(t_line->t_next->t_line));
+						if(messageGetEncoding(mainMessage) == NOENCODING)
+							break;
+					}
+				}
+			while((t_line = t_line->t_next) != NULL);
+
+			if(t_line == NULL) {
+				cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
+					boundary);
+				/*
+				 * Free added by Thomas Lamy
+				 * <Thomas.Lamy at in-online.net>
+				 */
+				free((char *)boundary);
+				mimeType = NOMIME;
+				/*
+				 * The break means that we will still
+				 * check if the file contains a yEnc/binhex file
+				 */
+				break;
+			}
+			/*
+			 * Build up a table of all of the parts of this
+			 * multipart message. Remember, each part may itself
+			 * be a multipart message.
+			 */
+			inhead = 1;
+			inMimeHead = 0;
+
+			/*
+			 * Re-read this variable in case mimeSubtype has changed
+			 */
+			subtype = tableFind(mctx->subtypeTable, mimeSubtype);
+
+			/*
+			 * Parse the mainMessage object and create an array
+			 * of objects called messages, one for each of the
+			 * multiparts that mainMessage contains.
+			 *
+			 * This looks like parseEmailHeaders() - maybe there's
+			 * some duplication of code to be cleaned up
+			 *
+			 * We may need to create an array rather than just
+			 * save each part as it is found because not all
+			 * elements will need scanning, and we don't yet know
+			 * which of those elements it will be, except in
+			 * the case of mixed, when all parts need to be scanned.
+			 */
+			for(multiparts = 0; t_line && !infected; multiparts++) {
+				int lines = 0;
+				message **m;
+				mbox_status old_rc;
+
+				m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
+				if(m == NULL)
+					break;
+				messages = m;
+
+				aMessage = messages[multiparts] = messageCreate();
+				if(aMessage == NULL) {
+					multiparts--;
+					continue;
+				}
+				messageSetCTX(aMessage, mctx->ctx);
+
+				cli_dbgmsg("Now read in part %d\n", multiparts);
+
+				/*
+				 * Ignore blank lines. There shouldn't be ANY
+				 * but some viruses insert them
+				 */
+				while((t_line = t_line->t_next) != NULL)
+					if(t_line->t_line &&
+					   /*(cli_chomp(t_line->t_text) > 0))*/
+					   (strlen(lineGetData(t_line->t_line)) > 0))
+						break;
+
+				if(t_line == NULL) {
+					cli_dbgmsg("Empty part\n");
+					/*
+					 * Remove this part unless there's
+					 * a binhex portion somewhere in
+					 * the complete message that we may
+					 * throw away by mistake if the MIME
+					 * encoding information is incorrect
+					 */
+					if(mainMessage &&
+					   (binhexBegin(mainMessage) == NULL)) {
+						messageDestroy(aMessage);
+						--multiparts;
+					}
+					continue;
+				}
+
+				do {
+					const char *line = lineGetData(t_line->t_line);
+
+					/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
+						multiparts, inMimeHead, inhead, boundary, line,
+						t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
+
+					if(inMimeHead) {	/* continuation line */
+						if(line == NULL) {
+							/*inhead =*/ inMimeHead = 0;
+							continue;
+						}
+						/*
+						 * Handle continuation lines
+						 * because the previous line
+						 * ended with a ; or this line
+						 * starts with a white space
+						 */
+						cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
+							multiparts, line);
+						/*
+						 * Handle the case when it
+						 * isn't really a continuation
+						 * line:
+						 * Content-Type: application/octet-stream;
+						 * Content-Transfer-Encoding: base64
+						 */
+						parseEmailHeader(aMessage, line, mctx->rfc821Table);
+
+						while(isspace((int)*line))
+							line++;
+
+						if(*line == '\0') {
+							inhead = inMimeHead = 0;
+							continue;
+						}
+						inMimeHead = FALSE;
+						messageAddArgument(aMessage, line);
+					} else if(inhead) {	/* handling normal headers */
+						/*int quotes;*/
+						char *fullline, *ptr;
+
+						if(line == NULL) {
+							/*
+							 * empty line, should the end of the headers,
+							 * but some base64 decoders, e.g. uudeview, are broken
+							 * and will handle this type of entry, decoding the
+							 * base64 content...
+							 * Content-Type: application/octet-stream; name=text.zip
+							 * Content-Transfer-Encoding: base64
+							 * Content-Disposition: attachment; filename="text.zip"
+							 *
+							 * Content-Disposition: attachment;
+							 *	filename=text.zip
+							 * Content-Type: application/octet-stream;
+							 *	name=text.zip
+							 * Content-Transfer-Encoding: base64
+							 *
+							 * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
+							 */
+							const text *next = t_line->t_next;
+
+							if(next && next->t_line) {
+								const char *data = lineGetData(next->t_line);
+
+								if((messageGetEncoding(aMessage) == NOENCODING) &&
+								   (messageGetMimeType(aMessage) == APPLICATION) &&
+								   strstr(data, "base64")) {
+									/*
+									 * Handle this nightmare (note the blank
+									 * line in the header and the incorrect
+									 * content-transfer-encoding header)
+									 *
+									 * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
+									 *
+									 * r-Encoding: base64
+									 * Content-Disposition: attachment; filename="zipped_files.EXE"
+									 */
+									messageSetEncoding(aMessage, "base64");
+									cli_dbgmsg("Ignoring fake end of headers\n");
+									continue;
+								}
+								if((strncmp(data, "Content", 7) == 0) ||
+								   (strncmp(data, "filename=", 9) == 0)) {
+									cli_dbgmsg("Ignoring fake end of headers\n");
+									continue;
+								}
+							}
+							cli_dbgmsg("Multipart %d: End of header information\n",
+								multiparts);
+							inhead = 0;
+							continue;
+						}
+						if(isspace((int)*line)) {
+							/*
+							 * The first line is
+							 * continuation line.
+							 * This is tricky
+							 * to handle, but
+							 * all we can do is our
+							 * best
+							 */
+							cli_dbgmsg("Part %d starts with a continuation line\n",
+								multiparts);
+							messageAddArgument(aMessage, line);
+							/*
+							 * Give it a default
+							 * MIME type since
+							 * that may be the
+							 * missing line
+							 *
+							 * Choose application to
+							 * force a save
+							 */
+							if(messageGetMimeType(aMessage) == NOMIME)
+								messageSetMimeType(aMessage, "application");
+							continue;
+						}
+
+						inMimeHead = FALSE;
+
+						assert(strlen(line) <= RFC2821LENGTH);
+
+						fullline = rfc822comments(line, NULL);
+						if(fullline == NULL)
+							fullline = cli_strdup(line);
+
+						/*quotes = count_quotes(fullline);*/
+
+						/*
+						 * Fold next lines to the end of this
+						 * if they start with a white space
+						 * or if this line has an odd number of quotes:
+						 * Content-Type: application/octet-stream; name="foo
+						 * "
+						 */
+						while(t_line && next_is_folded_header(t_line)) {
+							const char *data;
+
+							t_line = t_line->t_next;
+
+							data = lineGetData(t_line->t_line);
+
+							if(data[1] == '\0') {
+								/*
+								 * Broken message: the
+								 * blank line at the end
+								 * of the headers isn't blank -
+								 * it contains a space
+								 */
+								cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
+									multiparts);
+								inhead = FALSE;
+								break;
+							}
+
+							ptr = cli_realloc(fullline,
+								strlen(fullline) + strlen(data) + 1);
+
+							if(ptr == NULL)
+								break;
+
+							fullline = ptr;
+							strcat(fullline, data);
+
+							/*quotes = count_quotes(data);*/
+						}
+
+						cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
+							multiparts, fullline);
+
+						parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
+						free(fullline);
+					} else if(boundaryEnd(line, boundary)) {
+						/*
+						 * Some viruses put information
+						 * *after* the end of message,
+						 * which presumably some broken
+						 * mail clients find, so we
+						 * can't assume that this
+						 * is the end of the message
+						 */
+						/* t_line = NULL;*/
+						break;
+					} else if(boundaryStart(line, boundary)) {
+						inhead = 1;
+						break;
+					} else {
+						if(messageAddLine(aMessage, t_line->t_line) < 0)
+							break;
+						lines++;
+					}
+				} while((t_line = t_line->t_next) != NULL);
+
+				cli_dbgmsg("Part %d has %d lines, rc = %d\n",
+					multiparts, lines, (int)rc);
+
+				/*
+				 * Only save in the array of messages if some
+				 * decision will be taken on whether to scan.
+				 * If all parts will be scanned then save to
+				 * file straight away
+				 */
+				switch(subtype) {
+					case MIXED:
+					case ALTERNATIVE:
+					case REPORT:
+					case DIGEST:
+					case APPLEDOUBLE:
+					case KNOWBOT:
+					case -1:
+						old_rc = rc;
+						mainMessage = do_multipart(mainMessage,
+							messages, multiparts,
+							&rc, mctx, messageIn,
+							&aText, recursion_level);
+						if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
+							rc = OK;
+						if(messages[multiparts]) {
+							messageDestroy(messages[multiparts]);
+							messages[multiparts] = NULL;
+						}
+						--multiparts;
+						if(rc == VIRUS)
+							infected = TRUE;
+						break;
+				}
+			}
+
+			free((char *)boundary);
+
+			/*
+			 * Preprocess. Anything special to be done before
+			 * we handle the multiparts?
+			 */
+			switch(subtype) {
+				case KNOWBOT:
+					/* TODO */
+					cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
+					mimeSubtype = "mixed";
+					break;
+				case -1:
+					/*
+					 * According to section 7.2.6 of
+					 * RFC1521, unrecognised multiparts
+					 * should be treated as multipart/mixed.
+					 */
+					cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
+					mimeSubtype = "mixed";
+					break;
+			}
+
+			/*
+			 * We've finished message we're parsing
+			 */
+			if(mainMessage && (mainMessage != messageIn)) {
+				messageDestroy(mainMessage);
+				mainMessage = NULL;
+			}
+
+			cli_dbgmsg("The message has %d parts\n", multiparts);
+
+			if(infected || ((multiparts == 0) && (aText == NULL))) {
+				if(messages) {
+					for(i = 0; i < multiparts; i++)
+						if(messages[i])
+							messageDestroy(messages[i]);
+					free(messages);
+				}
+				if(aText && (textIn == NULL))
+					textDestroy(aText);
+
+				/*
+				 * Nothing to do
+				 */
+				switch(rc) {
+					case VIRUS: return VIRUS;
+					case MAXREC: return MAXREC;
+					default: return OK_ATTACHMENTS_NOT_SAVED;
+				}
+			}
+
+			cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
+
+			/*
+			 * We now have all the parts of the multipart message
+			 * in the messages array:
+			 *	message *messages[multiparts]
+			 * Let's decide what to do with them all
+			 */
+			switch(tableFind(mctx->subtypeTable, mimeSubtype)) {
+			case RELATED:
+				cli_dbgmsg("Multipart related handler\n");
+				/*
+				 * Have a look to see if there's HTML code
+				 * which will need scanning
+				 */
+				aMessage = NULL;
+				assert(multiparts > 0);
+
+				htmltextPart = getTextPart(messages, multiparts);
+
+				if(htmltextPart >= 0) {
+					if(messageGetBody(messages[htmltextPart]))
+						aText = textAddMessage(aText, messages[htmltextPart]);
+				} else
+					/*
+					 * There isn't an HTML bit. If there's a
+					 * multipart bit, it'll may be in there
+					 * somewhere
+					 */
+					for(i = 0; i < multiparts; i++)
+						if(messageGetMimeType(messages[i]) == MULTIPART) {
+							aMessage = messages[i];
+							htmltextPart = i;
+							break;
+						}
+
+				if(htmltextPart == -1)
+					cli_dbgmsg("No HTML code found to be scanned\n");
+				else {
+					rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
+					if((rc == OK) && aMessage) {
+						assert(aMessage == messages[htmltextPart]);
+						messageDestroy(aMessage);
+						messages[htmltextPart] = NULL;
+					} else if(rc == VIRUS) {
+						infected = TRUE;
+						break;
+					}
+				}
+
+				/*
+				 * Fixed based on an idea from Stephen White <stephen at earth.li>
+				 * The message is confused about the difference
+				 * between alternative and related. Badtrans.B
+				 * suffers from this problem.
+				 *
+				 * Fall through in this case:
+				 * Content-Type: multipart/related;
+				 *	type="multipart/alternative"
+				 */
+				/*
+				 * Changed to always fall through based on
+				 * an idea from Michael Dankov <misha at btrc.ru>
+				 * that some viruses are completely confused
+				 * about the difference between related
+				 * and mixed
+				 */
+				/*cptr = messageFindArgument(mainMessage, "type");
+				if(cptr == NULL)
+					break;
+				isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
+				free((char *)cptr);
+				if(!isAlternative)
+					break;*/
+			case DIGEST:
+				/*
+				 * According to section 5.1.5 RFC2046, the
+				 * default mime type of multipart/digest parts
+				 * is message/rfc822
+				 *
+				 * We consider them as alternative, wrong in
+				 * the strictest sense since they aren't
+				 * alternatives - all parts a valid - but it's
+				 * OK for our needs since it means each part
+				 * will be scanned
+				 */
+			case ALTERNATIVE:
+				cli_dbgmsg("Multipart alternative handler\n");
+
+				/*
+				 * Fall through - some clients are broken and
+				 * say alternative instead of mixed. The Klez
+				 * virus is broken that way, and anyway we
+				 * wish to scan all of the alternatives
+				 */
+			case REPORT:
+				/*
+				 * According to section 1 of RFC1892, the
+				 * syntax of multipart/report is the same
+				 * as multipart/mixed. There are some required
+				 * parameters, but there's no need for us to
+				 * verify that they exist
+				 */
+			case MIXED:
+			case APPLEDOUBLE:	/* not really supported */
+				/*
+				 * Look for attachments
+				 *
+				 * Not all formats are supported. If an
+				 * unsupported format turns out to be
+				 * common enough to implement, it is a simple
+				 * matter to add it
+				 */
+				if(aText) {
+					if(mainMessage && (mainMessage != messageIn))
+						messageDestroy(mainMessage);
+					mainMessage = NULL;
+				}
+
+				cli_dbgmsg("Mixed message with %d parts\n", multiparts);
+				for(i = 0; i < multiparts; i++) {
+					mainMessage = do_multipart(mainMessage,
+						messages, i, &rc, mctx,
+						messageIn, &aText, recursion_level + 1);
+					if(rc == VIRUS) {
+						infected = TRUE;
+						break;
+					}
+					if(rc == MAXREC)
+						break;
+				}
+
+				/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
+				break;
+			case SIGNED:
+			case PARALLEL:
+				/*
+				 * If we're here it could be because we have a
+				 * multipart/mixed message, consisting of a
+				 * message followed by an attachment. That
+				 * message itself is a multipart/alternative
+				 * message and we need to dig out the plain
+				 * text part of that alternative
+				 */
+				htmltextPart = getTextPart(messages, multiparts);
+				if(htmltextPart == -1)
+					htmltextPart = 0;
+
+				rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
+				break;
+			case ENCRYPTED:
+				rc = FAIL;	/* Not yet handled */
+				protocol = (char *)messageFindArgument(mainMessage, "protocol");
+				if(protocol) {
+					if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
+						/* RFC2015 */
+						cli_warnmsg("PGP encoded attachment not scanned\n");
+						rc = OK_ATTACHMENTS_NOT_SAVED;
+					} else
+						cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", protocol);
+					free(protocol);
+				} else
+					cli_dbgmsg("Encryption method missing protocol name\n");
+
+				break;
+			default:
+				assert(0);
+			}
+
+			if(mainMessage && (mainMessage != messageIn))
+				messageDestroy(mainMessage);
+
+			if(aText && (textIn == NULL)) {
+				if((!infected) && (fb = fileblobCreate()) != NULL) {
+					cli_dbgmsg("Save non mime and/or text/plain part\n");
+					fileblobSetFilename(fb, mctx->dir, "textpart");
+					/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
+					fileblobSetCTX(fb, mctx->ctx);
+					(void)textToFileblob(aText, fb, 1);
+
+					fileblobDestroy(fb);
+					mctx->files++;
+				}
+				textDestroy(aText);
+			}
+
+			for(i = 0; i < multiparts; i++)
+				if(messages[i])
+					messageDestroy(messages[i]);
+
+			if(messages)
+				free(messages);
+
+			return rc;
+
+		case MESSAGE:
+			/*
+			 * Check for forbidden encodings
+			 */
+			switch(messageGetEncoding(mainMessage)) {
+				case NOENCODING:
+				case EIGHTBIT:
+				case BINARY:
+					break;
+				default:
+					cli_warnmsg("MIME type 'message' cannot be decoded\n");
+					break;
+			}
+			rc = FAIL;
+			if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
+			   (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
+				message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
+				if(m) {
+					cli_dbgmsg("Decode rfc822\n");
+
+					messageSetCTX(m, mctx->ctx);
+
+					if(mainMessage && (mainMessage != messageIn)) {
+						messageDestroy(mainMessage);
+						mainMessage = NULL;
+					} else
+						messageReset(mainMessage);
+					if(messageGetBody(m))
+						rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
+
+					messageDestroy(m);
+				}
+				break;
+			} else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
+				/* RFC 2298 - handle like a normal email */
+				rc = OK;
+				break;
+			} else if(strcasecmp(mimeSubtype, "partial") == 0) {
+#ifdef	PARTIAL_DIR
+				/* RFC1341 message split over many emails */
+				if(rfc1341(mainMessage, mctx->dir) >= 0)
+					rc = OK;
+#else
+				cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
+#endif
+			} else if(strcasecmp(mimeSubtype, "external-body") == 0)
+				/* TODO */
+				cli_warnmsg("Attempt to send Content-type message/external-body trapped");
+			else
+				cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
+
+
+			if(mainMessage && (mainMessage != messageIn))
+				messageDestroy(mainMessage);
+			if(messages)
+				free(messages);
+			return rc;
+
+		default:
+			cli_warnmsg("Message received with unknown mime encoding - assume application");
+			/*
+			 * Some Yahoo emails attach as
+			 * Content-Type: X-unknown/unknown;
+			 * instead of
+			 * Content-Type: application/unknown;
+			 * so let's try our best to salvage something
+			 */
+		case APPLICATION:
+			/*cptr = messageGetMimeSubtype(mainMessage);
+
+			if((strcasecmp(cptr, "octet-stream") == 0) ||
+			   (strcasecmp(cptr, "x-msdownload") == 0)) {*/
+			{
+				fb = messageToFileblob(mainMessage, mctx->dir, 1);
+
+				if(fb) {
+					cli_dbgmsg("Saving main message as attachment\n");
+					if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+						rc = VIRUS;
+					mctx->files++;
+					if(mainMessage != messageIn) {
+						messageDestroy(mainMessage);
+						mainMessage = NULL;
+					} else
+						messageReset(mainMessage);
+				}
+			} /*else
+				cli_warnmsg("Discarded application not sent as attachment\n");*/
+			break;
+
+		case AUDIO:
+		case VIDEO:
+		case IMAGE:
+			break;
+		}
+
+		if(messages) {
+			/* "can't happen" */
+			cli_warnmsg("messages != NULL, report to http://bugs.clamav.net\n");
+			free(messages);
+		}
+	}
+
+	if(aText && (textIn == NULL)) {
+		/* Look for a bounce in the text (non mime encoded) portion */
+		const text *t;
+		/* isBounceStart() is expensive, reduce the number of calls */
+		bool lookahead_definately_is_bounce = FALSE;
+
+		for(t = aText; t && (rc != VIRUS); t = t->t_next) {
+			const line_t *l = t->t_line;
+			const text *lookahead, *topofbounce;
+			const char *s;
+			bool inheader;
+
+			if(l == NULL) {
+				/* assert(lookahead_definately_is_bounce == FALSE) */
+				continue;
+			}
+
+			if(lookahead_definately_is_bounce)
+				lookahead_definately_is_bounce = FALSE;
+			else if(!isBounceStart(lineGetData(l)))
+				continue;
+
+			lookahead = t->t_next;
+			if(lookahead) {
+				if(isBounceStart(lineGetData(lookahead->t_line))) {
+					lookahead_definately_is_bounce = TRUE;
+					/* don't save worthless header lines */
+					continue;
+				}
+			} else	/* don't save a single liner */
+				break;
+
+			/*
+			 * We've found what looks like the start of a bounce
+			 * message. Only bother saving if it really is a bounce
+			 * message, this helps to speed up scanning of ping-pong
+			 * messages that have lots of bounces within bounces in
+			 * them
+			 */
+			for(; lookahead; lookahead = lookahead->t_next) {
+				l = lookahead->t_line;
+
+				if(l == NULL)
+					break;
+				s = lineGetData(l);
+				if(strncasecmp(s, "Content-Type:", 13) == 0) {
+					/*
+					 * Don't bother with text/plain or
+					 * text/html
+					 */
+					if(strcasestr(s, "text/plain") != NULL)
+						/*
+						 * Don't bother to save the
+						 * unuseful part, read past
+						 * the headers then we'll go
+						 * on to look for the next
+						 * bounce message
+						 */
+						continue;
+					if((!doPhishingScan) &&
+					   (strcasestr(s, "text/html") != NULL))
+						continue;
+					break;
+				}
+			}
+
+			if(lookahead && (lookahead->t_line == NULL)) {
+				cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
+				t = lookahead;
+				/* look for next bounce message */
+				continue;
+			}
+
+			/*
+			 * Prescan the bounce message to see if there's likely
+			 * to be anything nasty.
+			 * This algorithm is hand crafted and may be breakable
+			 * so all submissions are welcome. It's best NOT to
+			 * remove this however you may be tempted, because it
+			 * significantly speeds up the scanning of multiple
+			 * bounces (i.e. bounces within many bounces)
+			 */
+			for(; lookahead; lookahead = lookahead->t_next) {
+				l = lookahead->t_line;
+
+				if(l) {
+					s = lineGetData(l);
+					if((strncasecmp(s, "Content-Type:", 13) == 0) &&
+					   (strstr(s, "multipart/") == NULL) &&
+					   (strstr(s, "message/rfc822") == NULL) &&
+					   (strstr(s, "text/plain") == NULL))
+						break;
+				}
+			}
+			if(lookahead == NULL) {
+				cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
+				/* nothing here, move along please */
+				break;
+			}
+			if((fb = fileblobCreate()) == NULL)
+				break;
+			cli_dbgmsg("Save non mime part bounce message\n");
+			fileblobSetFilename(fb, mctx->dir, "bounce");
+			fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
+			fileblobSetCTX(fb, mctx->ctx);
+
+			inheader = TRUE;
+			topofbounce = NULL;
+			do {
+				l = t->t_line;
+
+				if(l == NULL) {
+					if(inheader) {
+						inheader = FALSE;
+						topofbounce = t;
+					}
+				} else {
+					s = lineGetData(l);
+					fileblobAddData(fb, (const unsigned char *)s, strlen(s));
+				}
+				fileblobAddData(fb, (const unsigned char *)"\n", 1);
+				lookahead = t->t_next;
+				if(lookahead == NULL)
+					break;
+				t = lookahead;
+				l = t->t_line;
+				if((!inheader) && l) {
+					s = lineGetData(l);
+					if(isBounceStart(s)) {
+						cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
+						lookahead_definately_is_bounce = TRUE;
+						break;
+					}
+				}
+			} while(!fileblobInfected(fb));
+
+			if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+				rc = VIRUS;
+			mctx->files++;
+
+			if(topofbounce)
+				t = topofbounce;
+		}
+		textDestroy(aText);
+		aText = NULL;
+	}
+
+	/*
+	 * No attachments - scan the text portions, often files
+	 * are hidden in HTML code
+	 */
+	if(mainMessage && (rc != VIRUS)) {
+		text *t_line;
+
+		/*
+		 * Look for uu-encoded main file
+		 */
+		if((encodingLine(mainMessage) != NULL) &&
+		   ((t_line = bounceBegin(mainMessage)) != NULL))
+			rc = (exportBounceMessage(mctx, t_line) == CL_VIRUS) ? VIRUS : OK;
+		else {
+			bool saveIt;
+
+			if(messageGetMimeType(mainMessage) == MESSAGE)
+				/*
+				 * Quick peek, if the encapsulated
+				 * message has no
+				 * content encoding statement don't
+				 * bother saving to scan, it's safe
+				 */
+				saveIt = (bool)(encodingLine(mainMessage) != NULL);
+			else if((t_line = encodingLine(mainMessage)) != NULL) {
+				/*
+				 * Some bounces include the message
+				 * body without the headers.
+				 * FIXME: Unfortunately this generates a
+				 * lot of false positives that a bounce
+				 * has been found when it hasn't.
+				 */
+				if((fb = fileblobCreate()) != NULL) {
+					cli_dbgmsg("Found a bounce message with no header at '%s'\n",
+						lineGetData(t_line->t_line));
+					fileblobSetFilename(fb, mctx->dir, "bounce");
+					fileblobAddData(fb,
+						(const unsigned char *)"Received: by clamd (bounce)\n",
+						28);
+
+					fileblobSetCTX(fb, mctx->ctx);
+					if(fileblobScanAndDestroy(textToFileblob(t_line, fb, 1)) == CL_VIRUS)
+						rc = VIRUS;
+					mctx->files++;
+				}
+				saveIt = FALSE;
+			} else
+				/*
+				 * Save the entire text portion,
+				 * since it it may be an HTML file with
+				 * a JavaScript virus or a phish
+				 */
+				saveIt = TRUE;
+
+			if(saveIt) {
+				cli_dbgmsg("Saving text part to scan, rc = %d\n",
+					(int)rc);
+				if(saveTextPart(mctx, mainMessage, 1) == CL_VIRUS)
+					rc = VIRUS;
+
+				if(mainMessage != messageIn) {
+					messageDestroy(mainMessage);
+					mainMessage = NULL;
+				} else
+					messageReset(mainMessage);
+			}
+		}
+	} /*else
+		rc = OK_ATTACHMENTS_NOT_SAVED;	/* nothing saved */
+
+	if(mainMessage && (mainMessage != messageIn))
+		messageDestroy(mainMessage);
+
+	if((rc != FAIL) && infected)
+		rc = VIRUS;
+
+	cli_dbgmsg("parseEmailBody() returning %d\n", (int)rc);
+
+	return rc;
+}
+
+/*
+ * Is the current line the start of a new section?
+ *
+ * New sections start with --boundary
+ *
+ * line:	the text of the line to test, may be NULL (a blank line)
+ * boundary:	the boundary string taken from the Content-Type header,
+ *		may be NULL
+ *
+ * Only lines starting with '-' or '(' are considered. The line is checked
+ * both as is and with its RFC822 comments removed by rfc822comments().
+ *
+ * Returns 1 if the line is taken to start a new part, 0 otherwise
+ */
+static int
+boundaryStart(const char *line, const char *boundary)
+{
+	const char *ptr;
+	char *out;
+	int rc;
+	char buf[RFC2821LENGTH + 1];
+
+	if(line == NULL)
+		return 0;	/* empty line */
+	if(boundary == NULL)
+		return 0;
+
+	if((*line != '-') && (*line != '('))
+		return 0;
+
+	if(strchr(line, '-') == NULL)
+		return 0;
+
+	/*
+	 * Only use the stack buffer when the whole line plus its terminating
+	 * NUL is guaranteed to fit, hence < and not <=. Longer lines are
+	 * handed to rfc822comments() with no buffer, and whatever it returns
+	 * in that case is freed below.
+	 */
+	if(strlen(line) < sizeof(buf)) {
+		out = NULL;
+		ptr = rfc822comments(line, buf);
+	} else
+		ptr = out = rfc822comments(line, NULL);
+
+	/* No stripped version available, fall back to the raw line */
+	if(ptr == NULL)
+		ptr = line;
+
+	/* Must start with '-' followed by at least one more character */
+	if((*ptr++ != '-') || (*ptr == '\0')) {
+		free(out);
+		return 0;
+	}
+
+	/*
+	 * Gibe.B3 is broken, it has:
+	 *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
+	 * but its boundaries look like
+	 *	------ =_NextPart_000_01C31177.9DC7C000
+	 * notice the one too few '-'.
+	 * Presumably this is a deliberate exploitation of a bug in some mail
+	 * clients.
+	 *
+	 * The trouble is that this creates a lot of false positives for
+	 * boundary conditions, if we're too lax about matches. We do our level
+	 * best to avoid these false positives. For example if we have
+	 * boundary="1" we want to ensure that we don't break out of every line
+	 * that has -1 in it instead of starting --1. This needs some more work.
+	 *
+	 * Look with and without RFC822 comments stripped, I've seen some
+	 * samples where () are taken as comments in boundaries and some where
+	 * they're not. Irrespective of whatever RFC2822 says, we need to find
+	 * viruses in both types of mails.
+	 */
+	if((strstr(&ptr[1], boundary) != NULL) || (strstr(line, boundary) != NULL)) {
+		const char *k = ptr;
+
+		/*
+		 * We need to ensure that we don't match --11=-=-=11 when
+		 * looking for --1=-=-=1 in well behaved headers, that's a
+		 * false positive problem mentioned above.
+		 *
+		 * Step over the leading run of '-' one character at a time,
+		 * the remainder of the line must be exactly the boundary.
+		 */
+		rc = 0;
+		do
+			if(strcmp(++k, boundary) == 0) {
+				rc = 1;
+				break;
+			}
+		while(*k == '-');
+		if(rc == 0) {
+			/* Same test again on the line with comments intact */
+			k = &line[1];
+			do
+				if(strcmp(++k, boundary) == 0) {
+					rc = 1;
+					break;
+				}
+			while(*k == '-');
+		}
+	} else if(*ptr++ != '-')
+		rc = 0;
+	else
+		rc = (strcasecmp(ptr, boundary) == 0);
+
+	free(out);
+
+	if(rc == 1)
+		cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
+
+	return rc;
+}
+
+/*
+ * Is the current line the end?
+ *
+ * The message ends with --boundary--
+ *
+ * line:	the text of the line to test, may be NULL (a blank line)
+ * boundary:	the boundary string taken from the Content-Type header,
+ *		may be NULL
+ *
+ * The boundary is compared without regard to case.
+ *
+ * Returns 1 if the line is the closing boundary, 0 otherwise
+ */
+static int
+boundaryEnd(const char *line, const char *boundary)
+{
+	const char *ptr;
+	size_t len;
+
+	if(line == NULL)
+		return 0;
+	if(boundary == NULL)
+		return 0;	/* same guard as boundaryStart() */
+
+	/*
+	 * Walk a copy of the pointer so that the complete line is still
+	 * available for the debug message
+	 */
+	ptr = line;
+
+	if(*ptr++ != '-')
+		return 0;
+	if(*ptr++ != '-')
+		return 0;
+	len = strlen(boundary);
+	if(strncasecmp(ptr, boundary, len) != 0)
+		return 0;
+	/*
+	 * Use < rather than == because some broken mails have white
+	 * space after the boundary
+	 */
+	if(strlen(ptr) < (len + 2))
+		return 0;
+	/* The boundary must be followed immediately by "--" */
+	ptr = &ptr[len];
+	if(*ptr++ != '-')
+		return 0;
+	if(*ptr == '-') {
+		cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, line);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Initialise the various lookup tables
+ *
+ * rfc821Table:	receives a new table filled from rfc821headers
+ * subtypeTable:	receives a new table filled from mimeSubtypes
+ *
+ * Both source arrays are read up to the first entry with a NULL key.
+ *
+ * Returns 0 on success. On any failure returns -1, having destroyed
+ * whatever was created, with both *rfc821Table and *subtypeTable
+ * set to NULL
+ */
+static int
+initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
+{
+	const struct tableinit *tableinit;
+
+	/* Start from a known state so every error path leaves NULLs behind */
+	*rfc821Table = NULL;
+	*subtypeTable = NULL;
+
+	/*
+	 * Check the creations explicitly rather than with assert(), which
+	 * is compiled out when NDEBUG is defined
+	 */
+	*rfc821Table = tableCreate();
+	if(*rfc821Table == NULL)
+		return -1;
+
+	for(tableinit = rfc821headers; tableinit->key; tableinit++)
+		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
+			tableDestroy(*rfc821Table);
+			*rfc821Table = NULL;
+			return -1;
+		}
+
+	*subtypeTable = tableCreate();
+	if(*subtypeTable == NULL) {
+		/* Don't leak the first table */
+		tableDestroy(*rfc821Table);
+		*rfc821Table = NULL;
+		return -1;
+	}
+
+	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
+		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
+			tableDestroy(*rfc821Table);
+			tableDestroy(*subtypeTable);
+			*rfc821Table = NULL;
+			*subtypeTable = NULL;
+			return -1;
+		}
+
+	return 0;
+}
+
+/*
+ * getTextPart - choose which part of a multipart to treat as the text
+ *
+ * The first part of type text/html wins, since HTML text is the most likely
+ * to include a scripting worm. If there is no HTML part then the index of
+ * the last part of type text is given. NULL entries in messages are
+ * skipped.
+ *
+ * Returns the index into messages, or -1 if there is no text part at all
+ */
+static int
+getTextPart(message *const messages[], size_t size)
+{
+	int candidate = -1;
+	size_t idx = 0;
+
+	while(idx < size) {
+		message *const part = messages[idx];
+
+		if((part != NULL) && (messageGetMimeType(part) == TEXT)) {
+			/* HTML beats any other kind of text, look no further */
+			if(strcasecmp(messageGetMimeSubtype(part), "html") == 0)
+				return (int)idx;
+			/* Remember it in case no HTML turns up */
+			candidate = (int)idx;
+		}
+		idx++;
+	}
+
+	return candidate;
+}
+
+/*
+ * strip -
+ *	Remove the trailing spaces from a buffer. Don't call this directly,
+ * always call strstrip() which is a wrapper to this routine to be used with
+ * NUL terminated strings. This code looks a bit strange because of its
+ * heritage from code that worked on strings that weren't necessarily NUL
+ * terminated.
+ * TODO: rewrite for clamAV
+ *
+ * buf:	the buffer to strip in place, may be NULL
+ * len:	sizeof(buf) not strlen(buf). It must be int not size_t because of
+ *	the >= 0 test
+ *
+ * On UNIX, Linux and Darwin trailing characters are removed until one is
+ * found that is printable (isgraph), '\n' or '\r'. Elsewhere trailing NULs
+ * and white space (isspace) are removed.
+ *
+ * Returns its new length (a la strlen). Returns 0 if buf is NULL or len is
+ * not positive. If len is larger than the string plus its NUL, nothing is
+ * changed and strlen(buf) is returned.
+ */
+static size_t
+strip(char *buf, int len)
+{
+	register char *ptr;
+	register size_t i;
+
+	if((buf == NULL) || (len <= 0))
+		return 0;
+
+	i = strlen(buf);
+	if(len > (int)(i + 1))
+		return i;
+	ptr = &buf[--len];
+
+	/*
+	 * The ctype routines are only defined for values that fit in an
+	 * unsigned char (and EOF), so bytes with the top bit set must not
+	 * be passed as a plain, possibly negative, char
+	 */
+#if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
+	do
+		if(*ptr)
+			*ptr = '\0';
+	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
+#else	/* more characters can be displayed on DOS */
+	do
+#ifndef	REAL_MODE_DOS
+		if(*ptr)	/* C8.0 puts into a text area */
+#endif
+			*ptr = '\0';
+	while((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
+#endif
+	return((size_t)(len + 1));
+}
+
+/*
+ * strstrip:
+ *	Strip the trailing white space from a NUL terminated string, in
+ * place, by handing it and the size of its buffer (length plus the NUL) to
+ * strip().
+ *
+ * Returns the new length of the string, 0 if s is NULL
+ */
+size_t
+strstrip(char *s)
+{
+	return (s != (char *)NULL) ? strip(s, (int)strlen(s) + 1) : 0;
+}
+
+/*
+ * parseMimeHeader - act on one of the MIME headers that we understand
+ *
+ * m:		the message that the header applies to
+ * cmd:		the name of the header. Any RFC822 comments are removed
+ *		before it is looked up in rfc821Table
+ * rfc821Table:	maps header names to CONTENT_TYPE etc.
+ * arg:		the value of the header, may be NULL. Any RFC822 comments
+ *		are removed before it is parsed
+ *
+ * Content-Type:		sets the MIME type and subtype of m and adds
+ *				each of the ; separated parameters as an
+ *				argument
+ * Content-Transfer-Encoding:	sets the encoding of m
+ * Content-Disposition:	sets the disposition type and adds the first
+ *				parameter as an argument. If m still has no
+ *				filename then "filename=unknown" is added
+ * All other headers are ignored.
+ *
+ * Returns 0 for OK, -1 for error (out of memory)
+ */
+static int
+parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
+{
+	char *copy, *p, *buf;
+	const char *ptr;
+	int commandNumber;
+
+	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
+
+	/* rfc822comments() returns NULL if there's nothing to remove */
+	copy = rfc822comments(cmd, NULL);
+	if(copy) {
+		commandNumber = tableFind(rfc821Table, copy);
+		free(copy);
+	} else
+		commandNumber = tableFind(rfc821Table, cmd);
+
+	copy = rfc822comments(arg, NULL);
+
+	/* From here on ptr is the value to parse, copy is only kept to be freed */
+	if(copy)
+		ptr = copy;
+	else
+		ptr = arg;
+
+	buf = NULL;
+
+	switch(commandNumber) {
+		case CONTENT_TYPE:
+			/*
+			 * Fix for non RFC1521 compliant mailers
+			 * that send content-type: Text instead
+			 * of content-type: Text/Plain, or
+			 * just simply "Content-Type:"
+			 */
+			if(arg == NULL)
+				/*
+				 * According to section 4 of RFC1521:
+				 * "Note also that a subtype specification is
+				 * MANDATORY. There are no default subtypes"
+				 *
+				 * We have to break this and make an assumption
+				 * for the subtype because virus writers and
+				 * email client writers don't get it right
+				 */
+				 cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
+			else if(strchr(ptr, '/') == NULL)
+				/*
+				 * Empty field, such as
+				 *	Content-Type:
+				 * which I believe is illegal according to
+				 * RFC1521
+				 */
+				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
+			else {
+				int i;
+
+				/* Scratch area for cli_strtokbuf(), no field can be longer than the whole value */
+				buf = cli_malloc(strlen(ptr) + 1);
+				if(buf == NULL) {
+					if(copy)
+						free(copy);
+					return -1;
+				}
+				/*
+				 * Some clients are broken and
+				 * put white space after the ;
+				 */
+				if(*arg == '/') {
+					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
+					messageSetMimeType(m, "application");
+					messageSetMimeSubtype(m, "octet-stream");
+				} else {
+					/*
+					 * The content type could be in quotes:
+					 *	Content-Type: "multipart/mixed"
+					 * FIXME: this is a hack in that ignores
+					 *	the quotes, it doesn't handle
+					 *	them properly
+					 */
+					while(isspace((unsigned char)*ptr))
+						ptr++;
+					if(ptr[0] == '\"')
+						ptr++;
+
+					if(ptr[0] != '/') {
+						char *s;
+#ifdef CL_THREAD_SAFE
+						char *strptr = NULL;
+#endif
+
+						s = cli_strtokbuf(ptr, 0, ";", buf);
+						/*
+						 * Handle
+						 * Content-Type: foo/bar multipart/mixed
+						 * and
+						 * Content-Type: multipart/mixed foo/bar
+						 */
+						if(s && *s) {
+							char *buf2 = cli_strdup(buf);
+
+							if(buf2 == NULL) {
+								if(copy)
+									free(copy);
+								free(buf);
+								return -1;
+							}
+							for(;;) {
+#ifdef	CL_THREAD_SAFE
+								int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
+#else
+								int set = messageSetMimeType(m, strtok(s, "/"));
+#endif
+
+								/*
+								 * Stephen White <stephen at earth.li>
+								 * Some clients put space after
+								 * the mime type but before
+								 * the ;
+								 */
+#ifdef	CL_THREAD_SAFE
+								s = strtok_r(NULL, ";", &strptr);
+#else
+								s = strtok(NULL, ";");
+#endif
+								if(s == NULL)
+									break;
+								if(set) {
+									/*
+									 * Work with the length
+									 * of the stripped
+									 * subtype, not the
+									 * index of its last
+									 * character: the index
+									 * wraps when nothing is
+									 * left after stripping,
+									 * and is 0 (so the
+									 * subtype was ignored)
+									 * when it is one
+									 * character long
+									 */
+									size_t len = strstrip(s);
+
+									if(len && (s[len - 1] == '\"')) {
+										/* Remove the closing quote */
+										s[len - 1] = '\0';
+										len = strstrip(s);
+									}
+									if(len) {
+										if(strchr(s, ' '))
+											messageSetMimeSubtype(m,
+												cli_strtokbuf(s, 0, " ", buf2));
+										else
+											messageSetMimeSubtype(m, s);
+									}
+								}
+
+								/* Is there another type after some white space? */
+								while(*s && !isspace((unsigned char)*s))
+									s++;
+								if(*s++ == '\0')
+									break;
+								if(*s == '\0')
+									break;
+							}
+							free(buf2);
+						}
+					}
+				}
+
+				/*
+				 * Add in all rest of the the arguments.
+				 * e.g. if the header is this:
+				 * Content-Type:', arg='multipart/mixed; boundary=foo
+				 * we find the boundary argument set it
+				 */
+				i = 1;
+				while(cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
+					cli_dbgmsg("mimeArgs = '%s'\n", buf);
+
+					messageAddArguments(m, buf);
+				}
+			}
+			break;
+		case CONTENT_TRANSFER_ENCODING:
+			messageSetEncoding(m, ptr);
+			break;
+		case CONTENT_DISPOSITION:
+			/*
+			 * ptr is NULL when the header has no value, there's
+			 * nothing to parse but the message still needs a
+			 * filename
+			 */
+			if(ptr != NULL) {
+				buf = cli_malloc(strlen(ptr) + 1);
+				if(buf == NULL) {
+					if(copy)
+						free(copy);
+					return -1;
+				}
+				p = cli_strtokbuf(ptr, 0, ";", buf);
+				if(p) {
+					if(*p) {
+						messageSetDispositionType(m, p);
+						messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
+					}
+				}
+			}
+			if(!messageHasFilename(m))
+				/*
+				 * Handle this type of header, without
+				 * a filename (e.g. some Worm.Torvil.D)
+				 *	Content-ID: <nRfkHdrKsAxRU>
+				 * Content-Transfer-Encoding: base64
+				 * Content-Disposition: attachment
+				 */
+				messageAddArgument(m, "filename=unknown");
+			break;
+	}
+	if(copy)
+		free(copy);
+	if(buf)
+		free(buf);
+
+	return 0;
+}
+
+/*
+ * saveTextPart - save the text portion of the message and scan it
+ *
+ * The message is given the filename "textportion" and converted to a
+ * fileblob in mctx->dir. The count of files in mctx is only incremented
+ * when that works.
+ *
+ * Returns the result of scanning the fileblob, or CL_ETMPFILE if the
+ * fileblob could not be created
+ */
+static int
+saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
+{
+	fileblob *fb;
+
+	messageAddArgument(m, "filename=textportion");
+
+	fb = messageToFileblob(m, mctx->dir, destroy_text);
+	if(fb == NULL)
+		return CL_ETMPFILE;
+
+	/*
+	 * Save main part to scan that
+	 */
+	cli_dbgmsg("Saving main message\n");
+
+	mctx->files++;
+	return fileblobScanAndDestroy(fb);
+}
+
+/*
+ * rfc822comments - remove the RFC822 comments from a header
+ *
+ * in:	the header text, may be NULL
+ * out:	where to put the result, it must not be the same buffer as in. If
+ *	out is NULL a buffer is allocated which the caller must free
+ *
+ * Text between unquoted brackets is dropped, nested brackets are allowed.
+ * Brackets inside double quotes are copied. A backslash is dropped and the
+ * character after it is taken literally (and copied unless it is inside a
+ * comment). A double quote is always copied and always toggles the quoted
+ * state, even inside a comment.
+ *
+ * Returns the result, or NULL on error or if the input has no comments.
+ * See section 3.4.3 of RFC822
+ * TODO: handle comments that go on to more than one line
+ */
+static char *
+rfc822comments(const char *in, char *out)
+{
+	const char *src;
+	char *dst;
+	int escaped = 0, quoted = 0, depth = 0;
+
+	/* No open bracket means no comment, so there's nothing to do */
+	if((in == NULL) || (strchr(in, '(') == NULL))
+		return NULL;
+
+	assert(out != in);
+
+	if(out == NULL) {
+		/* The result can't be longer than the input */
+		out = cli_malloc(strlen(in) + 1);
+		if(out == NULL)
+			return NULL;
+	}
+
+	dst = out;
+
+	cli_dbgmsg("rfc822comments: contains a comment\n");
+
+	for(src = in; *src != '\0'; src++) {
+		const char c = *src;
+
+		if(escaped) {
+			/* The character after a backslash has no special meaning */
+			if(depth == 0)
+				*dst++ = c;
+			escaped = 0;
+		} else if(c == '\\')
+			escaped = 1;
+		else if(c == '\"') {
+			*dst++ = '\"';
+			quoted = !quoted;
+		} else if((c == '(') || (c == ')')) {
+			if(quoted)
+				*dst++ = c;
+			else if(c == '(')
+				depth++;
+			else if(depth > 0)
+				depth--;
+		} else if(depth == 0)
+			*dst++ = c;
+	}
+
+	if(escaped)	/* last character was a single backslash */
+		*dst++ = '\\';
+	*dst = '\0';
+
+	/*strstrip(out);*/
+
+	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);
+
+	return out;
+}
+
+/*
+ * rfc2047 - decode the RFC2047 encoded words ("=?charset?encoding?text?=")
+ *	in a header
+ *
+ * Text outside the encoded words is copied as it is. The charset is
+ * ignored. The encoding can be q (quoted-printable) or b (base64) in either
+ * case, the text is decoded by putting it into a temporary message. One
+ * trailing newline is removed from each decoded word.
+ *
+ * If in contains no "=?" or no "?=" a copy of it is returned. Decoding
+ * stops at the first malformed word, or if a temporary object can't be
+ * created, and what was decoded up to then is returned.
+ *
+ * Returns a malloc'd buffer that the caller must free, or NULL on error
+ * (out of memory, or an encoding that is neither q nor b)
+ */
+static char *
+rfc2047(const char *in)
+{
+	char *out, *pout;
+	size_t len;
+
+	if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
+		return cli_strdup(in);
+
+	cli_dbgmsg("rfc2047 '%s'\n", in);
+	/* Decoding never makes the text longer */
+	out = cli_malloc(strlen(in) + 1);
+
+	if(out == NULL)
+		return NULL;
+
+	pout = out;
+
+	/* For each RFC2047 string */
+	while(*in) {
+		char encoding, *ptr, *enctext;
+		message *m;
+		blob *b;
+
+		/* Find next RFC2047 string */
+		while(*in) {
+			if((*in == '=') && (in[1] == '?')) {
+				in += 2;
+				break;
+			}
+			*pout++ = *in++;
+		}
+		/* Skip over charset, find encoding */
+		while((*in != '?') && *in)
+			in++;
+		if(*in == '\0')
+			break;
+		encoding = *++in;
+		encoding = (char)tolower((unsigned char)encoding);
+
+		if((encoding != 'q') && (encoding != 'b')) {
+			cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
+			free(out);
+			out = NULL;
+			break;
+		}
+		/* Skip to encoded text */
+		if(*++in != '?')
+			break;
+		if(*++in == '\0')
+			break;
+
+		enctext = cli_strdup(in);
+		if(enctext == NULL) {
+			free(out);
+			out = NULL;
+			break;
+		}
+		in = strstr(in, "?=");
+		if(in == NULL) {
+			free(enctext);
+			break;
+		}
+		in += 2;
+		/* enctext is a copy of in, so the terminator is there as well */
+		ptr = strstr(enctext, "?=");
+		assert(ptr != NULL);
+		*ptr = '\0';
+		/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
+
+		m = messageCreate();
+		if(m == NULL) {
+			/* Don't leak the copy of the encoded text */
+			free(enctext);
+			break;
+		}
+		messageAddStr(m, enctext);
+		free(enctext);
+		switch(encoding) {
+			case 'q':
+				messageSetEncoding(m, "quoted-printable");
+				break;
+			case 'b':
+				messageSetEncoding(m, "base64");
+				break;
+		}
+		b = messageToBlob(m, 1);
+		if(b == NULL) {
+			/* Nothing was decoded, give back what we have */
+			messageDestroy(m);
+			break;
+		}
+		len = blobGetDataSize(b);
+		cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
+			(len) ? (const char *)blobGetData(b) : "");
+		if(len)
+			memcpy(pout, blobGetData(b), len);
+		blobDestroy(b);
+		messageDestroy(m);
+		/*
+		 * Drop one trailing newline. Check len first, an empty
+		 * decode must not look at the byte before pout
+		 */
+		if((len > 0) && (pout[len - 1] == '\n'))
+			pout += len - 1;
+		else
+			pout += len;
+
+	}
+	if(out == NULL)
+		return NULL;
+
+	*pout = '\0';
+
+	cli_dbgmsg("rfc2047 returns '%s'\n", out);
+	return out;
+}
+
+#ifdef	PARTIAL_DIR
+/*
+ * rfc1341 - handle partial messages (message/partial)
+ *
+ * m:	one part of a message that has been split up, its "id", "number"
+ *	and "total" arguments say which part it is
+ * dir:	the directory where the reassembled message is to be put
+ *
+ * The part is saved in the directory clamav-partial, which is created if
+ * need be in the directory named by the environment (TMPDIR, TMP or TEMP)
+ * or else in a default place, under a name made from the id and number.
+ * When the part whose number is the same as the total arrives, parts 1 to
+ * total are copied in order into dir/id, without the blank lines at the end
+ * of each part, and the saved parts are removed unless cli_leavetemps_flag
+ * is set.
+ *
+ * Returns 0 for OK, -1 for error
+ */
+static int
+rfc1341(message *m, const char *dir)
+{
+	fileblob *fb;
+	char *arg, *id, *number, *total, *oldfilename;
+	const char *tmpdir;
+	char pdir[NAME_MAX + 1];
+	int mkdir_rc;
+
+	id = (char *)messageFindArgument(m, "id");
+	if(id == NULL)
+		return -1;
+
+#ifdef  C_CYGWIN
+	if((tmpdir = getenv("TEMP")) == (char *)NULL)
+		if((tmpdir = getenv("TMP")) == (char *)NULL)
+			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
+				tmpdir = "C:\\";
+#else
+	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
+		if((tmpdir = getenv("TMP")) == (char *)NULL)
+			if((tmpdir = getenv("TEMP")) == (char *)NULL)
+#ifdef	P_tmpdir
+				tmpdir = P_tmpdir;
+#else
+				tmpdir = "/tmp";
+#endif
+#endif
+
+	snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);
+
+	/*
+	 * errno only means something when mkdir has failed, so remember
+	 * whether it did rather than testing a value that may have been
+	 * left behind by an earlier call
+	 */
+	mkdir_rc = mkdir(pdir, S_IRWXU);
+
+	if((mkdir_rc < 0) && (errno != EEXIST)) {
+		cli_errmsg("Can't create the directory '%s'\n", pdir);
+		free(id);
+		return -1;
+	} else if(mkdir_rc < 0) {
+		/* It was already there (EEXIST), check who else can get at it */
+		struct stat statb;
+
+		if(stat(pdir, &statb) < 0) {
+			cli_errmsg("Partial directory %s: %s\n", pdir,
+				strerror(errno));
+			free(id);
+			return -1;
+		}
+		if(statb.st_mode&(S_IRWXG|S_IRWXO))
+			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
+				pdir,
+#ifdef	ACCESSPERMS
+				(int)(statb.st_mode&ACCESSPERMS)
+#else
+				(int)(statb.st_mode & 0777)
+#endif
+			);
+	}
+
+	number = (char *)messageFindArgument(m, "number");
+	if(number == NULL) {
+		free(id);
+		return -1;
+	}
+
+	oldfilename = messageGetFilename(m);
+
+	/* 10 is strlen("filename=") plus the NUL */
+	arg = cli_malloc(10 + strlen(id) + strlen(number));
+	if(arg) {
+		sprintf(arg, "filename=%s%s", id, number);
+		messageAddArgument(m, arg);
+		free(arg);
+	}
+
+	if(oldfilename) {
+		cli_warnmsg("Must reset to %s\n", oldfilename);
+		free(oldfilename);
+	}
+
+	/* Save this part in the partial directory */
+	if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
+		free(id);
+		free(number);
+		return -1;
+	}
+
+	fileblobDestroy(fb);
+
+	total = (char *)messageFindArgument(m, "total");
+	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
+	if(total) {
+		int n = atoi(number);
+		int t = atoi(total);
+		DIR *dd = NULL;
+
+		free(total);
+		/*
+		 * If it's the last one - reassemble it
+		 * FIXME: this assumes that we receive the parts in order
+		 */
+		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
+			FILE *fout;
+			char outname[NAME_MAX + 1];
+			time_t now;
+
+			sanitiseName(id);
+
+			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);
+
+			cli_dbgmsg("outname: %s\n", outname);
+
+			fout = fopen(outname, "wb");
+			if(fout == NULL) {
+				cli_errmsg("Can't open '%s' for writing\n", outname);
+				free(id);
+				free(number);
+				closedir(dd);
+				return -1;
+			}
+
+			time(&now);
+			/* For each part, search the directory for its file */
+			for(n = 1; n <= t; n++) {
+				char filename[NAME_MAX + 1];
+				const struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+				union {
+					struct dirent d;
+					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+				} result;
+#endif
+
+				snprintf(filename, sizeof(filename), "%s%d", id, n);
+
+#ifdef HAVE_READDIR_R_3
+				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
+#else	/*!HAVE_READDIR_R*/
+				while((dent = readdir(dd))) {
+#endif
+					FILE *fin;
+					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
+					int nblanks;
+					struct stat statb;
+
+#ifndef  C_CYGWIN
+					if(dent->d_ino == 0)
+						continue;
+#endif
+
+					snprintf(fullname, sizeof(fullname) - 1,
+						"%s/%s", pdir, dent->d_name);
+
+					if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
+						/*
+						 * Not the part that we want.
+						 * If temporary files are
+						 * being kept, tidy up those
+						 * that are over a week old
+						 */
+						if(!cli_leavetemps_flag)
+							continue;
+						if(stat(fullname, &statb) < 0)
+							continue;
+						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
+							if(unlink(fullname) >= 0)
+								cli_warnmsg("removed old RFC1341 file %s\n", fullname);
+						continue;
+					}
+
+					fin = fopen(fullname, "rb");
+					if(fin == NULL) {
+						cli_errmsg("Can't open '%s' for reading\n", fullname);
+						fclose(fout);
+						unlink(outname);
+						free(id);
+						free(number);
+						closedir(dd);
+						return -1;
+					}
+					nblanks = 0;
+					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
+						/*
+						 * Ensure that trailing newlines
+						 * aren't copied
+						 */
+						if(buffer[0] == '\n')
+							nblanks++;
+						else {
+							/* The blank lines weren't at the end after all */
+							if(nblanks)
+								do
+									putc('\n', fout);
+								while(--nblanks > 0);
+							fputs(buffer, fout);
+						}
+					fclose(fin);
+
+					/* don't unlink if leave temps */
+					if(!cli_leavetemps_flag)
+						unlink(fullname);
+					break;
+				}
+				/* Start from the top of the directory for the next part */
+				rewinddir(dd);
+			}
+			closedir(dd);
+			fclose(fout);
+		}
+	}
+	free(number);
+	free(id);
+
+	return 0;
+}
+#endif
+
+/*
+ * hrefs_done - tidy up after getHrefs()
+ *
+ * b is destroyed unless it is NULL, then the contents of hrefs are freed.
+ */
+static void
+hrefs_done(blob *b, tag_arguments_t *hrefs)
+{
+	if(b != NULL)
+		blobDestroy(b);
+
+	html_tag_arg_free(hrefs);
+}
+
+/*
+ * getHrefs - find the links in a message
+ *
+ * This used to be part of checkURLs, split out, because phishingScan needs it
+ * too, and phishingScan might be used in situations where checkURLs is
+ * disabled (see ifdef)
+ *
+ * The message is converted to a blob which is handed to
+ * html_normalise_mem() to fill in hrefs. The count, tag, value and contents
+ * fields of hrefs are cleared first, but only once the blob is known to be
+ * usable.
+ *
+ * Returns the blob, or NULL if the message can't be converted, is empty, is
+ * larger than 100K or can't be normalised
+ */
+static blob *
+getHrefs(message *m, tag_arguments_t *hrefs)
+{
+	size_t len;
+	blob *b;
+
+	b = messageToBlob(m, 0);
+	if(b == NULL)
+		return NULL;
+
+	len = blobGetDataSize(b);
+
+	/* TODO: make this size customisable */
+	if((len == 0) || (len > 100*1024)) {
+		if(len)
+			cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
+		blobDestroy(b);
+		return NULL;
+	}
+
+	hrefs->contents = NULL;
+	hrefs->tag = hrefs->value = NULL;
+	hrefs->count = 0;
+
+	cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
+	if(html_normalise_mem(blobGetData(b), (off_t)len, NULL, hrefs,m->ctx->dconf)) {
+		cli_dbgmsg("getHrefs: html_normalise_mem returned\n");
+
+		/* TODO: Do we need to call remove_html_comments? */
+		return b;
+	}
+
+	blobDestroy(b);
+	return NULL;
+}
+
+/*
+ * checkURLs - look at the links in a message
+ *
+ * validate URLs for phishes
+ * followurls: see if URLs point to malware
+ *
+ * Nothing is done if *rc is already VIRUS. If phishingScan() finds a phish
+ * the message is marked as infected and *rc is set to VIRUS. Otherwise, if
+ * the message is HTML and CL_SCAN_MAILURL is set in the scan options, the
+ * links are handed to do_checkURLs().
+ */
+static void
+checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
+{
+	tag_arguments_t hrefs;
+	blob *b;
+
+	if(*rc == VIRUS)
+		return;
+
+	hrefs.scanContents = (mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS) && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
+
+#if	(!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0)
+	/*
+	 * Don't waste time extracting hrefs (parsing html), nobody
+	 * will need it
+	 */
+	if(!hrefs.scanContents)
+		return;
+#endif
+
+	hrefs.contents = NULL;
+	hrefs.tag = hrefs.value = NULL;
+	hrefs.count = 0;
+
+	b = getHrefs(mainMessage, &hrefs);
+	if(b != NULL) {
+		if(hrefs.scanContents &&
+		   (phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS)) {
+			/*
+			 * FIXME: message objects' contents are
+			 *	encapsulated so we should not access
+			 *	the members directly
+			 */
+			mainMessage->isInfected = TRUE;
+			*rc = VIRUS;
+			cli_dbgmsg("PH:Phishing found\n");
+		}
+		/* No need to download anything once a phish has been found */
+		if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS))
+			do_checkURLs(mctx->dir, &hrefs);
+	}
+	/* hrefs_done copes with b being NULL */
+	hrefs_done(b,&hrefs);
+}
+
+#if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+/*
+ * do_checkURLs - download what the links in a message point to
+ *
+ * dir:		where the downloaded files are to be put
+ * hrefs:	the links, the order of hrefs->value may be changed
+ *
+ * Only links that start with http:// are fetched, each one once, and no
+ * more than FOLLOWURLS of them. Each file is named after its URL, run
+ * through sanitiseName(). If CL_THREAD_SAFE is defined every download has
+ * its own thread and this routine waits for all of them to finish,
+ * otherwise they are fetched one after the other.
+ */
+static void
+do_checkURLs(const char *dir, tag_arguments_t *hrefs)
+{
+	table_t *t;
+	int i, n;
+#ifdef	CL_THREAD_SAFE
+	pthread_t tid[FOLLOWURLS];
+	struct arg args[FOLLOWURLS];
+#endif
+
+	/* t remembers the URLs that have been seen */
+	t = tableCreate();
+	if(t == NULL)
+		return;
+
+	n = 0;
+
+	/*
+	 * Sort .exes higher up so that there's more chance they'll be
+	 * downloaded and scanned
+	 */
+	for(i = FOLLOWURLS; (i < hrefs->count) && (n < FOLLOWURLS); i++) {
+		char *url = (char *)hrefs->value[i];
+		char *ptr;
+
+		if(strncasecmp("http://", url, 7) != 0)
+			continue;
+
+		ptr = strrchr(url, '.');
+		if(ptr == NULL)
+			continue;
+		if(strcasecmp(ptr, ".exe") == 0) {
+			/* FIXME: Could be swapping with another .exe */
+			cli_dbgmsg("swap %s %s\n", hrefs->value[n], hrefs->value[i]);
+			ptr = (char *)hrefs->value[n];
+			hrefs->value[n++] = (unsigned char *)url;
+			hrefs->value[i] = (unsigned char *)ptr;
+		}
+	}
+
+	/* From now on n is the number of downloads that have been started */
+	n = 0;
+
+	for(i = 0; i < hrefs->count; i++) {
+		const char *url = (const char *)hrefs->value[i];
+
+		/*
+		 * TODO: If it's an image source, it'd be nice to note beacons
+		 *	where width="0" height="0", which needs support from
+		 *	the HTML normalise code
+		 */
+		if(strncasecmp("http://", url, 7) == 0) {
+#ifndef	CL_THREAD_SAFE
+			struct arg arg;
+#endif
+			char name[NAME_MAX + 1];
+
+			if(tableFind(t, url) == 1) {
+				cli_dbgmsg("URL %s already downloaded\n", url);
+				continue;
+			}
+			/*
+			 * What about foreign character spoofing?
+			 */
+			if(strchr(url, '%') && strchr(url, '@'))
+				cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);
+
+			if(n == FOLLOWURLS) {
+				cli_warnmsg("URL %s will not be scanned (FOLLOWURLS limit %d was reached)\n",
+					url, FOLLOWURLS);
+				break;
+			}
+
+			(void)tableInsert(t, url, 1);
+			cli_dbgmsg("Downloading URL %s to be scanned\n", url);
+			strncpy(name, url, sizeof(name) - 1);
+			name[sizeof(name) - 1] = '\0';
+			sanitiseName(name);	/* bug #538 */
+
+#ifdef	CL_THREAD_SAFE
+			args[n].dir = dir;
+			args[n].url = cli_strdup(url);
+			args[n].filename = cli_strdup(name);
+			args[n].depth = 0;
+			/*
+			 * Only count the download if its thread was really
+			 * started, so that the loop below never joins a
+			 * thread that doesn't exist, and never start a
+			 * thread without a URL and a filename
+			 */
+			if((args[n].url == NULL) || (args[n].filename == NULL) ||
+			   (pthread_create(&tid[n], NULL, getURL, &args[n]) != 0)) {
+				cli_warnmsg("URL %s will not be scanned (out of resources)\n", url);
+				if(args[n].filename)
+					free(args[n].filename);
+				if(args[n].url)
+					free(args[n].url);
+				continue;
+			}
+#else
+			arg.url = cli_strdup(url);
+			if(arg.url == NULL) {
+				/* getURL can't cope with a NULL URL */
+				cli_warnmsg("URL %s will not be scanned (out of resources)\n", url);
+				continue;
+			}
+			arg.dir = dir;
+			arg.filename = name;
+			arg.depth = 0;
+			getURL(&arg);
+			/* getURL replaces arg.url if it is redirected */
+			free(arg.url);
+#endif
+			++n;
+		}
+	}
+	tableDestroy(t);
+
+#ifdef	CL_THREAD_SAFE
+	assert(n <= FOLLOWURLS);
+	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
+	while(--n >= 0) {
+		pthread_join(tid[n], NULL);
+		free(args[n].filename);
+		free(args[n].url);
+	}
+#endif
+}
+
+#else	/*!FOLLOWURLS*/
+
+/*
+ * Links are not followed in this build, so there is nothing to do
+ */
+static void
+do_checkURLs(const char *dir, tag_arguments_t *hrefs)
+{
+}
+
+#endif
+
+#if	defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+/*
+ * Includes some Win32 patches by Gianluigi Tiesi <sherpya at netfarm.it>
+ *
+ * FIXME: Often WMF exploits work by sending people an email directing them
+ *	to a page which displays a picture containing the exploit. This is not
+ *	currently found, since only the HTML on the referred page is downloaded.
+ *	It would be useful to scan the HTML for references to pictures and
+ *	download them for scanning. But that will hit performance so there is
+ *	an issue here.
+ */
+
+/*
+ * Simple implementation of a subset of RFC1945 (HTTP/1.0)
+ * TODO: HTTP/1.1 (RFC2068)
+ *
+ * getURL - download arg->url into the file arg->dir/arg->filename
+ *
+ * If the environment variable http_proxy is set the request is sent to that
+ * proxy, which must be an http:// one, rather than to the site named in the
+ * URL. The HTTP header of the reply is not saved, though it is only looked
+ * for in the first packet. A 301 or 302 reply with a Location header is
+ * followed, by freeing and replacing arg->url and calling getURL again,
+ * until arg->depth reaches FOLLOWURLS.
+ *
+ * Always returns NULL, whether or not the download worked. The return type
+ * is there because the routine is started with pthread_create() when
+ * CL_THREAD_SAFE is defined.
+ */
+static void *
+#ifdef	CL_THREAD_SAFE
+getURL(void *a)
+#else
+getURL(struct arg *arg)
+#endif
+{
+	FILE *fp;
+#ifdef	CL_THREAD_SAFE
+	struct arg *arg = (struct arg *)a;
+#endif
+	const char *url = arg->url;
+	const char *dir = arg->dir;
+	const char *filename = arg->filename;
+	SOCKET sd;
+	struct sockaddr_in server;
+#ifdef	HAVE_IN_ADDR_T
+	in_addr_t ip;
+#else
+	unsigned int ip;
+#endif
+	in_port_t port;
+	static in_port_t default_port;
+	static int tcp;
+	int doingsite, firstpacket;
+	char *ptr;
+	long flags;
+	int via_proxy;
+	const char *proxy;
+	char buf[BUFSIZ + 1], site[BUFSIZ], fout[NAME_MAX + 1];
+
+	/* The name of the site is copied from the URL into site */
+	if(strlen(url) > (sizeof(site) - 1)) {
+		cli_dbgmsg("Ignoring long URL \"%s\"\n", url);
+		return NULL;
+	}
+
+	snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename);
+
+	fp = fopen(fout, "wb");
+
+	if(fp == NULL) {
+		cli_errmsg("Can't open '%s' for writing\n", fout);
+		return NULL;
+	}
+	cli_dbgmsg("Saving %s to %s\n", url, fout);
+
+#ifndef	C_BEOS
+	/* Look up the protocol number once, it's kept for later calls */
+	if(tcp == 0) {
+		const struct protoent *proto = getprotobyname("tcp");
+
+		if(proto == NULL) {
+			cli_warnmsg("Unknown prototol tcp, check /etc/protocols\n");
+			fclose(fp);
+			return NULL;
+		}
+		tcp = proto->p_proto;
+#ifndef	C_WINDOWS
+		endprotoent();
+#endif
+	}
+#endif
+	/* Likewise the port for http */
+	if(default_port == 0) {
+		const struct servent *servent = getservbyname("http", "tcp");
+
+		if(servent)
+			default_port = (in_port_t)ntohs(servent->s_port);
+		else
+			default_port = 80;
+#if	!defined(C_WINDOWS) && !defined(C_BEOS)
+		endservent();
+#endif
+	}
+	port = default_port;
+
+	doingsite = 1;
+	ptr = site;
+
+	proxy = getenv("http_proxy");	/* FIXME: handle no_proxy */
+
+	via_proxy = (proxy && *proxy);
+
+	if(via_proxy) {
+		if(strncasecmp(proxy, "http://", 7) != 0) {
+			cli_warnmsg("Unsupported proxy protocol (proxy = %s)\n",
+				proxy);
+			fclose(fp);
+			return NULL;
+		}
+
+		/*
+		 * site is filled in from the name of the proxy here, not
+		 * from the URL, so the check on the length of the URL
+		 * doesn't stop it from overflowing
+		 */
+		if(strlen(proxy) > (sizeof(site) - 1)) {
+			cli_warnmsg("Ignoring long proxy \"%s\"\n", proxy);
+			fclose(fp);
+			return NULL;
+		}
+
+		cli_dbgmsg("Getting %s via %s\n", url, proxy);
+
+		/* Split the proxy into site and optional :port */
+		proxy += 7;
+		while(*proxy) {
+			if(doingsite && (*proxy == ':')) {
+				port = 0;
+				while(isdigit((unsigned char)*++proxy)) {
+					port *= 10;
+					port += *proxy - '0';
+				}
+				continue;
+			}
+			if(doingsite && (*proxy == '/')) {
+				proxy++;
+				break;
+			}
+			*ptr++ = *proxy++;
+		}
+	} else {
+		cli_dbgmsg("Getting %s\n", url);
+
+		if(strncasecmp(url, "http://", 7) != 0) {
+			cli_warnmsg("Unsupported protocol\n");
+			fclose(fp);
+			return NULL;
+		}
+
+		/*
+		 * Split the URL into site and optional :port, url is left
+		 * pointing to the path, after the first /
+		 */
+		url += 7;
+		while(*url) {
+			if(doingsite && (*url == ':')) {
+				port = 0;
+				while(isdigit((unsigned char)*++url)) {
+					port *= 10;
+					port += *url - '0';
+				}
+				continue;
+			}
+			if(doingsite && (*url == '/')) {
+				url++;
+				break;
+			}
+			*ptr++ = *url++;
+		}
+	}
+	*ptr = '\0';
+
+	memset((char *)&server, '\0', sizeof(struct sockaddr_in));
+	server.sin_family = AF_INET;
+	server.sin_port = (in_port_t)htons(port);
+
+	/* The site may be a dotted quad, if it isn't then look up the name */
+	ip = inet_addr(site);
+#ifdef	INADDR_NONE
+	if(ip == INADDR_NONE) {
+#else
+	if(ip == (in_addr_t)-1) {
+#endif
+		struct hostent h;
+
+		if((my_r_gethostbyname(site, &h, buf, sizeof(buf)) != 0) ||
+		   (h.h_addr_list == NULL) ||
+		   (h.h_addr == NULL)) {
+			cli_dbgmsg("Unknown host %s\n", site);
+			fclose(fp);
+			return NULL;
+		}
+
+		memcpy((char *)&ip, h.h_addr, sizeof(ip));
+	}
+	if((sd = socket(AF_INET, SOCK_STREAM, tcp)) < 0) {
+		fclose(fp);
+		return NULL;
+	}
+	/* The socket is only non-blocking while it is being connected */
+#ifdef	F_GETFL
+	flags = fcntl(sd, F_GETFL, 0);
+
+	if(flags == -1L)
+		cli_warnmsg("getfl: %s\n", strerror(errno));
+	else if(fcntl(sd, F_SETFL, (long)(flags | O_NONBLOCK)) < 0)
+		cli_warnmsg("setfl: %s\n", strerror(errno));
+#else
+	flags = -1L;
+#endif
+	server.sin_addr.s_addr = ip;
+	if(nonblock_connect(url, sd, (struct sockaddr *)&server) < 0) {
+		closesocket(sd);
+		fclose(fp);
+		return NULL;
+	}
+#ifdef	F_SETFL
+	if(flags != -1L)
+		if(fcntl(sd, F_SETFL, flags))
+			cli_warnmsg("f_setfl: %s\n", strerror(errno));
+#endif
+
+	/*
+	 * TODO: consider HTTP/1.1
+	 */
+	if(via_proxy)
+		/* A proxy is asked for the whole URL */
+		snprintf(buf, sizeof(buf) - 1,
+			"GET %s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
+				url, VERSION);
+	else
+		snprintf(buf, sizeof(buf) - 1,
+			"GET /%s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
+				url, VERSION);
+
+	/*cli_dbgmsg("%s", buf);*/
+
+	if(send(sd, buf, (int)strlen(buf), 0) < 0) {
+		closesocket(sd);
+		fclose(fp);
+		return NULL;
+	}
+
+	/* Nothing more will be sent */
+#ifdef	SHUT_WR
+	shutdown(sd, SHUT_WR);
+#else
+	shutdown(sd, 1);
+#endif
+
+	firstpacket = 1;
+
+	/* Save the reply until the other end closes the connection */
+	for(;;) {
+		fd_set set;
+		struct timeval tv;
+		int n;
+
+		FD_ZERO(&set);
+		FD_SET(sd, &set);
+
+		tv.tv_sec = 30;	/* FIXME: make this customisable */
+		tv.tv_usec = 0;
+
+		if(select((int)sd + 1, &set, NULL, NULL, &tv) < 0) {
+			if(errno == EINTR)
+				continue;
+			closesocket(sd);
+			fclose(fp);
+			return NULL;
+		}
+		if(!FD_ISSET(sd, &set)) {
+			/* Timed out */
+			fclose(fp);
+			closesocket(sd);
+			return NULL;
+		}
+		n = recv(sd, buf, sizeof(buf) - 1, 0);
+
+		if(n < 0) {
+			fclose(fp);
+			closesocket(sd);
+			return NULL;
+		}
+		if(n == 0)
+			break;
+
+		/*
+		 * FIXME: Handle header in more than one packet
+		 */
+		if(firstpacket) {
+			char *statusptr;
+
+			buf[n] = '\0';
+
+			/* The status is the second word of the first line */
+			statusptr = cli_strtok(buf, 1, " ");
+
+			if(statusptr) {
+				int status = atoi(statusptr);
+
+				cli_dbgmsg("HTTP status %d\n", status);
+
+				free(statusptr);
+
+				if((status == 301) || (status == 302)) {
+					char *location;
+
+					location = strstr(buf, "\nLocation: ");
+
+					if(location) {
+						char *end;
+
+						unlink(fout);
+						if(arg->depth >= FOLLOWURLS) {
+							cli_warnmsg("URL %s will not be followed to %s (FOLLOWURLS limit %d was reached)\n",
+								arg->url, location, FOLLOWURLS);
+							break;
+						}
+
+						fclose(fp);
+						closesocket(sd);
+
+						/* Skip over "\nLocation: " */
+						location += 11;
+						free(arg->url);
+						end = location;
+						while(*end && (*end != '\n'))
+							end++;
+						*end = '\0';
+						arg->url = cli_strdup(location);
+						arg->depth++;
+						cli_dbgmsg("Redirecting to %s\n", arg->url);
+						return getURL(arg);
+					}
+				}
+			}
+			/*
+			 * Don't write the HTTP header
+			 */
+			if((ptr = strstr(buf, "\r\n\r\n")) != NULL) {
+				ptr += 4;
+				n -= (int)(ptr - buf);
+			} else if((ptr = strstr(buf, "\n\n")) != NULL) {
+				ptr += 2;
+				n -= (int)(ptr - buf);
+			} else
+				ptr = buf;
+
+			firstpacket = 0;
+		} else
+			ptr = buf;
+
+		if(n && (fwrite(ptr, n, 1, fp) != 1)) {
+			cli_warnmsg("Error writing %d bytes to %s\n",
+				n, fout);
+			break;
+		}
+	}
+
+	fclose(fp);
+	closesocket(sd);
+	return NULL;
+}
+
+/*
+ * my_r_gethostbyname - look up a host by name, as safely as the platform
+ *	allows when threads are in use
+ *
+ * Have a copy here because r_gethostbyname is in shared not libclamav :-(
+ *
+ * hostname:	the name to look up
+ * hp:		where to put the result
+ * buf, len:	work area for the versions of gethostbyname_r that need one
+ *
+ * Where there is no gethostbyname_r, gethostbyname is called (with a mutex
+ * held if CL_THREAD_SAFE is defined) and the hostent copied into hp. That is
+ * a shallow copy, the pointers inside hp still refer to the data that
+ * belongs to gethostbyname.
+ *
+ * Returns 0 for OK, non-zero for error: -1 if hostname or hp is NULL,
+ * otherwise the error reported by the resolver
+ */
+static int
+my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
+{
+#if	defined(HAVE_GETHOSTBYNAME_R_6)
+	/* e.g. Linux */
+	struct hostent *hp2 = NULL;
+	int ret = -1;
+
+	if((hostname == NULL) || (hp == NULL))
+		return -1;
+	/*
+	 * This version returns 0 for OK and a positive error number on
+	 * failure, and when the host isn't found it can return 0 with hp2
+	 * set to NULL, so testing for < 0 isn't enough
+	 */
+	if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) || (hp2 == NULL))
+		return (ret != 0) ? ret : -1;
+#elif	defined(HAVE_GETHOSTBYNAME_R_5)
+	/* e.g. BSD, Solaris, Cygwin */
+	/*
+	 * Configure doesn't work on BeOS. We need -lnet to link, but configure
+	 * doesn't add it, so you need to do something like
+	 *	LIBS=-lnet ./configure --enable-cache --disable-clamav
+	 */
+	int ret = -1;
+
+	if((hostname == NULL) || (hp == NULL))
+		return -1;
+	if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
+		return ret;
+#elif	defined(HAVE_GETHOSTBYNAME_R_3)
+	/* e.g. HP/UX, AIX */
+	if((hostname == NULL) || (hp == NULL))
+		return -1;
+	/* The second argument is the hostent to fill in, not its address */
+	if(gethostbyname_r(hostname, hp, (struct hostent_data *)buf) < 0)
+		return h_errno;
+#else
+	/* Single thread the code e.g. VS2005 */
+	struct hostent *hp2;
+#ifdef  CL_THREAD_SAFE
+	static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+	if((hostname == NULL) || (hp == NULL))
+		return -1;
+#ifdef  CL_THREAD_SAFE
+	pthread_mutex_lock(&hostent_mutex);
+#endif
+	if((hp2 = gethostbyname(hostname)) == NULL) {
+#ifdef  CL_THREAD_SAFE
+		pthread_mutex_unlock(&hostent_mutex);
+#endif
+		return h_errno;
+	}
+	memcpy(hp, hp2, sizeof(struct hostent));
+#ifdef  CL_THREAD_SAFE
+	pthread_mutex_unlock(&hostent_mutex);
+#endif
+
+#endif
+	return 0;
+}
+
+/*
+ * nonblock_connect - connect a non-blocking socket, giving up after
+ *	URL_TIMEOUT seconds
+ *
+ * Non-blocking connect, based on an idea by Everton da Silva Marques
+ *	 <everton.marques at gmail.com>
+ *
+ * url:		only used in messages
+ * sock:	the socket to connect
+ * addr:	where to connect to, taken to be a struct sockaddr_in
+ *
+ * If the connection can't be made at once, select() is used to wait until
+ * the socket can be written to, which is when the outcome is known. A
+ * failure of select() is tolerated up to NONBLOCK_SELECT_MAX_FAILURES
+ * times, and select() may run out of time up to NONBLOCK_MAX_ATTEMPTS
+ * times.
+ *
+ * Returns 0 if connected, -1 if not
+ */
+static int
+nonblock_connect(const char *url, SOCKET sock, const struct sockaddr *addr)
+{
+	struct timeval deadline;	/* When we should time out */
+	int failures_left;	/* Max. of unexpected select() failures */
+	int tries;
+	int nfds;		/* Highest fdset fd plus 1 */
+
+	gettimeofday(&deadline, 0);	/* store when we started to connect */
+
+	if(connect(sock, addr, sizeof(struct sockaddr_in)) == 0)
+		return connect_error(url, sock);
+
+	if(errno == EISCONN)
+		return 0; /* connected */
+
+	if((errno != EALREADY) && (errno != EINPROGRESS)) {
+		cli_warnmsg("%s: connect: %s\n", url, strerror(errno));
+		return -1; /* failed */
+	}
+
+	/* The connection is on its way, wait for it */
+	cli_dbgmsg("%s: connect: %s\n", url, strerror(errno));
+
+	nfds = (int)sock + 1;
+	failures_left = NONBLOCK_SELECT_MAX_FAILURES;
+	tries = 1;
+	deadline.tv_sec += URL_TIMEOUT;
+
+	for (;;) {
+		struct timeval now, remaining;
+		fd_set writefds;
+		int expired, nready;
+
+		/* Force timeout if we ran out of time */
+		gettimeofday(&now, 0);
+		if(now.tv_sec == deadline.tv_sec)
+			expired = (now.tv_usec > deadline.tv_usec);
+		else
+			expired = (now.tv_sec > deadline.tv_sec);
+
+		if(expired) {
+			cli_warnmsg("%s: connect timeout (%d secs)\n",
+				url, URL_TIMEOUT);
+			return -1;
+		}
+
+		/* Calculate how long to wait */
+		remaining.tv_sec = deadline.tv_sec - now.tv_sec;
+		remaining.tv_usec = deadline.tv_usec - now.tv_usec;
+		if(remaining.tv_usec < 0) {
+			/* Borrow a second */
+			remaining.tv_sec--;
+			remaining.tv_usec += 1000000;
+		}
+
+		/* Init writefds with 'sock' as the only fd */
+		FD_ZERO(&writefds);
+		FD_SET(sock, &writefds);
+
+		nready = select(nfds, 0, &writefds, 0, &remaining);
+		if(nready < 0) {
+			cli_warnmsg("%s: select attempt %d %s\n",
+				url, failures_left, strerror(errno));
+			if(--failures_left < 0)
+				return -1; /* failed */
+			continue; /* not timed-out, try again */
+		}
+
+		cli_dbgmsg("%s: select = %d\n", url, nready);
+
+		if(nready)
+			return connect_error(url, sock);
+
+		/* timeout */
+		if(tries++ == NONBLOCK_MAX_ATTEMPTS) {
+			cli_warnmsg("timeout connecting to %s\n", url);
+			return -1; /* failed */
+		}
+	}
+}
+
+static int
+connect_error(const char *url, SOCKET sock)
+{
+#ifdef	SO_ERROR
+	int optval;
+	socklen_t optlen = sizeof(optval);
+
+	getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen);
+
+	if(optval) {
+		cli_warnmsg("%s: %s\n", url, strerror(optval));
+		return -1;
+	}
+#endif
+
+	return 0;
+}
+
+#endif
+
+#ifdef HAVE_BACKTRACE
+static void
+sigsegv(int sig)	/* SIGSEGV handler; the sig argument itself is not used */
+{
+	signal(SIGSEGV, SIG_DFL);	/* default action again, a further fault is not caught here */
+	print_trace(1);			/* 1: copy the backtrace to syslog as well */
+	exit(SIGSEGV);
+}
+
+/* Log a backtrace of the caller with cli_errmsg and, if use_syslog, syslog */
+static void
+print_trace(int use_syslog)
+{
+	void *array[10];
+	char **strings;
+	int i, size;	/* backtrace() hands back an int */
+	pid_t pid = getpid();
+
+	cli_errmsg("Segmentation fault, attempting to print backtrace\n");
+
+	size = backtrace(array, 10);
+	strings = backtrace_symbols(array, size);
+
+	cli_errmsg("Backtrace of pid %d:\n", (int)pid);
+	if(use_syslog)
+		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);
+
+	for(i = 0; strings && (i < size); i++) {	/* strings is NULL if the lookup failed */
+		cli_errmsg("%s\n", strings[i]);
+		if(use_syslog)
+			syslog(LOG_ERR, "bt[%d]: %s", i, strings[i]);
+	}
+
+#ifdef	SAVE_TMP
+	cli_errmsg("The errant mail file has been saved\n");
+#endif
+	/* #else TODO: dump the current email */
+
+	free(strings);
+}
+#endif
+
+/*
+ * Is this header worth keeping? True for the three Content-* commands and
+ * for the "From", "Received" and "De" headers. See also clamav-milter
+ */
+static bool
+usefulHeader(int commandNumber, const char *cmd)
+{
+	static const char *const wanted[] = { "From", "Received", "De" };
+	size_t n;
+
+	if((commandNumber == CONTENT_TRANSFER_ENCODING) ||
+	   (commandNumber == CONTENT_DISPOSITION) ||
+	   (commandNumber == CONTENT_TYPE))
+		return TRUE;
+
+	for(n = 0; n < sizeof(wanted) / sizeof(wanted[0]); n++)
+		if(strcasecmp(cmd, wanted[n]) == 0)
+			return TRUE;
+	return FALSE;
+}
+
+/* Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r": each
+ * is stored as one '\n'. Returns buffer, or NULL at EOF, on a read error or
+ * on a bad call. A line too long for buffer is returned in several pieces */
+static char *
+getline_from_mbox(char *buffer, size_t len, FILE *fin)
+{
+	char *ret;
+
+	if(feof(fin))
+		return NULL;
+
+	if((len == 0) || (buffer == NULL)) {
+		cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs\n");
+		return NULL;
+	}
+
+	ret = buffer;
+
+	do {
+		int c = getc(fin);
+
+		if(ferror(fin))
+			return NULL;
+
+		switch(c) {
+			case '\n':
+				*buffer++ = '\n';
+				c = getc(fin);	/* swallow the '\r' of a "\n\r" pair */
+				if((c != '\r') && !feof(fin))
+					ungetc(c, fin);
+				break;
+			default:
+				*buffer++ = (char)c;
+				continue;	/* next character, if there is still room */
+			case EOF:
+				break;
+			case '\r':
+				*buffer++ = '\n';
+				c = getc(fin);	/* swallow the '\n' of a "\r\n" pair */
+				if((c != '\n') && !feof(fin))
+					ungetc(c, fin);
+				break;
+		}
+		break;	/* end of line or end of file */
+	} while(--len > 1);
+
+	if(len == 0) {
+		/* the email probably breaks RFC821 */
+		cli_warnmsg("getline_from_mbox: buffer overflow stopped, line lost\n");
+		return NULL;
+	}
+	*buffer = '\0';	/* NOTE(review): this is buffer[1] if len was 1 and a line end was read */
+
+	if(len == 1)
+		/* overflows will have appeared on separate lines */
+		cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");
+
+	return ret;
+}
+
+/*
+ * Is this line a candidate for the start of a bounce message?
+ *
+ * Only lines of 6 to 71 characters are considered. One that starts with
+ * "From " or ">From " is accepted when, from its fifth character on, it
+ * holds at least 6 spaces and at least 11 digits. Any other line is a
+ * candidate if cli_filetype() reports it as CL_TYPE_MAIL.
+ */
+static bool
+isBounceStart(const char *line)
+{
+	const char *p;
+	size_t len;
+	int blanks = 0, digits = 0;
+
+	if((line == NULL) || (*line == '\0'))
+		return FALSE;
+
+	len = strlen(line);
+	if((len < 6) || (len >= 72))
+		return FALSE;
+
+	if((memcmp(line, "From ", 5) != 0) &&
+	   (memcmp(line, ">From ", 6) != 0))
+		/* not an mbox style "From " line, look at the content */
+		return cli_filetype((const unsigned char *)line, len) == CL_TYPE_MAIL;
+
+	/* len >= 6 so line[4] exists; count from there to the end */
+	for(p = &line[4]; *p != '\0'; p++) {
+		if(*p == ' ')
+			blanks++;
+		else if(isdigit((*p) & 0xFF))
+			digits++;
+	}
+
+	if(blanks < 6)
+		return FALSE;
+	if(digits < 11)
+		return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * Export m as a binhex encoded message through messageToFileblob() and
+ * scan the result. Returns TRUE only if that scan reports CL_VIRUS; a
+ * message that can't be exported is logged and counts as not infected
+ */
+static bool
+exportBinhexMessage(mbox_ctx *mctx, message *m)
+{
+	fileblob *blob;
+	bool virus;
+
+	/* no encoding recorded so far: mark the message as binhex */
+	if(messageGetEncoding(m) == NOENCODING)
+		messageSetEncoding(m, "x-binhex");
+
+	blob = messageToFileblob(m, mctx->dir, 0);
+	if(blob == NULL) {
+		cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
+		return FALSE;
+	}
+
+	cli_dbgmsg("Binhex file decoded to %s\n", fileblobGetFilename(blob));
+
+	virus = (fileblobScanAndDestroy(blob) == CL_VIRUS) ? TRUE : FALSE;
+	mctx->files++;
+	return virus;
+}
+
+/*
+ * Locate any bounce message in the lines from start and scan it. Returns the
+ * scan's cl_status, or CL_CLEAN when no bounce is found or nothing is saved */
+static int
+exportBounceMessage(mbox_ctx *mctx, text *start)
+{
+	int rc = CL_CLEAN;
+	text *t;
+	fileblob *fb;
+
+	/*
+	 * Attempt to save the original (unbounced)
+	 * message - clamscan will find that in the
+	 * directory and call us again (with any luck)
+	 * having found an e-mail message to handle.
+	 *
+	 * This finds a lot of false positives, the
+	 * search that a content type is in the
+	 * bounce (i.e. it's after the bounce header)
+	 * helps a bit.
+	 *
+	 * messageAddLine
+	 * optimisation could help here, but needs
+	 * careful thought, do it with line numbers
+	 * would be best, since the current method in
+	 * messageAddLine of checking encoding first
+	 * must remain otherwise non bounce messages
+	 * won't be scanned
+	 */
+	for(t = start; t; t = t->t_next) {
+		const char *txt = lineGetData(t->t_line);
+		char cmd[RFC2821LENGTH + 1];
+
+		if(txt == NULL)
+			continue;
+		if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)	/* cmd: the text before the first ':' */
+			continue;
+
+		switch(tableFind(mctx->rfc821Table, cmd)) {
+			case CONTENT_TRANSFER_ENCODING:
+				if((strstr(txt, "7bit") == NULL) &&
+				   (strstr(txt, "8bit") == NULL))
+					break;
+				continue;	/* 7bit/8bit: keep looking */
+			case CONTENT_DISPOSITION:
+				break;
+			case CONTENT_TYPE:
+				if(strstr(txt, "text/plain") != NULL)
+					t = NULL;	/* reported below as no bounce found */
+				break;
+			default:
+				if(strcasecmp(cmd, "From") == 0)
+					start = t;	/* save from the latest From/Received on */
+				else if(strcasecmp(cmd, "Received") == 0)
+					start = t;
+				continue;
+		}
+		break;	/* reached by the switch's break: stop searching */
+	}
+	if(t && ((fb = fileblobCreate()) != NULL)) {
+		cli_dbgmsg("Found a bounce message\n");
+		fileblobSetFilename(fb, mctx->dir, "bounce");
+		fileblobSetCTX(fb, mctx->ctx);
+		if(textToFileblob(start, fb, 1) == NULL) {
+			cli_dbgmsg("Nothing new to save in the bounce message\n");
+			fileblobDestroy(fb);
+		} else
+			rc = fileblobScanAndDestroy(fb);
+		mctx->files++;	/* counted even when nothing was saved */
+	} else
+		cli_dbgmsg("Not found a bounce message\n");
+
+	return rc;
+}
+
+/*
+ * Handle the ith element of a number of multiparts, e.g. multipart/alternative.
+ * Sets *rc, may free messages[i]; returns the (possibly NULL) mainMessage */
+static message *
+do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
+{
+	bool addToText = FALSE;
+	const char *dtype;
+#ifndef	SAVE_TO_DISC
+	message *body;
+#endif
+	message *aMessage = messages[i];
+	const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
+
+	if(aMessage == NULL)
+		return mainMessage;
+
+	if(*rc != OK)	/* an earlier part already failed or was infected */
+		return mainMessage;
+
+	cli_dbgmsg("Mixed message part %d is of type %d\n",
+		i, messageGetMimeType(aMessage));
+
+	switch(messageGetMimeType(aMessage)) {
+		case APPLICATION:
+		case AUDIO:
+		case IMAGE:
+		case VIDEO:
+			break;	/* exported and scanned after the switch */
+		case NOMIME:
+			cli_dbgmsg("No mime headers found in multipart part %d\n", i);
+			if(mainMessage) {
+				if(binhexBegin(aMessage)) {
+					cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
+
+					if(exportBinhexMessage(mctx, mainMessage))	/* NOTE(review): exports mainMessage though aMessage was tested - confirm */
+						*rc = VIRUS;
+				}
+				if(mainMessage != messageIn)
+					messageDestroy(mainMessage);
+				mainMessage = NULL;
+			} else if(aMessage) {	/* always true: NULL was rejected above */
+				if(binhexBegin(aMessage)) {
+					cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
+					if(exportBinhexMessage(mctx, aMessage))
+						*rc = VIRUS;
+					assert(aMessage == messages[i]);
+					messageReset(messages[i]);
+				}
+			}
+			addToText = TRUE;
+			if(messageGetBody(aMessage) == NULL)
+				/*
+				 * No plain text version
+				 */
+				cli_dbgmsg("No plain text alternative\n");
+			break;
+		case TEXT:
+			dtype = messageGetDispositionType(aMessage);
+			cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
+				dtype);
+			if(strcasecmp(dtype, "attachment") == 0)
+				break;
+			if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
+				const char *cptr;
+
+				if(mainMessage && (mainMessage != messageIn))
+					messageDestroy(mainMessage);
+				mainMessage = NULL;
+				cptr = messageGetMimeSubtype(aMessage);
+				cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
+				if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
+				   (messageGetEncoding(aMessage) == NOENCODING)) {
+					/*
+					 * Strictly speaking, a text/plain part
+					 * is not an attachment. We pretend it
+					 * is so that we can decode and scan it
+					 */
+					if(!messageHasFilename(aMessage)) {
+						cli_dbgmsg("Adding part to main message\n");
+						addToText = TRUE;
+					} else
+						cli_dbgmsg("Treating inline as attachment\n");
+				} else {
+					const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
+					if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
+						checkURLs(aMessage, mctx, rc, 1);
+					else if(doPhishingScan)
+						checkURLs(aMessage, mctx, rc, is_html);
+					messageAddArgument(aMessage,
+						"filename=mixedtextportion");
+				}
+				break;
+			}
+			cli_dbgmsg("Text type %s is not supported\n", dtype);
+			return mainMessage;	/* messages[i] is left for the caller */
+		case MESSAGE:
+			/* Content-Type: message/rfc822 */
+			cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
+				messageGetEncoding(aMessage));
+#ifndef	SCAN_UNENCODED_BOUNCES
+			switch(messageGetEncoding(aMessage)) {
+				case NOENCODING:
+				case EIGHTBIT:
+				case BINARY:
+					if(encodingLine(aMessage) == NULL) {
+						/*
+						 * This means that the message
+						 * has no attachments
+						 *
+						 * The test for
+						 * messageGetEncoding is needed
+						 * since encodingLine won't have
+						 * been set if the message
+						 * itself has been encoded
+						 */
+						cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
+						assert(aMessage == messages[i]);
+						messageDestroy(messages[i]);
+						messages[i] = NULL;
+						return mainMessage;
+					}
+					/* FALLTHROUGH */
+				default:
+					cli_dbgmsg("Encoded multipart/message will be scanned\n");
+			}
+#endif
+#if	0
+			messageAddStrAtTop(aMessage,
+				"Received: by clamd (message/rfc822)");
+#endif
+#ifdef	SAVE_TO_DISC
+			/*
+			 * Save this embedded message
+			 * to a temporary file
+			 */
+			if(saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
+				*rc = VIRUS;
+			assert(aMessage == messages[i]);
+			messageDestroy(messages[i]);
+			messages[i] = NULL;
+#else
+			/*
+			 * Scan in memory, faster but is open to DoS attacks
+			 * when many nested levels are involved.
+			 */
+			body = parseEmailHeaders(aMessage, mctx->rfc821Table);
+
+			/*
+			 * We've finished with the
+			 * original copy of the message,
+			 * so throw that away and
+			 * deal with the encapsulated
+			 * message as a message.
+			 * This can save a lot of memory
+			 */
+			assert(aMessage == messages[i]);
+			messageDestroy(messages[i]);
+			messages[i] = NULL;
+			if(body) {
+				messageSetCTX(body, mctx->ctx);
+				*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
+				if((*rc == OK) && messageContainsVirus(body))
+					*rc = VIRUS;
+				messageDestroy(body);
+			}
+#endif
+			return mainMessage;
+		case MULTIPART:
+			/*
+			 * It's a multi part within a multi part
+			 * Run the message parser on this bit, it won't
+			 * be an attachment
+			 */
+			cli_dbgmsg("Found multipart inside multipart\n");
+			if(aMessage) {	/* always true: NULL was rejected above */
+				/*
+				 * The headers were parsed when reading in the
+				 * whole multipart section
+				 */
+				*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
+				cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
+				assert(aMessage == messages[i]);
+				messageDestroy(messages[i]);
+				messages[i] = NULL;
+			} else {	/* not reached, see the test above */
+				*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
+				if(mainMessage && (mainMessage != messageIn))
+					messageDestroy(mainMessage);
+				mainMessage = NULL;
+			}
+			return mainMessage;
+		default:
+			cli_warnmsg("Only text and application attachments are fully supported, type = %d\n",
+				messageGetMimeType(aMessage));
+			/* fall through - we may be able to salvage something */
+	}
+
+	if(*rc != VIRUS) {
+		if(addToText) {
+			cli_dbgmsg("Adding to non mime-part\n");
+			if(messageGetBody(aMessage))
+				*tptr = textMove(*tptr, messageGetBody(aMessage));
+		} else {
+			fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
+
+			if(fb) {
+				if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+					*rc = VIRUS;
+				mctx->files++;
+			}
+		}
+		if(messageContainsVirus(aMessage))
+			*rc = VIRUS;
+	}
+	messageDestroy(aMessage);	/* this part has been dealt with */
+	messages[i] = NULL;
+
+	return mainMessage;
+}
+
+/*
+ * Count the double quote (") characters in the NUL terminated string buf
+ */
+static int
+count_quotes(const char *buf)
+{
+	const char *q;
+	int total = 0;
+
+	for(q = strchr(buf, '"'); q != NULL; q = strchr(q + 1, '"'))
+		total++;
+
+	return total;
+}
+
+/*
+ * Will the line after t be a folded header? See RFC2822 section 2.2.3.
+ * TRUE if it starts with white space or, when it has a '=', t ends in ';' */
+static bool
+next_is_folded_header(const text *t)
+{
+	const text *next = t->t_next;
+	const char *data, *ptr;
+
+	if(next == NULL)
+		return FALSE;
+
+	if(next->t_line == NULL)
+		return FALSE;
+
+	data = lineGetData(next->t_line);
+
+	/*
+	 * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
+	 * previous entry.
+	 */
+	if(isblank((unsigned char)data[0]))
+		return TRUE;
+
+	if(strchr(data, '=') == NULL)
+		/*
+		 * Avoid false positives with
+		 *	Content-Type: text/html;
+		 *	Content-Transfer-Encoding: quoted-printable
+		 */
+		return FALSE;
+
+	/*
+	 * Some are broken and don't fold headers lines correctly as per
+	 * section 2.2.3 of RFC2822. Generally they miss the white space at
+	 * the start of the fold line:
+	 *	Content-Type: multipart/related;
+	 *	type="multipart/alternative";
+	 *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
+	 * should read:
+	 *	Content-Type: multipart/related;
+	 *	 type="multipart/alternative";
+	 *	 boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
+	 * Since we're a virus checker not an RFC verifier we need to handle
+	 * these: the next line is a fold if this one ends in ';', ignoring
+	 * trailing white space. The first character is never examined.
+	 */
+	if(t->t_line == NULL)	/* same guard as for next above */
+		return FALSE;
+	data = lineGetData(t->t_line);
+	ptr = strchr(data, '\0');
+	while((ptr - data) > 1)	/* never steps ptr to before data */
+		switch(*--ptr) {
+			case ';':
+				return TRUE;
+			case '\n':
+			case ' ':
+			case '\r':
+			case '\t':
+				continue;	/* white space at end of line */
+			default:
+				return FALSE;
+		}
+	return FALSE;
+}
+
+/*
+ * Called on the first line of the body of an email: broken messages can
+ * have a newline in the middle of their headers, in which case that line
+ * is really a header. Only "Message-Id: " and "Date: " are recognised
+ */
+static bool
+newline_in_header(const char *line)
+{
+	bool isHeader;
+
+	cli_dbgmsg("newline_in_header, check \"%s\"\n", line);
+
+	isHeader = (strncmp(line, "Message-Id: ", 12) == 0) ||
+		(strncmp(line, "Date: ", 6) == 0);
+
+	return isHeader;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,271 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security,
+ * Inc. MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Written by Solar Designer <solar at openwall.com> in 2001, and placed
+ * in the public domain.  There's absolutely no warranty.
+ *
+ * This differs from Colin Plumb's older public domain implementation in
+ * that no 32-bit integer data type is required, there's no compile-time
+ * endianness configuration, and the function prototypes match OpenSSL's.
+ * The primary goals are portability and ease of use.
+ *
+ * This implementation is meant to be fast, but not as fast as possible.
+ * Some known optimizations are not included to reduce source code size
+ * and avoid compile-time configuration.
+ */
+
+#include <string.h>
+
+#include "md5.h"
+
+/*
+ * The basic MD5 functions, one per round.
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ */
+#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)			((x) ^ (y) ^ (z))
+#define I(x, y, z)			((y) ^ ((x) | ~(z)))
+
+/*
+ * The MD5 transformation for all four rounds. Expands to three complete
+ * statements, the last with its own ';', so uses take no semicolon.
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+	(a) += f((b), (c), (d)) + (x) + (t); \
+	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+	(a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them
+ * in a properly aligned word in host byte order. GET re-reads that word.
+ * The check for little-endian architectures that tolerate unaligned
+ * memory accesses is just an optimization.  Nothing will break if it
+ * doesn't work. NOTE(review): that branch reads sizeof(MD5_u32plus) bytes
+ * straight from ptr, so it presumes a 4 byte MD5_u32plus - confirm in md5.h
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+	(*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+	SET(n)
+#else
+#define SET(n) \
+	(ctx->block[(n)] = \
+	(MD5_u32plus)ptr[(n) * 4] | \
+	((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+	((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+	((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+	(ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update
+ * the bit counters.  There are no alignment requirements.  size must be a
+ * non-zero multiple of 64; returns a pointer just past the data consumed. */
+static void *body(cli_md5_ctx *ctx, void *data, unsigned long size)
+{
+	unsigned char *ptr;
+	MD5_u32plus a, b, c, d;
+	MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+	ptr = data;
+
+	a = ctx->a;
+	b = ctx->b;
+	c = ctx->c;
+	d = ctx->d;
+
+	do {
+		saved_a = a;
+		saved_b = b;
+		saved_c = c;
+		saved_d = d;
+
+/* Round 1 */
+		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+/* Round 2 */
+		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+/* Round 3 */
+		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+		STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
+		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+		STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
+		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+		STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+		STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
+		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+		STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
+		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+		STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
+		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+		STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
+		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+		STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+/* Round 4 */
+		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+		a += saved_a;	/* add the state this block started from */
+		b += saved_b;
+		c += saved_c;
+		d += saved_d;
+
+		ptr += 64;
+	} while (size -= 64);	/* ends only when size reaches exactly 0 */
+
+	ctx->a = a;
+	ctx->b = b;
+	ctx->c = c;
+	ctx->d = d;
+
+	return ptr;
+}
+
+void cli_md5_init(cli_md5_ctx *ctx)
+{
+	ctx->hi = 0;	/* both halves of the length counter start at zero */
+	ctx->lo = 0;
+
+	ctx->d = 0x10325476;	/* d..a: the initial state listed in RFC 1321 */
+	ctx->c = 0x98badcfe;
+	ctx->b = 0xefcdab89;
+	ctx->a = 0x67452301;
+}
+
+void cli_md5_update(cli_md5_ctx *ctx, void *data, unsigned long size)
+{
+	const MD5_u32plus old_lo = ctx->lo;
+	const unsigned long fill = old_lo & 0x3f;	/* bytes already buffered */
+	unsigned char *src = data;
+
+	/* lo keeps the low 29 bits of the byte count, hi the rest */
+	ctx->lo = (old_lo + size) & 0x1fffffff;
+	if (ctx->lo < old_lo)
+		ctx->hi++;
+	ctx->hi += size >> 29;
+
+	if (fill) {
+		const unsigned long room = 64 - fill;
+
+		if (size < room) {
+			memcpy(&ctx->buffer[fill], src, size);
+			return;
+		}
+		/* top up the partial block and hash it */
+		memcpy(&ctx->buffer[fill], src, room);
+		src += room;
+		size -= room;
+		body(ctx, ctx->buffer, 64);
+	}
+
+	if (size >= 64) {
+		src = body(ctx, src, size & ~(unsigned long)0x3f);
+		size &= 0x3f;
+	}
+
+	memcpy(ctx->buffer, src, size);
+}
+
+/*
+ * Finish the hash: pad the buffered data, append the message length in
+ * bits and write the 16 byte digest to result (a, b, c and d in turn,
+ * least significant byte first). The context is zeroed afterwards, so
+ * it has to be initialised again before it can be reused.
+ */
+void cli_md5_final(unsigned char *result, cli_md5_ctx *ctx)
+{
+	unsigned long fill;	/* bytes held in ctx->buffer */
+	unsigned long room;	/* bytes left in ctx->buffer */
+	int i;
+
+	fill = ctx->lo & 0x3f;
+
+	ctx->buffer[fill++] = 0x80;	/* padding starts with a single 1 bit */
+
+	room = 64 - fill;
+
+	if (room < 8) {
+		/* no space for the 8 length bytes: pad out and hash this block */
+		memset(&ctx->buffer[fill], 0, room);
+		body(ctx, ctx->buffer, 64);
+		fill = 0;
+		room = 64;
+	}
+
+	memset(&ctx->buffer[fill], 0, room - 8);
+
+	/* bytes 56..63 carry the length in bits: lo first, then hi */
+	ctx->lo <<= 3;
+	for (i = 0; i < 4; i++) {
+		ctx->buffer[56 + i] = ctx->lo >> (8 * i);
+		ctx->buffer[60 + i] = ctx->hi >> (8 * i);
+	}
+
+	body(ctx, ctx->buffer, 64);
+
+	/* byte i of each state word goes out, low byte first */
+	for (i = 0; i < 4; i++) {
+		result[i] = ctx->a >> (8 * i);
+		result[4 + i] = ctx->b >> (8 * i);
+		result[8 + i] = ctx->c >> (8 * i);
+		result[12 + i] = ctx->d >> (8 * i);
+	}
+
+	/* leave nothing of the hash state behind in the context */
+
+	memset(ctx, 0, sizeof(*ctx));
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,2830 @@
+/*
+ *  Copyright (C) 2002-2006 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ * TODO: Optimise messageExport, decodeLine, messageIsEncoding
+ */
+static	char	const	rcsid[] = "$Id: message.c,v 1.195 2007/02/12 20:46:09 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef	CL_DEBUG
+#define	NDEBUG	/* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef	_REENTRANT
+#define	_REENTRANT	/* for Solaris 2.8 */
+#endif
+#endif
+
+#ifdef	C_DARWIN
+#include <sys/types.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#ifdef	HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#ifdef	CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "others.h"
+#include "str.h"
+#include "filetypes.h"
+
+#include "mbox.h"
+
+#ifndef isblank
+#define isblank(c)	(((c) == ' ') || ((c) == '\t'))
+#endif
+
+#define	RFC2045LENGTH	76	/* maximum number of characters on a line */
+
+#ifdef	HAVE_STDBOOL_H
+#include <stdbool.h>
+#else
+#ifdef	FALSE
+typedef	unsigned	char	bool;
+#else
+typedef enum	{ FALSE = 0, TRUE = 1 } bool;
+#endif
+#endif
+
+static	int	messageHasArgument(const message *m, const char *variable);
+static	void	messageIsEncoding(message *m);
+static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
+static	void	sanitiseBase64(char *s);
+#ifdef	__GNUC__
+static	unsigned	char	hex(char c)	__attribute__((const));
+static	unsigned	char	base64(char c)	__attribute__((const));
+static	unsigned	char	uudecode(char c)	__attribute__((const));
+#else
+static	unsigned	char	hex(char c);
+static	unsigned	char	base64(char c);
+static	unsigned	char	uudecode(char c);
+#endif
+static	const	char	*messageGetArgument(const message *m, int arg);
+static	void	*messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
+static	int	usefulArg(const char *arg);
+static	void	messageDedup(message *m);
+static	char	*rfc2231(const char *in);
+static	int	simil(const char *str1, const char *str2);
+
+/*
+ * These maps are ordered in decreasing likelihood of their appearance
+ * in an e-mail. Probably these should be in a table...
+ */
+static	const	struct	encoding_map {
+	const	char	*string;
+	encoding_type	type;
+} encoding_map[] = {	/* rfc2045 */
+	{	"7bit",			NOENCODING	},
+	{	"text/plain",		NOENCODING	},
+	{	"quoted-printable",	QUOTEDPRINTABLE	},	/* rfc2045 */
+	{	"base64",		BASE64		},	/* rfc2045 */
+	{	"8bit",			EIGHTBIT	},
+	{	"binary",		BINARY		},
+	{	"x-uuencode",		UUENCODE	},	/* uuencode(5) */
+	{	"x-yencode",		YENCODE		},
+	{	"x-binhex",		BINHEX		},
+	{	"us-ascii",		NOENCODING	},	/* incorrect */
+	{	"x-uue",		UUENCODE	},	/* incorrect */
+	{	"uuencode",		UUENCODE	},	/* incorrect */
+	{	NULL,			NOENCODING	}
+};
+
+static	const	struct	mime_map {
+	const	char	*string;
+	mime_type	type;
+} mime_map[] = {
+	{	"text",			TEXT		},
+	{	"multipart",		MULTIPART	},
+	{	"application",		APPLICATION	},
+	{	"audio",		AUDIO		},
+	{	"image",		IMAGE		},
+	{	"message",		MESSAGE		},
+	{	"video",		VIDEO		},
+	{	NULL,			TEXT		}
+};
+
+/*
+ * See RFC2045, section 6.8, table 1
+ */
+static const unsigned char base64Table[256] = {
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
+	52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
+	255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+	15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
+	255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+	41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+};
+
+message *	/* a zeroed message of type NOMIME, or NULL if cli_calloc fails */
+messageCreate(void)
+{
+	message *fresh = (message *)cli_calloc(1, sizeof(message));
+
+	if(fresh == NULL)
+		return NULL;
+	fresh->mimeType = NOMIME;
+	return fresh;
+}
+
+void	/* release everything m owns (via messageReset) and then m itself */
+messageDestroy(message *m)
+{
+	assert(m != NULL);	/* compiled out unless CL_DEBUG is defined (NDEBUG above) */
+
+	messageReset(m);
+
+	free(m);
+}
+
+/* Release everything m owns and put m back into its just-created state */
+void
+messageReset(message *m)
+{
+	assert(m != NULL);
+
+	/* free(NULL) is a no-op, so the string members need no guard */
+	free(m->mimeSubtype);
+	free(m->mimeDispositionType);
+
+	if(m->mimeArguments != NULL) {
+		int arg;
+
+		for(arg = 0; arg < m->numberOfArguments; arg++)
+			free(m->mimeArguments[arg]);
+		free(m->mimeArguments);
+	}
+
+	if(m->body_first != NULL)
+		textDestroy(m->body_first);
+
+	assert(m->base64chars == 0);
+
+	if(m->encodingTypes != NULL) {
+		assert(m->numberOfEncTypes > 0);
+		free(m->encodingTypes);
+	}
+
+	/* all zero plus NOMIME is what messageCreate() hands out */
+	memset(m, '\0', sizeof(message));
+	m->mimeType = NOMIME;
+}
+
+/*
+ * Handle the Content-Type header. The syntax is in RFC1341.
+ * Return success (1) or failure (0). Failure only happens when it's an
+ * unknown type and we've already received a known type, or we've received an
+ * empty type. If we receive an unknown type by itself we default to application
+ */
+int
+messageSetMimeType(message *mess, const char *type)
+{
+#ifdef	CL_THREAD_SAFE
+	static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+	const struct mime_map *m;
+	int typeval;
+	static table_t *mime_table;	/* built from mime_map on first use */
+
+	assert(mess != NULL);
+	if(type == NULL) {
+		cli_warnmsg("Empty content-type field\n");
+		return 0;
+	}
+
+	cli_dbgmsg("messageSetMimeType: '%s'\n", type);
+
+	/* Skip leading non-letters; the cast keeps bytes >= 0x80 legal for isalpha */
+	while(!isalpha((unsigned char)*type))
+		if(*type++ == '\0')
+			return 0;
+
+#ifdef	CL_THREAD_SAFE
+	pthread_mutex_lock(&mime_mutex);
+#endif
+	if(mime_table == NULL) {
+		mime_table = tableCreate();
+		if(mime_table == NULL) {
+#ifdef	CL_THREAD_SAFE
+			pthread_mutex_unlock(&mime_mutex);
+#endif
+			return 0;
+		}
+
+		for(m = mime_map; m->string; m++)
+			if(!tableInsert(mime_table, m->string, m->type)) {
+				tableDestroy(mime_table);
+				mime_table = NULL;	/* try again on the next call */
+#ifdef	CL_THREAD_SAFE
+				pthread_mutex_unlock(&mime_mutex);
+#endif
+				return 0;
+			}
+	}
+#ifdef	CL_THREAD_SAFE
+	pthread_mutex_unlock(&mime_mutex);
+#endif
+
+	typeval = tableFind(mime_table, type);
+
+	if(typeval != -1) {
+		mess->mimeType = (mime_type)typeval;
+		return 1;
+	}
+	if(mess->mimeType == NOMIME) {
+		if(strncasecmp(type, "x-", 2) == 0)
+			mess->mimeType = MEXTENSION;
+		else {
+			/*
+			 * Based on a suggestion by James Stevens
+			 *	<James at kyzo.com>
+			 * Force scanning of strange messages
+			 */
+			if(strcasecmp(type, "plain") == 0) {
+				cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
+				mess->mimeType = TEXT;
+			} else {
+				/*
+				 * Don't handle broken e-mail probably sending
+				 *	Content-Type: plain/text
+				 * instead of
+				 *	Content-Type: text/plain
+				 * as an attachment
+				 */
+				int highestSimil = 0, t = -1;
+				const char *closest = NULL;
+
+				for(m = mime_map; m->string; m++) {
+					const int s = simil(m->string, type);
+
+					if(s > highestSimil) {
+						highestSimil = s;
+						closest = m->string;
+						t = m->type;
+					}
+				}
+				if(highestSimil >= 50) {
+					cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%d%% certainty)\n",
+						type, closest,
+						(int)highestSimil);
+					mess->mimeType = (mime_type)t;
+				} else {
+					cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
+					mess->mimeType = APPLICATION;
+				}
+			}
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/* Report the MIME type currently recorded for the message */
+mime_type
+messageGetMimeType(const message *m)
+{
+	mime_type current;
+
+	assert(m != NULL);
+	current = m->mimeType;
+
+	return current;
+}
+
+/*
+ * Replace the stored MIME subtype with a copy of subtype. A NULL subtype
+ * is stored as the empty string.
+ */
+void
+messageSetMimeSubtype(message *m, const char *subtype)
+{
+	const char *replacement = subtype;
+
+	assert(m != NULL);
+
+	if(replacement == NULL) {
+		/*
+		 * Broken content-type lines do occur, e.g.
+		 *	Content-Type: text/
+		 */
+		cli_dbgmsg("Empty content subtype\n");
+		replacement = "";
+	}
+
+	/* drop the previous value before installing the new copy */
+	if(m->mimeSubtype != NULL)
+		free(m->mimeSubtype);
+
+	m->mimeSubtype = cli_strdup(replacement);
+}
+
+/* Return the stored MIME subtype, or "" when none has been stored */
+const char *
+messageGetMimeSubtype(const message *m)
+{
+	if(m->mimeSubtype == NULL)
+		return "";
+
+	return m->mimeSubtype;
+}
+
+/*
+ * Record the Content-Disposition type of the message. Any previous value is
+ * released first. A NULL, empty or all-blank disptype leaves the field NULL;
+ * otherwise a private copy is stored, with leading white space skipped and
+ * the copy passed through strstrip().
+ */
+void
+messageSetDispositionType(message *m, const char *disptype)
+{
+	assert(m != NULL);
+
+	if(m->mimeDispositionType)
+		free(m->mimeDispositionType);
+	if(disptype == NULL) {
+		m->mimeDispositionType = NULL;
+		return;
+	}
+
+	/*
+	 * It's broken for there to be an entry such as "Content-Disposition:"
+	 * However some spam and viruses are rather broken, it's a sign
+	 * that something is wrong if we get that - maybe we should force a
+	 * scan of this part
+	 *
+	 * isspace() must be given a value representable as unsigned char;
+	 * a plain char holding a byte >= 0x80 may be negative.
+	 */
+	while(*disptype && isspace((unsigned char)*disptype))
+		disptype++;
+	if(*disptype) {
+		m->mimeDispositionType = cli_strdup(disptype);
+		if(m->mimeDispositionType)
+			strstrip(m->mimeDispositionType);
+	} else
+		m->mimeDispositionType = NULL;
+}
+
+/* Return the stored disposition type, or "" when none has been stored */
+const char *
+messageGetDispositionType(const message *m)
+{
+	if(m->mimeDispositionType == NULL)
+		return "";
+
+	return m->mimeDispositionType;
+}
+
+/*
+ * Add a single MIME argument (e.g. "name=foo") to the message.
+ *
+ * NULL, empty and all-blank arguments are silently ignored, as are arguments
+ * rejected by usefulArg() and arguments that compare equal (ignoring case)
+ * to one already stored. The value kept is whatever rfc2231() returns for
+ * arg.
+ *
+ * TODO:
+ *	Arguments are held on a per message basis, they should be held on
+ * a per section basis. Otherwise what happens if two sections have two
+ * different values for charset? Probably doesn't matter for the use this
+ * code will be given, but will need fixing if this code is used elsewhere
+ */
+void
+messageAddArgument(message *m, const char *arg)
+{
+	int offset;
+
+	assert(m != NULL);
+
+	if(arg == NULL)
+		return;	/* Note: this is not an error condition */
+
+	/* cast: <ctype.h> functions are undefined for negative char values */
+	while(isspace((unsigned char)*arg))
+		arg++;
+
+	if(*arg == '\0')
+		/* Empty argument? Probably a broken mail client... */
+		return;
+
+	cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);
+
+	if(!usefulArg(arg))
+		return;
+
+	/* Look for a duplicate, stopping at the first free (NULL) slot */
+	for(offset = 0; offset < m->numberOfArguments; offset++)
+		if(m->mimeArguments[offset] == NULL)
+			break;
+		else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
+			return;	/* already in there */
+
+	if(offset == m->numberOfArguments) {
+		/* No free slot: grow the array by one entry */
+		char **ptr;
+
+		m->numberOfArguments++;
+		ptr = (char **)cli_realloc(m->mimeArguments, m->numberOfArguments * sizeof(char *));
+		if(ptr == NULL) {
+			m->numberOfArguments--;
+			return;
+		}
+		m->mimeArguments = ptr;
+	}
+
+	arg = m->mimeArguments[offset] = rfc2231(arg);
+
+	/*
+	 * This is terribly broken from an RFC point of view but is useful
+	 * for catching viruses which have a filename but no type of
+	 * mime. By defaulting to application rather than to nomime we can
+	 * ensure they're saved and scanned
+	 */
+	if(arg && ((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0)))
+		if(messageGetMimeType(m) == NOMIME) {
+			cli_dbgmsg("Force mime encoding to application\n");
+			messageSetMimeType(m, "application");
+		}
+}
+
+/*
+ * Add in all the arguments.
+ * Cope with:
+ *	name="foo bar.doc"
+ *	charset=foo name=bar
+ *
+ * s is scanned for pairs separated by white space or ';'. A quoted value is
+ * rebuilt as key=value with the quotes removed; an unquoted pair is passed on
+ * verbatim up to the next white space. Each result is handed to
+ * messageAddArgument(). Parsing stops at the first pair that cannot be
+ * understood.
+ */
+void
+messageAddArguments(message *m, const char *s)
+{
+	const char *string = s;
+
+	/* check before the pointer is handed to the logger */
+	assert(string != NULL);
+
+	cli_dbgmsg("Add arguments '%s'\n", string);
+
+	while(*string) {
+		const char *key, *cptr;
+		char *data, *field;
+
+		/*
+		 * Header bytes may be >= 0x80; <ctype.h> functions need a
+		 * value representable as unsigned char, hence the casts in
+		 * this function.
+		 */
+		if(isspace((unsigned char)*string) || (*string == ';')) {
+			string++;
+			continue;
+		}
+
+		key = string;
+
+		data = strchr(string, '=');
+
+		/*
+		 * Some spam breaks RFC2045 by using ':' instead of '='
+		 * e.g.:
+		 *	Content-Type: text/html; charset:ISO-8859-1
+		 * should be:
+		 *	Content-type: text/html; charset=ISO-8859-1
+		 *
+		 * We give up with lines that are completely broken because
+		 * we don't have ESP and don't know what was meant to be there.
+		 * It's unlikely to really be a problem.
+		 */
+		if(data == NULL)
+			data = strchr(string, ':');
+
+		if(data == NULL) {
+			/*
+			 * Completely broken, give up
+			 */
+			cli_dbgmsg("Can't parse header \"%s\"\n", s);
+			return;
+		}
+
+		string = &data[1];
+
+		/*
+		 * Handle white space to the right of the equals sign
+		 * This breaks RFC2045 which has:
+		 *	parameter := attribute "=" value
+		 *	attribute := token   ; case-insensitive
+		 *	token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
+		 *		or tspecials>
+		 * But too many MUAs ignore this
+		 *
+		 * isspace('\0') is false, so the loop stops at the terminator
+		 */
+		while(isspace((unsigned char)*string))
+			string++;
+
+		cptr = string++;
+
+		if(strlen(key) == 0)
+			continue;
+
+		if(*cptr == '"') {
+			char *ptr, *kcopy;
+
+			/*
+			 * The field is in quotes, so look for the
+			 * closing quotes
+			 */
+			kcopy = cli_strdup(key);
+
+			if(kcopy == NULL)
+				return;
+
+			/*
+			 * Cut the copy down to the key. A separator is
+			 * always present: it was found in key above.
+			 */
+			ptr = strchr(kcopy, '=');
+			if(ptr == NULL)
+				ptr = strchr(kcopy, ':');
+			*ptr = '\0';
+
+			string = strchr(++cptr, '"');
+
+			if(string == NULL) {
+				cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
+				string = "";
+			} else
+				string++;
+
+			if(!usefulArg(kcopy)) {
+				free(kcopy);
+				continue;
+			}
+
+			data = cli_strdup(cptr);
+
+			ptr = (data) ? strchr(data, '"') : NULL;
+			if(ptr == NULL) {
+				/*
+				 * Weird e-mail header such as:
+				 * Content-Type: application/octet-stream; name="
+				 * "
+				 * Content-Transfer-Encoding: base64
+				 * Content-Disposition: attachment; filename="
+				 * "
+				 *
+				 * TODO: the file should still be saved and
+				 * virus checked
+				 */
+				cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
+				if(data)
+					free(data);
+				free(kcopy);
+				return;
+			}
+
+			*ptr = '\0';
+
+			/* room for key, '=', value and the terminator */
+			field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2);
+			if(field) {
+				strcat(field, "=");
+				strcat(field, data);
+			} else
+				free(kcopy);
+			free(data);
+		} else {
+			size_t len;
+
+			if(*cptr == '\0') {
+				cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
+				return;
+			}
+
+			/*
+			 * The field is not in quotes, so look for the closing
+			 * white space
+			 */
+			while((*string != '\0') && !isspace((unsigned char)*string))
+				string++;
+
+			/* both pointers lie within s, so subtract them directly */
+			len = (size_t)(string - key) + 1;
+			field = cli_malloc(len);
+
+			if(field) {
+				memcpy(field, key, len - 1);
+				field[len - 1] = '\0';
+			}
+		}
+		if(field) {
+			messageAddArgument(m, field);
+			free(field);
+		}
+	}
+}
+
+/*
+ * Return argument number arg of the message, or "" if that slot is empty.
+ * arg must lie in the range 0 .. numberOfArguments - 1.
+ */
+static const char *
+messageGetArgument(const message *m, int arg)
+{
+	const char *value;
+
+	assert(m != NULL);
+	assert(arg >= 0);
+	assert(arg < m->numberOfArguments);
+
+	value = m->mimeArguments[arg];
+	if(value == NULL)
+		return "";
+
+	return value;
+}
+
+/*
+ * Find a MIME variable from the header and return a COPY to the value of that
+ * variable. The caller must free the copy
+ *
+ * The first stored argument that starts with variable (ignoring case) decides
+ * the result. NULL is returned when no argument matches, when the matching
+ * argument has no '=' after the name, or when the copy cannot be allocated.
+ * A value in double quotes is returned without the quotes.
+ */
+char *
+messageFindArgument(const message *m, const char *variable)
+{
+	int i;
+	size_t len;
+
+	assert(m != NULL);
+	assert(variable != NULL);
+
+	len = strlen(variable);
+
+	for(i = 0; i < m->numberOfArguments; i++) {
+		const char *ptr;
+
+		ptr = messageGetArgument(m, i);
+		if((ptr == NULL) || (*ptr == '\0'))
+			continue;
+#ifdef	CL_DEBUG
+		cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
+			(unsigned long)len, variable, ptr);
+#endif
+		if(strncasecmp(ptr, variable, len) == 0) {
+			ptr = &ptr[len];
+			/* cast: isspace() is undefined for negative values */
+			while(isspace((unsigned char)*ptr))
+				ptr++;
+			if(*ptr != '=') {
+				cli_warnmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
+				return NULL;
+			}
+			if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
+				/* Remove any quote characters */
+				char *ret = cli_strdup(++ptr);
+				char *p;
+
+				if(ret == NULL)
+					return NULL;
+
+				/*
+				 * Thomas Lamy <Thomas.Lamy at in-online.net>:
+				 * fix un-quoting of boundary strings from
+				 * header, occurs if boundary was given as
+				 *	'boundary="_Test_";'
+				 *
+				 * At least two quotes in string, assume
+				 * quoted argument
+				 * end string at next quote
+				 */
+				if((p = strchr(ret, '"')) != NULL)
+					*p = '\0';
+				return ret;
+			}
+			return cli_strdup(ptr);
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Return a copy of the "filename" argument, falling back to the "name"
+ * argument. The result comes from messageFindArgument(), so the caller
+ * must free it; NULL means neither was found.
+ */
+char *
+messageGetFilename(const message *m)
+{
+	char *found = (char *)messageFindArgument(m, "filename");
+
+	if(found == NULL)
+		found = (char *)messageFindArgument(m, "name");
+
+	return found;
+}
+
+/*
+ * Returns true or false
+ *
+ * True (1) when the first stored argument that starts with variable
+ * (ignoring case) is followed, after optional white space, by '='.
+ * False (0) when no argument matches or that argument has no '='.
+ */
+static int
+messageHasArgument(const message *m, const char *variable)
+{
+	int i;
+	size_t len;
+
+	assert(m != NULL);
+	assert(variable != NULL);
+
+	len = strlen(variable);
+
+	for(i = 0; i < m->numberOfArguments; i++) {
+		const char *ptr;
+
+		ptr = messageGetArgument(m, i);
+		if((ptr == NULL) || (*ptr == '\0'))
+			continue;
+#ifdef	CL_DEBUG
+		cli_dbgmsg("messageArgumentExists: compare %lu bytes of %s with %s\n",
+			(unsigned long)len, variable, ptr);
+#endif
+		if(strncasecmp(ptr, variable, len) == 0) {
+			ptr = &ptr[len];
+			/* cast: isspace() is undefined for negative values */
+			while(isspace((unsigned char)*ptr))
+				ptr++;
+			if(*ptr != '=') {
+				cli_warnmsg("messageArgumentExists: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
+				return 0;
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Returns 1 if the message carries a "filename" or a "file" argument,
+ * otherwise 0.
+ * NOTE(review): messageGetFilename() looks for "filename" and "name";
+ * confirm that probing "file" here is intended.
+ */
+int
+messageHasFilename(const message *m)
+{
+	if(messageHasArgument(m, "filename"))
+		return 1;
+
+	return messageHasArgument(m, "file") != 0;
+}
+
+/*
+ * Parse a Content-Transfer-Encoding value and append every mechanism it
+ * names to m->encodingTypes.
+ *
+ * The value is split on spaces and tabs. A token that matches a table entry
+ * exactly is appended unless it is already listed. A token that matches
+ * nothing exactly is replaced by the most similar table entry when the
+ * similarity is at least 50%, otherwise both base64 and quoted-printable
+ * are enabled.
+ */
+void
+messageSetEncoding(message *m, const char *enctype)
+{
+	const struct encoding_map *e;
+	int i;
+	char *type;
+
+	assert(m != NULL);
+	assert(enctype != NULL);
+
+	/*m->encodingType = EEXTENSION;*/
+
+	/* cast: <ctype.h> functions are undefined for negative char values */
+	while(isblank((unsigned char)*enctype))
+		enctype++;
+
+	cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
+
+	if(strcasecmp(enctype, "8 bit") == 0) {
+		cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
+		enctype = "8bit";
+	}
+
+	/*
+	 * Iterate through
+	 *	Content-Transfer-Encoding: base64 binary
+	 * cli_strtok's fieldno counts from 0
+	 */
+	i = 0;
+	while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
+		int highestSimil = 0;
+		const char *closest = NULL;
+		/* the same for every table entry, so work it out once */
+		const int lowertype = tolower((unsigned char)type[0]);
+
+		for(e = encoding_map; e->string; e++) {
+			int sim;
+
+			if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
+				/*
+				 * simil is expensive, I'm yet to encounter only
+				 * one example of a missent encoding when the
+				 * first character was wrong, so lets assume no
+				 * match to save the call.
+				 *
+				 * That example was quoted-printable sent as
+				 * X-quoted-printable.
+				 */
+				continue;
+
+			if(strcmp(e->string, "uuencode") == 0)
+				/*
+				 * No need to test here - fast track visa will have
+				 * handled uuencoded files
+				 */
+				continue;
+
+			sim = simil(type, e->string);
+
+			if(sim == 100) {
+				int j;
+				encoding_type *et;
+
+				for(j = 0; j < m->numberOfEncTypes; j++)
+					if(m->encodingTypes[j] == e->type)
+						break;
+
+				if(j < m->numberOfEncTypes) {
+					cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
+						type);
+					break;
+				}
+
+				et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
+				if(et == NULL)
+					break;
+
+				m->encodingTypes = et;
+				m->encodingTypes[m->numberOfEncTypes++] = e->type;
+
+				cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
+				break;
+			} else if(sim > highestSimil) {
+				closest = e->string;
+				highestSimil = sim;
+			}
+		}
+
+		/* the scan reached the end of the table: no exact match */
+		if(e->string == NULL) {
+			/*
+			 * The stated encoding type is illegal, so we
+			 * use a best guess of what it should be.
+			 *
+			 * 50% is arbitrary. For example 7bi will match as
+			 * 66% certain to be 7bit
+			 */
+			if(highestSimil >= 50) {
+				/* %u needs an unsigned argument */
+				cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
+					type, closest, (unsigned int)highestSimil);
+				messageSetEncoding(m, closest);
+			} else {
+				cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
+				/*
+				 * Err on the side of safety, enable all
+				 * decoding modules
+				 */
+				messageSetEncoding(m, "base64");
+				messageSetEncoding(m, "quoted-printable");
+			}
+		}
+
+		free(type);
+	}
+}
+
+/*
+ * Return the first encoding type recorded for the message, or NOENCODING
+ * when none has been recorded.
+ */
+encoding_type
+messageGetEncoding(const message *m)
+{
+	assert(m != NULL);
+
+	if(m->numberOfEncTypes != 0)
+		return m->encodingTypes[0];
+
+	return NOENCODING;
+}
+
+/*
+ * Append a node to the end of the message body. When line is non-NULL and
+ * holds data the node refers to it through lineLink() and the line is
+ * checked by messageIsEncoding(); otherwise the node is a blank line.
+ * Returns 1 on success, -1 if the node cannot be allocated, in which case
+ * the message is left untouched.
+ */
+int
+messageAddLine(message *m, line_t *line)
+{
+	text *node;
+
+	assert(m != NULL);
+
+	/*
+	 * Allocate first and link afterwards, so that a failed allocation
+	 * cannot overwrite body_last and lose the tail of the list
+	 */
+	node = (text *)cli_malloc(sizeof(text));
+	if(node == NULL)
+		return -1;
+
+	node->t_next = NULL;
+	node->t_line = NULL;
+
+	if(m->body_first == NULL)
+		m->body_first = node;
+	else
+		m->body_last->t_next = node;
+	m->body_last = node;
+
+	if(line && lineGetData(line)) {
+		node->t_line = lineLink(line);
+
+		/* reads the line through m->body_last, set just above */
+		messageIsEncoding(m);
+	}
+
+	return 1;
+}
+
+/*
+ * Append the string data to the body of the message as a new line.
+ * The line is stored through lineCreate(), or shares the previous line via
+ * lineLink() when the text is identical, so the caller keeps ownership of
+ * data. Line must not be terminated by a \n
+ *
+ * NULL or empty data is stored as a blank line, and data made up only of
+ * white space is stored as a single space.
+ * Returns 1 on success (including when a repeated blank line is skipped)
+ * and -1 when out of memory.
+ */
+int
+messageAddStr(message *m, const char *data)
+{
+	line_t *shared = NULL;
+
+	assert(m != NULL);
+
+	if(data != NULL) {
+		if(*data == '\0')
+			data = NULL;
+		else {
+			/*
+			 * A line of nothing but white space is kept as one
+			 * space to save memory. Something has to be kept
+			 * since it may be a header line
+			 */
+			const char *scan = data;
+
+			while((*scan != '\0') && !((*scan) & 0x80) && isspace(*scan))
+				scan++;
+
+			if(*scan == '\0') {
+				/*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
+				data = " ";
+			}
+		}
+	}
+
+	if(m->body_first != NULL) {
+		assert(m->body_last != NULL);
+
+		/*
+		 * Two blank lines in succession are not saved, except in
+		 * text parts: some phish signatures have been built which
+		 * need the blank lines
+		 */
+		if((data == NULL) && (m->body_last->t_line == NULL) &&
+		   (messageGetMimeType(m) != TEXT))
+			return 1;
+
+		m->body_last->t_next = (text *)cli_malloc(sizeof(text));
+		if(m->body_last->t_next == NULL) {
+			/* try once more after messageDedup() */
+			messageDedup(m);
+			m->body_last->t_next = (text *)cli_malloc(sizeof(text));
+			if(m->body_last->t_next == NULL) {
+				cli_errmsg("messageAddStr: out of memory\n");
+				return -1;
+			}
+		}
+
+		/* same text as the previous line: share that line */
+		if((data != NULL) && (m->body_last->t_line != NULL) &&
+		   (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
+			shared = m->body_last->t_line;
+
+		m->body_last = m->body_last->t_next;
+	} else
+		m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
+
+	if(m->body_last == NULL) {
+		cli_errmsg("messageAddStr: out of memory\n");
+		return -1;
+	}
+
+	m->body_last->t_next = NULL;
+
+	if((data == NULL) || (*data == '\0')) {
+		m->body_last->t_line = NULL;
+		return 1;
+	}
+
+	if(shared != NULL) {
+		m->body_last->t_line = lineLink(shared);
+		return 1;
+	}
+
+	m->body_last->t_line = lineCreate(data);
+	if(m->body_last->t_line == NULL) {
+		/* try once more after messageDedup() */
+		messageDedup(m);
+		m->body_last->t_line = lineCreate(data);
+
+		if(m->body_last->t_line == NULL) {
+			cli_errmsg("messageAddStr: out of memory\n");
+			return -1;
+		}
+	}
+	/* cli_chomp(m->body_last->t_text); */
+	messageIsEncoding(m);
+
+	return 1;
+}
+
+/*
+ * Add the given line to the start of the given message
+ * The text is stored through lineCreate(), so the caller keeps ownership
+ * of data.
+ * Line must not be terminated by a \n
+ *
+ * Returns 1 on success. When the body already has lines and memory runs
+ * out, -1 is returned and the message is left unchanged.
+ */
+int
+messageAddStrAtTop(message *m, const char *data)
+{
+	text *newfirst;
+
+	assert(m != NULL);
+
+	/* Empty body: adding at the top is the same as appending */
+	if(m->body_first == NULL)
+		return messageAddLine(m, lineCreate(data));
+
+	newfirst = (text *)cli_malloc(sizeof(text));
+	if(newfirst == NULL)
+		return -1;
+
+	newfirst->t_line = lineCreate((data) ? data : "");
+
+	if(newfirst->t_line == NULL) {
+		cli_errmsg("messageAddStrAtTop: out of memory\n");
+		/*
+		 * Don't report failure while leaving a half built node at
+		 * the head of the body
+		 */
+		free(newfirst);
+		return -1;
+	}
+
+	/* Fully built: only now is the node linked in front of the body */
+	newfirst->t_next = m->body_first;
+	m->body_first = newfirst;
+
+	return 1;
+}
+
+/*
+ * Put the contents of the given text at the end of the current object.
+ * Can be used either to move a text object into a message, or to move a
+ * message's text into another message only moving from a given offset.
+ * The given text is emptied; it can be used again if needed, though be warned
+ * that it will have an empty line at the start.
+ * Returns -1 for failure, 0 for success
+ */
+int
+messageMoveText(message *m, text *t, message *old_message)
+{
+	int rc;
+
+	if(m->body_first == NULL) {
+		if(old_message) {
+			text *u;
+			/*
+			 * t is within old_message which is about to be
+			 * destroyed
+			 */
+			assert(old_message->body_first != NULL);
+
+			m->body_first = t;
+			/* Release the lines of old_message that precede t */
+			for(u = old_message->body_first; u != t;) {
+				text *next;
+
+				if(u->t_line)
+					lineUnlink(u->t_line);
+				next = u->t_next;
+
+				free(u);
+				u = next;
+
+				if(u == NULL) {
+					cli_errmsg("messageMoveText sanity check: t not within old_message\n");
+					return -1;
+				}
+			}
+			assert(old_message->body_last->t_next == NULL);
+
+			m->body_last = old_message->body_last;
+			old_message->body_first = old_message->body_last = NULL;
+
+			/* Do any pointers need to be reset? */
+			if((old_message->bounce == NULL) &&
+			   (old_message->encoding == NULL) &&
+			   (old_message->binhex == NULL) &&
+			   (old_message->yenc == NULL))
+				return 0;
+
+			/* Rewind so that the loop below rescans every line */
+			m->body_last = m->body_first;
+			rc = 0;
+		} else {
+			m->body_last = m->body_first = textMove(NULL, t);
+			if(m->body_first == NULL)
+				/*
+				 * Nothing was moved. There is no list to
+				 * scan, and the loop below would dereference
+				 * the NULL body_last
+				 */
+				return -1;
+			rc = 0;
+		}
+	} else {
+		m->body_last = textMove(m->body_last, t);
+		if(m->body_last == NULL) {
+			rc = -1;
+			m->body_last = m->body_first;
+		} else
+			rc = 0;
+	}
+
+	/* Walk to the real end of the body, checking each line passed */
+	while(m->body_last->t_next) {
+		m->body_last = m->body_last->t_next;
+		if(m->body_last->t_line)
+			messageIsEncoding(m);
+	}
+
+	return rc;
+}
+
+/*
+ * See if the last line marks the start of a non MIME inclusion that
+ * will need to be scanned. The checks are tried in order and the first one
+ * that matches records the last body node in the corresponding field of the
+ * message; a field that is already set is never overwritten.
+ */
+static void
+messageIsEncoding(message *m)
+{
+	static const char cte_header[] = "Content-Transfer-Encoding";
+	static const char binhex_banner[] = "(This file must be converted with BinHex 4.0)";
+	text *last = m->body_last;
+	const char *line = lineGetData(last->t_line);
+
+	/* A Content-Transfer-Encoding line that doesn't mention 7bit */
+	if((m->encoding == NULL) &&
+	   (strncasecmp(line, cte_header, sizeof(cte_header) - 1) == 0) &&
+	   (strstr(line, "7bit") == NULL)) {
+		m->encoding = last;
+		return;
+	}
+
+	/* A Received: line that cli_filetype() classes as mail */
+	if((m->bounce == NULL) &&
+	   (strncasecmp(line, "Received: ", 10) == 0) &&
+	   (cli_filetype((const unsigned char *)line, strlen(line)) == CL_TYPE_MAIL)) {
+		m->bounce = last;
+		return;
+	}
+
+	/* uuencode is not checked: not needed with fast track visa technology */
+
+	/*
+	 * Look for close matches for BinHex, but simil() is expensive so
+	 * only do it if it's likely to be found
+	 */
+	if((m->binhex == NULL) &&
+	   strstr(line, "BinHex") &&
+	   (simil(line, binhex_banner) > 90)) {
+		m->binhex = last;
+		return;
+	}
+
+	/* The start of a yEnc block */
+	if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
+		m->yenc = last;
+}
+
+/*
+ * Hand back the first node of the message body. This is the message's own
+ * list, NOT a copy of the data
+ */
+text *
+messageGetBody(message *m)
+{
+	text *head;
+
+	assert(m != NULL);
+	head = m->body_first;
+
+	return head;
+}
+
+/*
+ * Export a message using the given export routines
+ *
+ * TODO: It really should export into an array, one
+ * for each encoding algorithm. However, what it does is it returns the
+ * last item that was exported. That's sufficient for now.
+ */
+static void *
+messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text)
+{
+	void *ret;
+	text *t_line;
+	char *filename;
+	int i;
+
+	assert(m != NULL);
+
+	if(messageGetBody(m) == NULL)
+		return NULL;
+
+	ret = (*create)();
+
+	if(ret == NULL)
+		return NULL;
+
+	cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
+
+	if((t_line = binhexBegin(m)) != NULL) {
+		unsigned char byte;
+		unsigned long newlen = 0L, len, dataforklen, resourceforklen, l;
+		unsigned char *data;
+		char *ptr;
+		int bytenumber;
+		blob *tmp;
+
+		/*
+		 * Table look up by Thomas Lamy <Thomas.Lamy at in-online.net>
+		 * HQX conversion table - illegal chars are 0xff
+		 */
+		const unsigned char hqxtbl[] = {
+			     /*   00   01   02   03   04   05   06   07   08   09   0a   0b   0c   0d   0e   0f */
+		/* 00-0f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+		/* 10-1f */	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+		/* 20-2f */	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
+		/* 30-3f */	0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
+		/* 40-4f */	0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
+		/* 50-5f */	0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
+		/* 60-6f */	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
+		/* 70-7f */	0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+		};
+
+		cli_dbgmsg("messageExport: decode binhex\n");
+		/*
+		 * Decode BinHex4. First create a temporary blob which contains
+		 * the encoded message. Then decode that blob to the target
+		 * blob, free the temporary blob and return the target one
+		 *
+		 * FIXME: EICAR isn't detected: should create 3 files in fork
+		 *	format: .info, .data and .rsrc. This is needed for
+		 *	position dependant detection such as EICAR
+		 *
+		 * See RFC1741
+		 */
+		while(((t_line = t_line->t_next) != NULL) &&
+		      (t_line->t_line == NULL))
+			;
+
+		tmp = textToBlob(t_line, NULL,
+			((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);
+
+		if(tmp == NULL) {
+			/*
+			 * FIXME: We've probably run out of memory during the
+			 * text to blob.
+			 */
+			cli_warnmsg("Couldn't start binhex parser\n");
+			(*destroy)(ret);
+			return NULL;
+		}
+
+		data = blobGetData(tmp);
+
+		if(data == NULL) {
+			cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
+			blobDestroy(tmp);
+			(*destroy)(ret);
+			return NULL;
+		}
+		len = blobGetDataSize(tmp);
+
+		if(data[0] == ':') {
+			unsigned char *uptr;
+			/* 7 bit (ala RFC1741) */
+
+			/*
+			 * FIXME: this is dirty code, modification of the
+			 * contents of a member of the blob object should be
+			 * done through blob.c
+			 *
+			 * Convert 7 bit data into 8 bit
+			 */
+			cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);
+
+			uptr = cli_malloc(len);
+			if(uptr == NULL) {
+				blobDestroy(tmp);
+				(*destroy)(ret);
+				return NULL;
+			}
+			memcpy(uptr, data, len);
+			bytenumber = 0;
+
+			/*
+			 * uptr now contains the encoded (7bit) data - len bytes long
+			 * data will contain the unencoded (8bit) data
+			 */
+			for(l = 1; l < len; l++) {
+				unsigned char c = uptr[l];
+
+				if(c == ':')
+					break;
+
+				if((c == '\n') || (c == '\r'))
+					continue;
+
+				if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
+					cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
+					break;
+				}
+				c = hqxtbl[c];
+				assert(c <= 63);
+
+				/*
+				 * These masks probably aren't needed, but
+				 * they're here to verify the code is correct
+				 */
+				switch(bytenumber) {
+					case 0:
+						data[newlen] = (c << 2) & 0xFC;
+						bytenumber = 1;
+						break;
+					case 1:
+						data[newlen++] |= (c >> 4) & 0x3;
+						data[newlen] = (c << 4) & 0xF0;
+						bytenumber = 2;
+						break;
+					case 2:
+						data[newlen++] |= (c >> 2) & 0xF;
+						data[newlen] = (c << 6) & 0xC0;
+						bytenumber = 3;
+						break;
+					case 3:
+						data[newlen++] |= c & 0x3F;
+						bytenumber = 0;
+						break;
+				}
+			}
+
+			cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
+
+			/*
+			 * Throw away the old encoded (7bit) data
+			 * data now points to the encoded (8bit) data - newlen bytes
+			 *
+			 * The data array may contain repetitive characters
+			 */
+			free(uptr);
+		} else {
+			cli_warnmsg("HQX8 messages not yet supported, extraction may fail - if you believe this file contains a virus, submit it to www.clamav.net\n");
+			newlen = len;
+		}
+
+		/*
+		 * Uncompress repetitive characters
+		 */
+		if(memchr(data, 0x90, newlen)) {
+			blob *u = blobCreate();	/* uncompressed data */
+
+			if(u == NULL) {
+				(*destroy)(ret);
+				blobDestroy(tmp);
+				return NULL;
+			}
+			/*
+			 * Includes compression
+			 */
+			for(l = 0L; l < newlen; l++) {
+				unsigned char c = data[l];
+
+				/*
+				 * TODO: handle the case where the first byte
+				 * is 0x90
+				 */
+				blobAddData(u, &c, 1);
+
+				if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
+					int count;
+
+					l += 2;
+					count = data[l];
+
+					if(count == 0) {
+						c = 0x90;
+						blobAddData(u, &c, 1);
+					} else {
+#ifdef	CL_DEBUG
+						cli_dbgmsg("uncompress HQX7 at 0x%06lu: %d repetitive bytes\n", l, count);
+#endif
+						blobGrow(u, count);
+						while(--count > 0)
+							blobAddData(u, &c, 1);
+					}
+				}
+			}
+			blobDestroy(tmp);
+			tmp = u;
+			data = blobGetData(tmp);
+			len = blobGetDataSize(tmp);
+			cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
+		} else {
+			len = newlen;
+			cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
+				len);
+		}
+		if(len == 0) {
+			cli_warnmsg("Discarding empty binHex attachment\n");
+			(*destroy)(ret);
+			blobDestroy(tmp);
+			return NULL;
+		}
+
+		/*
+		 * The blob tmp now contains the uncompressed data
+		 * of len bytes, i.e. the repetitive bytes have been removed
+		 */
+
+		/*
+		 * Parse the header
+		 *
+		 * TODO: set filename argument in message as well
+		 */
+		byte = data[0];
+		if(byte >= len) {
+			(*destroy)(ret);
+			blobDestroy(tmp);
+			return NULL;
+		}
+		filename = cli_malloc(byte + 1);
+		if(filename == NULL) {
+			(*destroy)(ret);
+			blobDestroy(tmp);
+			return NULL;
+		}
+		memcpy(filename, &data[1], byte);
+		filename[byte] = '\0';
+		(*setFilename)(ret, dir, filename);
+		/*ptr = cli_malloc(strlen(filename) + 6);*/
+		ptr = cli_malloc(byte + 6);
+		if(ptr) {
+			sprintf(ptr, "name=%s", filename);
+			messageAddArgument(m, ptr);
+			free(ptr);
+		}
+
+		/*
+		 * skip over length, filename, version, type, creator and flags
+		 */
+		byte = 1 + byte + 1 + 4 + 4 + 2;
+
+		/*
+		 * Set len to be the data fork length
+		 */
+		dataforklen = ((data[byte] << 24) & 0xFF000000) |
+			((data[byte + 1] << 16) & 0xFF0000) |
+			((data[byte + 2] << 8) & 0xFF00) |
+			(data[byte + 3] & 0xFF);
+
+		resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) |
+			((data[byte + 5] << 16) & 0xFF0000) |
+			((data[byte + 6] << 8) & 0xFF00) |
+			(data[byte + 7] & 0xFF);
+
+		cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n",
+			filename, dataforklen, resourceforklen);
+
+		free((char *)filename);
+
+		/*
+		 * Skip over data fork length, resource fork length and CRC
+		 */
+		byte += 10;
+
+		l = blobGetDataSize(tmp) - byte;
+
+		if(l < dataforklen) {
+			cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
+				dataforklen, l);
+			dataforklen = l;
+		}
+		if(setCTX && m->ctx)
+			(*setCTX)(ret, m->ctx);
+
+		(*addData)(ret, &data[byte], dataforklen);
+
+		blobDestroy(tmp);
+
+		if(destroy_text)
+			m->binhex = NULL;
+
+		if((m->numberOfEncTypes == 0) ||
+		   ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) {
+			cli_dbgmsg("Finished exporting binhex file\n");
+			return ret;
+		}
+	}
+
+	if(m->numberOfEncTypes == 0) {
+		/*
+		 * Fast copy
+		 */
+		cli_dbgmsg("messageExport: Entering fast copy mode\n");
+
+#if	0
+		filename = messageGetFilename(m);
+
+		if(filename == NULL) {
+			cli_dbgmsg("Unencoded attachment sent with no filename\n");
+			messageAddArgument(m, "name=attachment");
+		} else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0))
+			/*
+			 * Some virus attachments don't say how they've
+			 * been encoded. We assume base64
+			 */
+			messageSetEncoding(m, "base64");
+#else
+		filename = (char *)messageFindArgument(m, "filename");
+		if(filename == NULL) {
+			filename = (char *)messageFindArgument(m, "name");
+
+			if(filename == NULL) {
+				cli_dbgmsg("Unencoded attachment sent with no filename\n");
+				messageAddArgument(m, "name=attachment");
+			} else
+				/*
+				 * Some virus attachments don't say how they've
+				 * been encoded. We assume base64
+				 */
+				messageSetEncoding(m, "base64");
+		}
+#endif
+
+		(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+
+		if(filename)
+			free((char *)filename);
+
+		if(m->numberOfEncTypes == 0)
+			return exportText(messageGetBody(m), ret, destroy_text);
+	}
+
+	if(setCTX && m->ctx)
+		(*setCTX)(ret, m->ctx);
+
+	for(i = 0; i < m->numberOfEncTypes; i++) {
+		encoding_type enctype = m->encodingTypes[i];
+		size_t size;
+
+		if(i > 0) {
+			void *newret;
+
+			newret = (*create)();
+			if(newret == NULL) {
+				cli_errmsg("Not all decoding algorithms were run\n");
+				return ret;
+			}
+			(*destroy)(ret);
+			ret = newret;
+		}
+		cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype);
+		/*
+		 * Find the filename to decode
+		 */
+		if(((enctype == YENCODE) || (i == 0)) && yEncBegin(m)) {
+			const char *f;
+
+			/*
+			 * TODO: handle multipart yEnc encoded files
+			 */
+			t_line = yEncBegin(m);
+			f = lineGetData(t_line->t_line);
+
+			if((filename = strstr(f, " name=")) != NULL) {
+				filename = cli_strdup(&filename[6]);
+				if(filename) {
+					cli_chomp(filename);
+					strstrip(filename);
+					cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
+				}
+			}
+
+			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+			if(filename) {
+				free((char *)filename);
+				filename = NULL;
+			}
+			t_line = t_line->t_next;
+			enctype = YENCODE;
+			m->yenc = NULL;
+		} else {
+			if(enctype == UUENCODE) {
+				/*
+				 * The body will have been stripped out by the
+				 * fast track visa system. Treat as plain/text,
+				 * which means we'll still scan for funnies
+				 * outside of the uuencoded portion.
+				 */
+				cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
+				enctype = m->encodingTypes[i] = NOENCODING;
+			}
+			filename = messageGetFilename(m);
+
+			if(filename == NULL) {
+				cli_dbgmsg("Attachment sent with no filename\n");
+				messageAddArgument(m, "name=attachment");
+			} else if(enctype == NOENCODING)
+				/*
+				 * Some virus attachments don't say how
+				 * they've been encoded. We assume
+				 * base64.
+				 *
+				 * FIXME: don't do this if it's a fall
+				 * through from uuencode
+				 */
+				messageSetEncoding(m, "base64");
+
+			(*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+
+			t_line = messageGetBody(m);
+		}
+
+		if(filename)
+			free((char *)filename);
+
+		/*
+		 * t_line should now point to the first (encoded) line of the
+		 * message
+		 */
+		if(t_line == NULL) {
+			cli_warnmsg("Empty attachment not saved\n");
+			(*destroy)(ret);
+			return NULL;
+		}
+
+		if(enctype == NOENCODING) {
+			/*
+			 * Fast copy
+			 */
+			if(i == m->numberOfEncTypes - 1) {
+				/* last one */
+				(void)exportText(t_line, ret, destroy_text);
+				break;
+			}
+			(void)exportText(t_line, ret, 0);
+			continue;
+		}
+
+		size = 0;
+		do {
+			unsigned char smallbuf[1024];
+			unsigned char *uptr, *data;
+			const char *line = lineGetData(t_line->t_line);
+			unsigned char *bigbuf;
+			size_t datasize;
+
+			if(enctype == YENCODE) {
+				if(line == NULL)
+					continue;
+				if(strncmp(line, "=yend ", 6) == 0)
+					break;
+			}
+
+			/*
+			 * Add two bytes for '\n' and '\0'
+			 */
+			datasize = (line) ? strlen(line) + 2 : 0;
+
+			if(datasize >= sizeof(smallbuf))
+				data = bigbuf = (unsigned char *)cli_malloc(datasize);
+			else {
+				bigbuf = NULL;
+				data = smallbuf;
+				datasize = sizeof(smallbuf);
+			}
+
+			uptr = decodeLine(m, enctype, line, data, datasize);
+			if(uptr == NULL) {
+				if(data == bigbuf)
+					free(data);
+				break;
+			}
+
+			if(uptr != data) {
+				assert((size_t)(uptr - data) < datasize);
+				(*addData)(ret, data, (size_t)(uptr - data));
+				size += (size_t)(uptr - data);
+			}
+
+			if(data == bigbuf)
+				free(data);
+
+			/*
+			 * According to RFC2045, '=' is used to pad out
+			 * the last byte and should be used as evidence
+			 * of the end of the data. Some mail clients
+			 * annoyingly then put plain text after the '='
+			 * byte and viruses exploit this bug. Sigh
+			 */
+			/*if(enctype == BASE64)
+				if(strchr(line, '='))
+					break;*/
+			if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
+				lineUnlink(t_line->t_line);
+				t_line->t_line = NULL;
+			}
+		} while((t_line = t_line->t_next) != NULL);
+
+		cli_dbgmsg("Exported %lu bytes using enctype %d\n",
+			(unsigned long)size, (int)enctype);
+
+		/* Verify we have nothing left to flush out */
+		if(m->base64chars) {
+			unsigned char data[4];
+			unsigned char *ptr;
+
+			ptr = base64Flush(m, data);
+			if(ptr)
+				(*addData)(ret, data, (size_t)(ptr - data));
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Write out any base64 characters still carried over in the message state.
+ * Returns whatever decode() returns when asked to flush into buf, or NULL
+ * if no characters were pending
+ */
+unsigned char *
+base64Flush(message *m, unsigned char *buf)
+{
+	unsigned char *end;
+
+	cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);
+
+	if(m->base64chars == 0)
+		return NULL;
+
+	/* in == NULL asks decode() to flush the carried characters */
+	end = decode(m, NULL, buf, base64, FALSE);
+	m->base64chars = 0;
+
+	return end;
+}
+
+/*
+ * Run messageExport() over the message with the fileblob callbacks, so that
+ * the decoded contents end up in a fileblob. If destroy is set the body text
+ * of the message is released afterwards.
+ * The caller must free the returned fileblob
+ */
+fileblob *
+messageToFileblob(message *m, const char *dir, int destroy)
+{
+	fileblob *exported;
+
+	cli_dbgmsg("messageToFileblob\n");
+
+	exported = messageExport(m, dir,
+		(void *(*)(void))fileblobCreate,
+		(void(*)(void *))fileblobDestroy,
+		(void(*)(void *, const char *, const char *))fileblobSetFilename,
+		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
+		(void *(*)(text *, void *, int))textToFileblob,
+		(void(*)(void *, cli_ctx *))fileblobSetCTX,
+		destroy);
+
+	if(!destroy || (m->body_first == NULL))
+		return exported;
+
+	/* the caller asked for the body to be discarded once exported */
+	textDestroy(m->body_first);
+	m->body_first = m->body_last = NULL;
+
+	return exported;
+}
+
+/*
+ * Run messageExport() over the message with the blob callbacks, so that the
+ * decoded contents end up in a blob. No directory and no context callback
+ * are passed. If destroy is set the body text of the message is released
+ * afterwards.
+ * The caller must free the returned blob
+ */
+blob *
+messageToBlob(message *m, int destroy)
+{
+	blob *exported;
+
+	cli_dbgmsg("messageToBlob\n");
+
+	exported = messageExport(m, NULL,
+		(void *(*)(void))blobCreate,
+		(void(*)(void *))blobDestroy,
+		(void(*)(void *, const char *, const char *))blobSetFilename,
+		(void(*)(void *, const unsigned char *, size_t))blobAddData,
+		(void *(*)(text *, void *, int))textToBlob,
+		(void(*)(void *, cli_ctx *))NULL,
+		destroy);
+
+	if(!destroy || (m->body_first == NULL))
+		return exported;
+
+	/* the caller asked for the body to be discarded once exported */
+	textDestroy(m->body_first);
+	m->body_first = m->body_last = NULL;
+
+	return exported;
+}
+
+/*
+ * Allocate an empty node and append it to the list whose ends are *first
+ * and *last. Returns the new node, or NULL if the allocation failed, in
+ * which case *first and *last are left untouched.
+ * Every node is created with t_next == NULL, so the list is NULL terminated
+ * at all times and *last is never NULL while *first is set
+ */
+static text *
+messageTextAppend(text **first, text **last)
+{
+	text *node = cli_malloc(sizeof(text));
+
+	if(node == NULL)
+		return NULL;
+
+	node->t_line = NULL;	/* empty line until the caller says otherwise */
+	node->t_next = NULL;
+
+	if(*first == NULL)
+		*first = node;
+	else
+		(*last)->t_next = node;
+	*last = node;
+
+	return node;
+}
+
+/*
+ * Decode and transfer the contents of the message into a text area
+ * The caller must free the returned text
+ *
+ * Returns NULL if there is nothing to return, if memory runs out during a
+ * plain copy, on a uuencoded part or on a yEnc part with no begin line.
+ * If memory runs out while decoding, the text built so far is returned
+ */
+text *
+messageToText(message *m)
+{
+	int i;
+	text *first = NULL, *last = NULL;
+	const text *t_line;
+
+	assert(m != NULL);
+
+	if(m->numberOfEncTypes == 0) {
+		/*
+		 * Fast copy
+		 */
+		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
+			if(messageTextAppend(&first, &last) == NULL) {
+				if(first)
+					textDestroy(first);
+				return NULL;
+			}
+			/* empty lines keep the NULL t_line set by the append */
+			if(t_line->t_line)
+				last->t_line = lineLink(t_line->t_line);
+		}
+
+		return first;
+	}
+	/*
+	 * Scan over the data a number of times once for each claimed encoding
+	 * type
+	 */
+	for(i = 0; i < m->numberOfEncTypes; i++) {
+		const encoding_type enctype = m->encodingTypes[i];
+
+		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
+			i, (int)enctype);
+
+		switch(enctype) {
+			case NOENCODING:
+			case BINARY:
+			case EIGHTBIT:
+				/*
+				 * Fast copy
+				 */
+				for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
+					if(messageTextAppend(&first, &last) == NULL) {
+						/*
+						 * The list is still NULL
+						 * terminated, there is no
+						 * need (and no valid node) to
+						 * patch up before freeing it
+						 */
+						if(first)
+							textDestroy(first);
+						return NULL;
+					}
+					if(t_line->t_line)
+						last->t_line = lineLink(t_line->t_line);
+				}
+				continue;
+			case UUENCODE:
+				cli_errmsg("messageToText: Unexpected attempt to handle uuencoded file - report to http://bugs.clamav.net\n");
+				if(first)
+					textDestroy(first);
+				return NULL;
+			case YENCODE:
+				t_line = yEncBegin(m);
+
+				if(t_line == NULL) {
+					/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
+					if(first)
+						textDestroy(first);
+					return NULL;
+				}
+				t_line = t_line->t_next;
+				/*
+				 * FALLTHROUGH
+				 * NOTE(review): the default case overwrites
+				 * t_line, so decoding starts at the top of
+				 * the body and not after the yEnc begin line.
+				 * Behaviour kept as is - confirm whether a
+				 * break was intended here
+				 */
+			default:
+				if((i == 0) && binhexBegin(m))
+					cli_warnmsg("Binhex messages not supported yet.\n");
+				t_line = messageGetBody(m);
+		}
+
+		for(; t_line; t_line = t_line->t_next) {
+			unsigned char data[1024];
+			unsigned char *uptr;
+			const char *line = lineGetData(t_line->t_line);
+
+			if(enctype == BASE64)
+				/*
+				 * ignore blanks - breaks RFC which is
+				 * probably the point!
+				 */
+				if(line == NULL)
+					continue;
+
+			assert((line == NULL) || (strlen(line) <= sizeof(data)));
+
+			uptr = decodeLine(m, enctype, line, data, sizeof(data));
+
+			if(uptr == NULL)
+				break;
+
+			assert(uptr <= &data[sizeof(data)]);
+
+			/* out of memory: keep what has been decoded so far */
+			if(messageTextAppend(&first, &last) == NULL)
+				break;
+
+			/*
+			 * If the decoded line is the same as the encoded
+			 * there's no need to take a copy, just link it.
+			 * Note that the comparison is done without the
+			 * trailing newline that the decoding routine may have
+			 * added - that's why there's a strncmp rather than a
+			 * strcmp - that'd be bad for MIME decoders, but is OK
+			 * for AV software
+			 */
+			if((data[0] == '\n') || (data[0] == '\0'))
+				last->t_line = NULL;
+			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
+#ifdef	CL_DEBUG
+				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
+#endif
+				last->t_line = lineLink(t_line->t_line);
+			} else
+				last->t_line = lineCreate((char *)data);
+
+			/* an '=' pads the final base64 line: stop there */
+			if(line && enctype == BASE64)
+				if(strchr(line, '='))
+					break;
+		}
+		if(m->base64chars) {
+			unsigned char data[4];
+
+			/* flush the characters carried over by decode() */
+			memset(data, '\0', sizeof(data));
+			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
+				if(messageTextAppend(&first, &last) != NULL)
+					last->t_line = lineCreate((char *)data);
+			}
+			m->base64chars = 0;
+		}
+	}
+
+	return first;
+}
+
+/*
+ * Return the yEnc marker line recorded in the message (m->yenc),
+ * which is NULL when none is recorded
+ */
+text *
+yEncBegin(message *m)
+{
+	return m->yenc;
+}
+
+/*
+ * Find the BINHEX message (if any)
+ *
+ * The variant that scans the body for the BinHex 4.0 banner is compiled
+ * out; the active variant just returns the marker stored in m->binhex
+ */
+#if	0
+const text *
+binhexBegin(message *m)
+{
+	const text *t_line;
+
+	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
+		if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
+			return t_line;
+
+	return NULL;
+}
+#else
+text *
+binhexBegin(message *m)
+{
+	return m->binhex;
+}
+#endif
+
+/*
+ * Find a bounce message. There is no standard for these, not
+ * even a convention, so don't expect this to be foolproof
+ *
+ * The variant that scans the body with cli_filetype() is compiled out;
+ * the active variant just returns the marker stored in m->bounce
+ */
+#if	0
+text *
+bounceBegin(message *m)
+{
+	const text *t_line;
+
+	for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
+		if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
+			return t_line;
+
+	return NULL;
+}
+#else
+text *
+bounceBegin(message *m)
+{
+	return m->bounce;
+}
+#endif
+
+/*
+ * If a message doesn't contain another message which could be harmful
+ * it is deemed to be safe.
+ *
+ * TODO: ensure nothing can get through this
+ *
+ * TODO: check to see if we need to
+ * find anything else, perhaps anything
+ * from the RFC821 table?
+ *
+ * The scanning variant (messageIsAllText, which looks for a
+ * Content-Transfer-Encoding line in the body) is compiled out; the active
+ * code is encodingLine(), which just returns the marker stored in
+ * m->encoding
+ */
+#if	0
+int
+messageIsAllText(const message *m)
+{
+	const text *t;
+
+	for(t = messageGetBody(m); t; t = t->t_next)
+		if(strncasecmp(t->t_text,
+			"Content-Transfer-Encoding",
+			strlen("Content-Transfer-Encoding")) == 0)
+				return 0;
+
+	return 1;
+}
+#else
+text *
+encodingLine(message *m)
+{
+	return m->encoding;
+}
+#endif
+
+/*
+ * Forget the encoding, bounce and binhex marker lines recorded in the
+ * message.
+ * NOTE(review): m->yenc is not cleared here - confirm that is intentional
+ */
+void
+messageClearMarkers(message *m)
+{
+	m->encoding = m->bounce = m->binhex = NULL;
+}
+
+/*
+ * Decode a line and add it to a buffer, return the end of the buffer
+ * to help appending callers. There is no new line at the end of "line"
+ *
+ * m		message whose carried-over base64 state decode() may update
+ * et		encoding to undo; unknown values are copied as plain text
+ * line		NUL terminated encoded line, or NULL for an empty line
+ * buf		where the decoded bytes are written
+ * buflen	size of buf in bytes. Only the quoted-printable and uuencode
+ *		branches consult it, so callers must still size buf for the
+ *		whole line plus a newline and a NUL
+ *
+ * The plain text branch returns whatever cli_strrcpy() returns; the other
+ * branches return a pointer to the NUL written after the decoded data
+ */
+unsigned char *
+decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
+{
+	size_t len, reallen;
+	bool softbreak;
+	char *p2, *copy;
+	char base64buf[RFC2045LENGTH + 1];
+
+	/*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
+
+	assert(m != NULL);
+	assert(buf != NULL);
+
+	switch(et) {
+		case BINARY:
+			/*
+			 * TODO: find out what this is, encoded as binary??
+			 */
+			/* fall through */
+		case NOENCODING:
+		case EIGHTBIT:
+		default:	/* unknown encoding type - try our best */
+			if(line)	/* empty line? */
+				buf = (unsigned char *)cli_strrcpy((char *)buf, line);
+			/* Put the new line back in */
+			return (unsigned char *)cli_strrcpy((char *)buf, "\n");
+
+		case QUOTEDPRINTABLE:
+			if(line == NULL) {	/* empty line */
+				*buf++ = '\n';
+				break;
+			}
+
+			softbreak = FALSE;
+			/*
+			 * Every iteration stores exactly one byte. Keep two
+			 * bytes back for the newline and the NUL that are
+			 * added after the loop, so that a full buffer is
+			 * truncated rather than overrun
+			 */
+			while((buflen > 2) && *line) {
+				if(*line == '=') {
+					unsigned char byte;
+
+					if((*++line == '\0') || (*line == '\n')) {
+						softbreak = TRUE;
+						/* soft line break */
+						break;
+					}
+
+					byte = hex(*line);
+
+					if((*++line == '\0') || (*line == '\n')) {
+						/*
+						 * broken e-mail, not
+						 * adhering to RFC2045
+						 */
+						*buf++ = byte;
+						break;
+					}
+
+					/*
+					 * Fix by Torok Edvin
+					 * <edwintorok at gmail.com>
+					 * Handle messages that use a broken
+					 * quoted-printable encoding of
+					 * href=\"http://, instead of =3D
+					 */
+					if(byte != '=') {
+						byte <<= 4;
+						byte += hex(*line);
+					} else
+						/*
+						 * hex() returned '=' for an
+						 * illegal digit: emit the '='
+						 * as is and carry on from the
+						 * character after it
+						 */
+						line -= 2;
+
+					*buf++ = byte;
+				} else
+					*buf++ = *line;
+				++line;
+				--buflen;
+			}
+			if(!softbreak)
+				/* Put the new line back in */
+				*buf++ = '\n';
+			break;
+
+		case BASE64:
+			if(line == NULL)
+				break;
+			/*
+			 * RFC2045 sets the maximum length to 76 bytes
+			 * but many e-mail clients ignore that
+			 */
+			if(strlen(line) < sizeof(base64buf)) {
+				strcpy(base64buf, line);
+				copy = base64buf;
+			} else {
+				copy = cli_strdup(line);
+				if(copy == NULL)
+					break;
+			}
+
+			/* anything from the first '=' onwards is padding */
+			p2 = strchr(copy, '=');
+			if(p2)
+				*p2 = '\0';
+
+			sanitiseBase64(copy);
+
+			/*
+			 * Klez doesn't always put "=" on the last line
+			 */
+			buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
+
+			if(copy != base64buf)
+				free(copy);
+			break;
+
+		case UUENCODE:
+			assert(m->base64chars == 0);
+
+			if((line == NULL) || (*line == '\0'))	/* empty line */
+				break;
+			if(strcasecmp(line, "end") == 0)
+				break;
+			if(isuuencodebegin(line))
+				break;
+
+			if((line[0] & 0x3F) == ' ')
+				break;
+
+			/*
+			 * reallen contains the number of bytes that were
+			 *	encoded
+			 */
+			reallen = (size_t)uudecode(*line++);
+			if((reallen == 0) || (reallen > 62))
+				break;
+			len = strlen(line);
+
+			if((len > buflen) || (reallen > len))
+				/*
+				 * In practice this should never occur since
+				 * the maximum length of a uuencoded line is
+				 * 62 characters
+				 */
+				cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
+			else {
+				(void)decode(m, line, buf, uudecode, (len & 3) == 0);
+				buf = &buf[reallen];
+			}
+			m->base64chars = 0;	/* this happens with broken uuencoded files */
+			break;
+		case YENCODE:
+			if((line == NULL) || (*line == '\0'))	/* empty line */
+				break;
+			if(strncmp(line, "=yend ", 6) == 0)
+				break;
+
+			/*
+			 * '=' escapes the next character, which is offset by
+			 * a further 64 on top of the usual 42
+			 */
+			while(*line)
+				if(*line == '=') {
+					if(*++line == '\0')
+						break;
+					*buf++ = ((*line++ - 64) & 255);
+				} else
+					*buf++ = ((*line++ - 42) & 255);
+			break;
+	}
+
+	*buf = '\0';
+	return buf;
+}
+
+/*
+ * Remove the non base64 characters such as spaces from a string. Spaces
+ * shouldn't appear mid string in base64 files, but some broken mail clients
+ * ignore such errors rather than discarding the mail, and virus writers
+ * exploit this bug
+ *
+ * The string is compacted in place in a single pass: characters whose
+ * base64Table entry is 255 are dropped, all others keep their order
+ */
+static void
+sanitiseBase64(char *s)
+{
+	char *out = s;
+
+	/*cli_dbgmsg("sanitiseBase64 '%s'\n", s);*/
+	for(; *s; s++)
+		if(base64Table[(unsigned int)(*s & 0xFF)] != 255)
+			*out++ = *s;
+
+	*out = '\0';
+}
+
+/*
+ * Returns one byte after the end of the decoded data in "out"
+ *
+ * Update m->base64chars with the last few bytes of data that we haven't
+ * decoded. After the last line is found, decode will be called with in = NULL
+ * to flush these out
+ *
+ * m		holds the carried-over values (base64_1 .. base64_3) and how
+ *		many of them are pending (base64chars)
+ * in		NUL terminated encoded text, or NULL to flush
+ * out		where the decoded bytes are written, no bound is checked here
+ * decoder	maps one encoded character to its value
+ * isFast	use the loop that reads four characters per iteration without
+ *		looking for the terminator in between; decodeLine() only sets
+ *		it when the length is a multiple of 4. It is ignored while
+ *		carried-over values are pending
+ */
+static unsigned char *
+decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
+{
+	unsigned char b1, b2, b3, b4;
+	unsigned char cb1, cb2, cb3;	/* carried over from last line */
+
+	/*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
+		in ? strlen(in) : 0,
+		isFast, m->base64chars);*/
+
+	cb1 = cb2 = cb3 = '\0';
+
+	/* pick up whatever the previous call left behind */
+	switch(m->base64chars) {
+		case 3:
+			cb3 = m->base64_3;
+			/* FALLTHROUGH */
+		case 2:
+			cb2 = m->base64_2;
+			/* FALLTHROUGH */
+		case 1:
+			cb1 = m->base64_1;
+			/* the fast loop cannot mix in carried values */
+			isFast = FALSE;
+			break;
+		default:
+			assert(m->base64chars <= 3);
+	}
+
+	if(isFast)
+		/* Fast decoding if not last line */
+		while(*in) {
+			b1 = (*decoder)(*in++);
+			b2 = (*decoder)(*in++);
+			b3 = (*decoder)(*in++);
+			/*
+			 * Put this line here to help on some compilers which
+			 * can make use of some architecture's ability to
+			 * multiprocess when different variables can be
+			 * updated at the same time - here b3 is used in
+			 * one line, b1/b2 in the next and b4 in the next after
+			 * that, b3 and b4 rely on in but b1/b2 don't
+			 */
+			*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+			b4 = (*decoder)(*in++);
+			*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+			*out++ = (b3 << 6) | (b4 & 0x3F);
+		}
+	else if(in == NULL) {	/* flush */
+		int nbytes;
+
+		if(m->base64chars == 0)
+			return out;
+
+		cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
+			isalnum(cb1) ? cb1 : '@',
+			isalnum(cb2) ? cb2 : '@',
+			isalnum(cb3) ? cb3 : '@');
+
+		/* count how many carried values there are to write out */
+		m->base64chars--;
+		b1 = cb1;
+		nbytes = 1;
+
+		if(m->base64chars) {
+			m->base64chars--;
+			b2 = cb2;
+
+			if(m->base64chars) {
+				nbytes = 2;	/* overwritten just below */
+				m->base64chars--;
+				b3 = cb3;
+				nbytes = 3;
+			} else if(b2)
+				nbytes = 2;
+		}
+
+		switch(nbytes) {
+			case 3:
+				b4 = '\0';
+				/* fall through */
+			case 4:
+				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+				if((nbytes == 4) || b3)
+					*out++ = (b3 << 6) | (b4 & 0x3F);
+				break;
+			case 2:
+				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+				if((b2 << 4) & 0xFF)
+					*out++ = b2 << 4;
+				break;
+			case 1:
+				*out++ = b1 << 2;
+				break;
+			default:
+				assert(0);
+		}
+	} else while(*in) {
+		/*
+		 * Slow path: build each group of four from the carried
+		 * values first, then from the input, and notice when the
+		 * input runs out part way through a group
+		 */
+		int nbytes;
+
+		if(m->base64chars) {
+			m->base64chars--;
+			b1 = cb1;
+		} else
+			b1 = (*decoder)(*in++);
+
+		if(*in == '\0') {
+			b2 = '\0';
+			nbytes = 1;
+		} else {
+			if(m->base64chars) {
+				m->base64chars--;
+				b2 = cb2;
+			} else
+				b2 = (*decoder)(*in++);
+
+			if(*in == '\0') {
+				b3 = '\0';
+				nbytes = 2;
+			} else {
+				if(m->base64chars) {
+					m->base64chars--;
+					b3 = cb3;
+				} else
+					b3 = (*decoder)(*in++);
+
+				if(*in == '\0') {
+					b4 = '\0';
+					nbytes = 3;
+				} else {
+					b4 = (*decoder)(*in++);
+					nbytes = 4;
+				}
+			}
+		}
+
+		switch(nbytes) {
+			case 4:
+				*out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+				*out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+				*out++ = (b3 << 6) | (b4 & 0x3F);
+				continue;
+			/* incomplete group: stash it for the next call */
+			case 3:
+				m->base64_3 = b3;
+				/* FALLTHROUGH */
+			case 2:
+				m->base64_2 = b2;
+				/* FALLTHROUGH */
+			case 1:
+				m->base64_1 = b1;
+				m->base64chars = nbytes;
+				break;
+			default:
+				assert(0);
+		}
+		break;	/* nbytes != 4 => EOL */
+	}
+	return out;
+}
+
+/*
+ * Convert one hexadecimal digit to its value (0 to 15).
+ * Anything else is logged and reported by returning '=', which cannot be
+ * confused with a digit value
+ */
+static unsigned char
+hex(char c)
+{
+	/*
+	 * <ctype.h> functions are only defined for values representable as
+	 * unsigned char (and EOF), plain char may be negative
+	 */
+	if(isdigit((unsigned char)c))
+		return c - '0';
+	if((c >= 'A') && (c <= 'F'))
+		return c - 'A' + 10;
+	if((c >= 'a') && (c <= 'f'))
+		return c - 'a' + 10;
+	cli_dbgmsg("Illegal hex character '%c'\n", c);
+
+	/*
+	 * Some mails (notably some spam) break RFC2045 by failing to encode
+	 * the '=' character
+	 */
+	return '=';
+}
+
+/*
+ * Look up the value of one base64 character in base64Table.
+ * Characters that the table marks with 255 are decoded as 63
+ */
+static unsigned char
+base64(char c)
+{
+	const unsigned char value = base64Table[(unsigned int)(c & 0xFF)];
+
+	return (value == 255) ? 63 : value;
+}
+
+/*
+ * Value of one uuencoded character: its distance from the space character
+ */
+static unsigned char
+uudecode(char c)
+{
+	const int base = ' ';
+
+	return (unsigned char)(c - base);
+}
+
+/*
+ * Only the arguments listed below are ever looked up with
+ * messageFindArgument (fgrep for it in *.c to check), so anything else can
+ * be thrown away to save memory. The price is that this list has to be
+ * kept in step whenever a new messageFindArgument call is added.
+ *
+ * Returns 1 if arg starts (ignoring case) with one of the names, else 0
+ */
+static int
+usefulArg(const char *arg)
+{
+	static const char *const wanted[] = {
+		"name", "filename", "boundary", "protocol",
+		"id", "number", "total", "type"
+	};
+	size_t i;
+
+	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
+		if(strncasecmp(arg, wanted[i], strlen(wanted[i])) == 0)
+			return 1;
+
+	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
+	return 0;
+}
+
+/*
+ * Record the scan context in the message (m->ctx)
+ */
+void
+messageSetCTX(message *m, cli_ctx *ctx)
+{
+	m->ctx = ctx;
+}
+
+/*
+ * Returns TRUE if the message's isInfected flag is set, FALSE otherwise
+ */
+int
+messageContainsVirus(const message *m)
+{
+	return m->isInfected ? TRUE : FALSE;
+}
+
+/*
+ * We've run out of memory. Try to recover some by
+ * deduping the message
+ *
+ * Each body line of at least 8 characters is compared with every later
+ * line; a later line with the same contents is unlinked and replaced by a
+ * link to the earlier one. The scan stops once about 100,000 bytes have been
+ * counted as saved.
+ *
+ * FIXME: this can take a long time. The real solution is for system admins
+ *	to refrain from setting ulimits too low, then this routine won't be
+ *	called
+ */
+static void
+messageDedup(message *m)
+{
+	const text *t1;
+	size_t saved = 0;
+
+	cli_dbgmsg("messageDedup\n");
+
+	/*
+	 * NOTE(review): this resume point is thrown away by the initialiser
+	 * of the for loop below, so every call rescans from body_first.
+	 * Confirm that dedupedThisFar is still valid across calls before
+	 * dropping that initialiser
+	 */
+	t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first;
+
+	for(t1 = m->body_first; t1; t1 = t1->t_next) {
+		const char *d1;
+		text *t2;
+		line_t *l1;
+		unsigned int r1;
+
+		if(saved >= 100*1000)
+			break;	/* that's enough */
+		l1 = t1->t_line;
+		if(l1 == NULL)
+			continue;
+		d1 = lineGetData(l1);
+		if(strlen(d1) < 8)
+			continue;	/* wouldn't recover many bytes */
+
+		/* presumably 255 is the most references a line can hold */
+		r1 = (unsigned int)lineGetRefCount(l1);
+		if(r1 == 255)
+			continue;
+		/*
+		 * We don't want to foul up any pointers
+		 */
+		if(t1 == m->encoding)
+			continue;
+		if(t1 == m->bounce)
+			continue;
+		if(t1 == m->binhex)
+			continue;
+		if(t1 == m->yenc)
+			continue;
+
+		for(t2 = t1->t_next; t2; t2 = t2->t_next) {
+			const char *d2;
+			line_t *l2 = t2->t_line;
+
+			if(l2 == NULL)
+				continue;
+			d2 = lineGetData(l2);
+			if(d1 == d2)
+				/* already linked */
+				continue;
+			if(strcmp(d1, d2) == 0) {
+				/*
+				 * presumably lineUnlink returns NULL when the
+				 * line has actually been freed - only then
+				 * are the bytes counted as saved
+				 */
+				if(lineUnlink(l2) == NULL)
+					saved += strlen(d1) + 1;
+				t2->t_line = lineLink(l1);
+				if(t2->t_line == NULL) {
+					cli_errmsg("messageDedup: out of memory\n");
+					return;
+				}
+				/* stop sharing l1 once the count reaches 255 */
+				if(++r1 == 255)
+					break;
+			}
+		}
+	}
+
+	cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
+	/* NULL if the whole body was scanned, else the line we stopped at */
+	m->dedupedThisFar = t1;
+}
+
+/*
+ * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
+ * free, or NULL on error.
+ *
+ * The result is "name=value" where name is everything before the '*' and
+ * value has its %XX escapes decoded; the character set and language are
+ * dropped. Input without "*=" or "*0=" is returned as a plain copy, and an
+ * empty string is returned if the character set and language fields are
+ * not both terminated by a quote.
+ *
+ * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
+ *	 protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
+ */
+static char *
+rfc2231(const char *in)
+{
+	const char *ptr;
+	char *ret, *out;
+	enum { LANGUAGE, CHARSET, CONTENTS } field;
+
+	if(strstr(in, "*0*=") != NULL) {
+		cli_warnmsg("RFC2231 parameter continuations are not yet handled\n");
+		return cli_strdup(in);
+	}
+
+	ptr = strstr(in, "*0=");
+	if(ptr != NULL)
+		/*
+		 * Parameter continuation, with no continuation
+		 * Thunderbird 1.5 (and possibly other versions) does this
+		 */
+		field = CONTENTS;
+	else {
+		ptr = strstr(in, "*=");
+		field = LANGUAGE;
+	}
+
+	if(ptr == NULL)	/* quick return */
+		return cli_strdup(in);
+
+	cli_dbgmsg("rfc2231 '%s'\n", in);
+
+	/* the output is never longer than the input */
+	ret = cli_malloc(strlen(in) + 1);
+
+	if(ret == NULL)
+		return NULL;
+
+	/*
+	 * Copy the attribute name. "in" itself is left alone so that the
+	 * warning at the end can print the whole header
+	 */
+	memcpy(ret, in, (size_t)(ptr - in));
+	out = &ret[ptr - in];
+
+	*out++ = '=';
+
+	/* skip to just after the '=' of "*=" or "*0=" */
+	while(*ptr++ != '=')
+		;
+
+	/*
+	 * We don't do anything with the language and character set, just skip
+	 * over them!
+	 */
+	while(*ptr) {
+		switch(field) {
+			case LANGUAGE:
+				if(*ptr == '\'')
+					field = CHARSET;
+				break;
+			case CHARSET:
+				if(*ptr == '\'')
+					field = CONTENTS;
+				break;
+			case CONTENTS:
+				if(*ptr == '%') {
+					unsigned char byte;
+
+					if((*++ptr == '\0') || (*ptr == '\n'))
+						break;
+
+					byte = hex(*ptr);
+
+					if((*++ptr == '\0') || (*ptr == '\n')) {
+						*out++ = byte;
+						break;
+					}
+
+					byte <<= 4;
+					byte += hex(*ptr);
+					*out++ = byte;
+				} else
+					*out++ = *ptr;
+		}
+		if(*ptr++ == '\0')
+			/*
+			 * Incorrect message that has just one character after
+			 * a '%'.
+			 * FIXME: stash something in out that would, for example
+			 *	treat %2 as %02, assuming field == CONTENTS
+			 */
+			break;
+	}
+
+	if(field != CONTENTS) {
+		free(ret);
+		cli_warnmsg("Invalid RFC2231 header: '%s'\n", in);
+		return cli_strdup("");
+	}
+
+	*out = '\0';
+
+	cli_dbgmsg("rfc2231 returns '%s'\n", ret);
+
+	return ret;
+}
+
+/*
+ * common/simil:
+ *	From Computing Magazine 20/8/92
+ * Returns a percentage from 0 to 100 - how similar are 2 strings?
+ * 100 for exact match, < 0 for error
+ */
+struct	pstr_list {	/* internal stack */
+	char	*d1;	/* string held by this stack element */
+	struct	pstr_list	*next;	/* element below this one */
+};
+
+/* status codes of simil(), push() and pop(), all negative */
+#define	OUT_OF_MEMORY	(-2)
+#define	FAILURE	(-3)
+#define	SUCCESS	(-4)
+#define	ARRAY_OVERFLOW	(-5)
+typedef	struct	pstr_list	ELEMENT1;
+typedef	ELEMENT1		*LINK1;
+
+static	int	push(LINK1 *top, const char *string);
+static	int	pop(LINK1 *top, char *buffer);
+static	unsigned	int	compare(char *ls1, char **rs1, char *ls2, char **rs2);
+
+#define	MAX_PATTERN_SIZ	50	/* maximum string lengths */
+
+/*
+ * Score how similar str1 and str2 are, ignoring case.
+ * Returns 100 if they match, otherwise twice the number of characters found
+ * in common as a percentage of the combined length of the two strings once
+ * stripped. Returns OUT_OF_MEMORY or ARRAY_OVERFLOW (a stripped string
+ * longer than MAX_PATTERN_SIZ - 1) on error.
+ *
+ * The strings still to be compared are kept in pairs on a stack; everything
+ * left on the stack is released on every way out
+ */
+static int
+simil(const char *str1, const char *str2)
+{
+	LINK1 top = NULL;
+	unsigned int score = 0;
+	size_t common, total;
+	size_t len1, len2;
+	char *rs1 = NULL, *rs2 = NULL;
+	char *s1, *s2;
+	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
+	int ret;
+
+	if(strcasecmp(str1, str2) == 0)
+		return 100;
+
+	if((s1 = cli_strdup(str1)) == NULL)
+		return OUT_OF_MEMORY;
+	if((s2 = cli_strdup(str2)) == NULL) {
+		free(s1);
+		return OUT_OF_MEMORY;
+	}
+
+	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
+		ret = ARRAY_OVERFLOW;
+		goto done;
+	}
+
+	total += len2;
+
+	/* every failure from here on is a failed push */
+	ret = OUT_OF_MEMORY;
+
+	if((push(&top, s1) == OUT_OF_MEMORY) ||
+	   (push(&top, s2) == OUT_OF_MEMORY))
+		goto done;
+
+	while(pop(&top, ls2) == SUCCESS) {
+		pop(&top, ls1);
+		/*
+		 * compare() cuts ls1 and ls2 at the start of the common run
+		 * and points rs1 and rs2 at what follows it; both halves go
+		 * back on the stack if they are worth comparing
+		 */
+		common = compare(ls1, &rs1, ls2, &rs2);
+		if(common > 0) {
+			score += (unsigned int)common;
+			len1 = strlen(ls1);
+			len2 = strlen(ls2);
+
+			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
+				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY))
+					goto done;
+			len1 = strlen(rs1);
+			len2 = strlen(rs2);
+
+			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
+				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY))
+					goto done;
+		}
+	}
+	ret = (total > 0) ? (int)((score * 200) / total) : 0;
+
+done:
+	/* free whatever is still on the stack, it is empty on success */
+	while(pop(&top, ls1) == SUCCESS)
+		;
+	free(s1);
+	free(s2);
+	return ret;
+}
+
+/*
+ * Find the longest run of characters, ignoring case, that ls1 and ls2 have
+ * in common and return its length.
+ * If anything matched, ls1 and ls2 are cut (a NUL is written) at the start
+ * of the run and *rs1 and *rs2 are set to the first character after it, so
+ * the caller is left with the text on each side of the run. If nothing
+ * matched, 0 is returned and nothing is modified
+ */
+static unsigned int
+compare(char *ls1, char **rs1, char *ls2, char **rs2)
+{
+	unsigned int common, maxchars = 0;
+	bool some_similarity = FALSE;
+	char *s1, *s2;
+	char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
+	char *cs1, *cs2, *start1, *end1, *end2;
+
+	end1 = ls1 + strlen(ls1);
+	end2 = ls2 + strlen(ls2);
+	start1 = ls1;
+
+	/*
+	 * The casts to unsigned char are needed because tolower() is not
+	 * defined for negative values, which plain char may hold
+	 */
+	for(;;) {
+		s1 = start1;
+		s2 = ls2;
+
+		if(s1 < end1) {
+			while(s1 < end1 && s2 < end2) {
+				if(tolower((unsigned char)*s1) == tolower((unsigned char)*s2)) {
+					some_similarity = TRUE;
+					cs1 = s1;
+					cs2 = s2;
+					common = 0;
+					/* measure the run starting here */
+					do
+						if(s1 == end1 || s2 == end2)
+							break;
+						else {
+							s1++;
+							s2++;
+							common++;
+						}
+					while(tolower((unsigned char)*s1) == tolower((unsigned char)*s2));
+
+					if(common > maxchars) {
+						unsigned int diff = common - maxchars;
+						maxchars = common;
+						maxs1 = cs1;
+						maxs2 = cs2;
+						maxe1 = s1;
+						maxe2 = s2;
+						/*
+						 * pull both ends in by the
+						 * gain in the best length
+						 */
+						end1 -= diff;
+						end2 -= diff;
+					} else
+						s1 -= common;
+				} else
+					s2++;
+			}
+			start1++;
+		} else
+			break;
+	}
+	if(some_similarity) {
+		*maxs1 = '\0';
+		*maxs2 = '\0';
+		*rs1 = maxe1;
+		*rs2 = maxe2;
+	}
+	return maxchars;
+}
+
+/*
+ * Push a copy of string onto the stack *top.
+ * Returns SUCCESS, or OUT_OF_MEMORY in which case the stack is unchanged
+ */
+static int
+push(LINK1 *top, const char *string)
+{
+	LINK1 element;
+
+	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
+		return OUT_OF_MEMORY;
+	if((element->d1 = cli_strdup(string)) == NULL) {
+		/* don't leak the element that was never linked in */
+		free(element);
+		return OUT_OF_MEMORY;
+	}
+	element->next = *top;
+	*top = element;
+
+	return SUCCESS;
+}
+
+/*
+ * Take the top string off the stack *top, copy it into buffer and free the
+ * stack element. buffer must be big enough for the string.
+ * Returns SUCCESS, or FAILURE if the stack is empty
+ */
+static int
+pop(LINK1 *top, char *buffer)
+{
+	LINK1 head = *top;
+
+	if(head == NULL)
+		return FAILURE;
+
+	(void)strcpy(buffer, head->d1);
+	*top = head->next;
+	free(head->d1);
+	free((char *)head);
+
+	return SUCCESS;
+}
+
+/*
+ * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
+ *
+ * Returns 1 if line is at least 10 characters long, starts with a lower
+ * case 'b', and reads "begin " (compared without regard to case) followed
+ * by three digits and a space. Returns 0 otherwise
+ */
+int
+isuuencodebegin(const char *line)
+{
+	if(line[0] != 'b')	/* quick check */
+		return 0;
+
+	if(strlen(line) < 10)
+		return 0;
+
+	/*
+	 * isdigit() is only defined for values representable as unsigned
+	 * char (and EOF), plain char may be negative
+	 */
+	return (strncasecmp(line, "begin ", 6) == 0) &&
+		isdigit((unsigned char)line[6]) &&
+		isdigit((unsigned char)line[7]) &&
+		isdigit((unsigned char)line[8]) && (line[9] == ' ');
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,890 @@
+/*
+ *  Copyright (C) 2006 Michal 'GiM' Spadlinski http://gim.org.pl/
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+/*
+ * lzma.c
+ *
+ * o2:28:18 CEST 2oo6-25-o6 		- initial 0xA4/0x536
+ * oo:29:4o CEST 2oo6-26-o6 		- 0x1cd/0x536 [+0x129]
+ * o2:13:19 CEST 2oo6-o1-o7, 2oo6-3o-o6 - 0x536/0x536
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#ifdef        HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef        HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef        HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef        HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef        HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include "cltypes.h"
+#include "pe.h"
+#include "others.h"
+#include "mew.h"
+#include "packlibs.h"
+#include "rebuildpe.h"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define CE32(x) be32_to_host(x) /* Convert big endian to host */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o)) /* round o down to a multiple of a; o itself when a is 0 */
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o)) /* round o up to a multiple of a; o itself when a is 0 */
+
+/*
+ * Prime the decoder state from the packed stream starting at old_edx.
+ * All three members of *p are overwritten:
+ *	p2 - the four bytes at old_edx+1 .. old_edx+4, taken most
+ *	     significant byte first
+ *	p1 - 0xffffffff
+ *	p0 - old_edx + 5, i.e. the first byte not yet consumed
+ * The new p->p0 is also returned.
+ *
+ * northfox builds p2 with a five round shift-and-or loop; a plain byte
+ * swap of the 32 bit word gives the same result.  For reference, this is
+ * the straight transliteration of the original code (old_edx was a
+ * 'uint32_t *' then, and mew_lzma passed &new_edx where new_edx = var1C):
+ *
+ *	uint32_t loc_esi, loc_edi;
+ *	uint8_t *loc_eax;
+ *
+ *	p->p2 = loc_esi = 0;
+ *	p->p0 = loc_eax = (uint8_t *)*old_edx;
+ *	*old_edx = 5;
+ *	do {
+ *		loc_esi = p->p2 << 8;
+ *		loc_edi = *(uint8_t *)((loc_eax)++);
+ *		loc_esi |= loc_edi;
+ *		(*old_edx)--;
+ *		p->p2 = loc_esi;
+ *	} while (*old_edx);
+ *	p->p0 = loc_eax;
+ *	p->p1 = 0xffffffff;
+ */
+static char *lzma_bswap_4861dc(struct lzmastate *p, char *old_edx)
+{
+	char *next_in = old_edx + 5;
+	uint32_t first_word = (uint32_t)cli_readint32(old_edx + 1);
+
+	p->p0 = next_in;
+	p->p1 = 0xffffffff;
+	p->p2 = EC32(CE32(first_word));
+
+	return next_in;
+}
+
+static uint32_t lzma_486248 (struct lzmastate *p, char **old_ecx, char *src, uint32_t size)
+{	/*
+	 * Decode one bit with the 16 bit counter kept in the low half of the
+	 * 32 bit word at *old_ecx, and adapt that counter afterwards.
+	 *
+	 * p		decoder state: p0 is the input read pointer, p1 and p2
+	 *		are the two 32 bit working values updated here
+	 * old_ecx	in: address of the counter word; out: overwritten with
+	 *		the advanced p->p0 whenever a refill byte is consumed
+	 * src, size	buffer that both *old_ecx (4 bytes) and p->p0 (1 byte)
+	 *		have to lie within
+	 *
+	 * Returns the decoded bit (0 or 1), or 0xffffffff if one of the two
+	 * pointers falls outside the buffer.
+	 */
+	uint32_t loc_esi, loc_edi, loc_eax, loc_ecx, ret;
+	if (!CLI_ISCONTAINED(src, size, *old_ecx, 4) || !CLI_ISCONTAINED(src, size, p->p0, 1))
+		return 0xffffffff;
+	loc_esi = p->p1;
+	loc_eax = loc_esi >> 0xb;
+	loc_ecx = cli_readint32(*old_ecx);
+	ret = loc_ecx&0xffff;	/* only the low 16 bits are the counter */
+	(loc_eax) *=  ret;	/* threshold = (p1 >> 11) * counter */
+	loc_edi = p->p2;
+	if (loc_edi < loc_eax)
+	{
+		/* 48625f */
+		/* bit 0: p1 shrinks to the threshold, counter grows by (0x800 - counter) >> 5 */
+		p->p1 = loc_eax;
+		loc_esi = ret;
+		loc_edi = ((int32_t)(0x800 - ret) >> 5) + ((loc_eax&0xffff0000) | ret); 
+								/* signed<-sar, &|<-mov ax, [ecx] */
+		loc_ecx = (loc_ecx&0xffff0000)|(loc_edi&0xffff);	/* upper 16 bits of the word are kept */
+		cli_writeint32(*old_ecx, loc_ecx);
+
+		ret = 0;
+	} else {
+		/* 48629e */
+		/* bit 1: p1 and p2 both drop by the threshold, counter shrinks by counter >> 5 */
+		loc_esi -= loc_eax;
+		loc_edi -= loc_eax;
+		p->p1 = loc_esi;
+		p->p2 = loc_edi;
+		loc_eax = (loc_eax & 0xffff0000) | ret;
+		loc_esi = (loc_esi & 0xffff0000) | (ret >> 5);
+		loc_eax -= loc_esi;
+
+		loc_ecx = (loc_ecx&0xffff0000)|(loc_eax&0xffff);
+		cli_writeint32(*old_ecx, loc_ecx);
+
+		ret = 1;
+	}
+	loc_eax = p->p1;
+	if (loc_eax < 0x1000000)
+	{
+		/* top byte of p1 is empty: shift in the next input byte */
+		*old_ecx = p->p0;
+		loc_edi = (*(uint8_t *)(p->p0));
+		loc_esi = ((p->p2) << 8) | loc_edi;
+		(*old_ecx)++;
+		loc_eax <<= 8;
+		p->p2 = loc_esi;
+		p->p1 = loc_eax;
+		p->p0 = *old_ecx;
+	}
+	return ret;
+
+}
+
+static uint32_t lzma_48635C(uint8_t znaczek, char **old_ecx, struct lzmastate *p, uint32_t *retval, char *src, uint32_t size)
+{	/*
+	 * Decode one byte, steering the counter selection with the bits of
+	 * znaczek (taken msb first) for as long as every decoded bit equals
+	 * the znaczek bit it was decoded against; after the first mismatch
+	 * the remaining bits are decoded from the table at the entry value
+	 * of *old_ecx without looking at znaczek any more.
+	 *
+	 * old_ecx	in: base of the counter table; clobbered by the
+	 *		lzma_486248 calls
+	 * retval	out: the decoded byte is stored in the low 8 bits
+	 *
+	 * Returns 0 on success, 0xffffffff if lzma_486248 reports a bounds
+	 * failure (*retval is not written then).
+	 */
+	uint32_t loc_esi = (znaczek&0xff) >> 7, /* msb */
+		loc_ebx, ret;
+	char *loc_edi;
+	znaczek <<= 1;
+	ret = loc_esi << 9;
+	loc_edi = *old_ecx;
+	*old_ecx = loc_edi + ret + 0x202;
+	if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+		return 0xffffffff;
+	loc_ebx = ret | 2;	/* bits accumulate below a leading 1; done once 0x100 is reached */
+
+	while (loc_esi == ret)
+	{
+		if (loc_ebx >= 0x100)
+		{
+			ret = (ret&0xffffff00) | (loc_ebx&0xff);
+			*retval = ret;
+			return 0;
+		}
+		loc_esi = (znaczek&0xff) >> 7;
+		znaczek <<= 1;
+		ret = ((loc_esi + 1) << 8) + loc_ebx;
+		*old_ecx = loc_edi + ret*2;	/* counters are addressed in 2 byte steps */
+		if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+			return 0xffffffff;
+		loc_ebx += loc_ebx;
+		loc_ebx |= ret;
+	}
+	/* mismatch: finish the byte without znaczek */
+	loc_esi = 0x100;
+	while (loc_ebx < loc_esi)
+	{
+		loc_ebx += loc_ebx;
+		*old_ecx = loc_edi + loc_ebx;
+		if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+			return 0xffffffff;
+		loc_ebx |= ret;
+	}
+	ret = (ret&0xffffff00) | (loc_ebx&0xff);
+	*retval = ret;
+	return 0;
+}
+
+static uint32_t lzma_4862e0 (struct lzmastate *p, char **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{	/*
+	 * Decode *old_edx bits, most significant first, walking a table of
+	 * 2 byte counters that starts at the entry value of *old_ecx: the
+	 * index starts at 1 and becomes 2*index + bit after every step.
+	 *
+	 * old_ecx	in: table base; clobbered by the lzma_486248 calls
+	 * old_edx	in: number of bits (nothing is decoded when it is 0 or
+	 *		has the top bit set); out: 1 << (number of bits & 0xff)
+	 * retval	out: final index minus the new *old_edx
+	 *
+	 * Returns 0 on success, 0xffffffff on a bounds failure.
+	 * NOTE(review): the shift below is only defined for counts under 32;
+	 * the callers visible in this file pass 3, 6 and 8.
+	 */
+	uint32_t loc_ebx, loc_esi, stack_ecx, ret;
+	char *loc_edi;
+
+	loc_ebx = *old_edx;
+	ret = 1;
+	loc_edi = *old_ecx;
+	if (loc_ebx && !(loc_ebx&0x80000000))
+	{
+		/* loc_4862f1 */
+		stack_ecx = loc_ebx;
+		do {
+			loc_esi = ret+ret;
+			*old_ecx = loc_edi + loc_esi;
+			if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+				return 0xffffffff;
+			ret += loc_esi;
+			stack_ecx--;
+		} while (stack_ecx);
+	} 
+	/* loc_48630b */
+	  /* unneeded
+	*old_ecx = (uint8_t *)loc_ebx;
+	  */
+	
+	*old_edx = 1 << (loc_ebx&0xff);
+	ret -= *old_edx;
+	*retval = ret;
+	return 0;
+}
+
+/*
+ * Decode a value with a one or two bit selector followed by a bit tree
+ * (lzma_4862e0).  With base being the entry value of *old_ecx:
+ *	selector 0	3 bit tree at base + 0x4 + (var0 << 4),   result + 0
+ *	selector 1,0	3 bit tree at base + 0x104 + (var0 << 4), result + 8
+ *	selector 1,1	8 bit tree at base + 0x204,               result + 0x10
+ * The first selector bit uses the counter at base, the second one the
+ * counter at base + 2.
+ *
+ * old_edx - write only
+ * old_ecx - clobbered
+ * retval  - receives the decoded value
+ *
+ * Returns 0 on success; on a bounds failure -1 is returned, which is
+ * 0xffffffff in the uint32_t return type.
+ */
+static uint32_t lzma_4863da (uint32_t var0, struct lzmastate *p, char  **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{
+	uint32_t ret;
+	char *loc_esi = *old_ecx;
+
+	if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+		return -1;
+	if (ret)
+	{
+		/* loc_4863ff */
+		*old_ecx = loc_esi+2;
+		if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+			return -1;
+		if (ret)
+		{
+			/* loc_486429 */
+			*old_edx = 8;
+			*old_ecx = loc_esi + 0x204;
+			if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+				return -1;
+			ret += 0x10;
+		} else {
+			/* loc_48640e */
+			ret = var0 << 4;
+			*old_edx = 3;
+			*old_ecx = loc_esi + 0x104 + ret;
+			if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+				return -1;
+			ret += 0x8;
+		}
+	} else {
+		/* loc_4863e9 */
+		ret = var0 << 4;
+		*old_edx = 3;
+		*old_ecx = loc_esi + 0x4 + ret;
+		if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+			return -1;
+	}
+	*retval = ret;
+	return 0;
+}
+
+static uint32_t lzma_486204 (struct lzmastate *p, uint32_t old_edx, uint32_t *retval, char *src, uint32_t size)
+{	/*
+	 * Decode old_edx bits without any counter: each round halves p1 and
+	 * emits a 1 (subtracting p1 from p2) when p2 >= p1, else a 0.  Bits
+	 * are collected most significant first.
+	 *
+	 * old_edx	number of bits; nothing is decoded when it is 0 or has
+	 *		the top bit set
+	 * retval	out: the collected bits (0 when nothing was decoded)
+	 * src, size	buffer p->p0 has to stay within while refilling
+	 *
+	 * Returns 0 on success, 0xffffffff if p->p0 leaves the buffer; in the
+	 * latter case p->p1 and p->p2 are not written back.
+	 */
+	uint32_t loc_esi, loc_edi, loc_ebx, loc_eax;
+	char *loc_edx;
+	loc_esi = p->p1;
+	loc_edi = p->p2;
+	loc_eax = 0;
+	if (old_edx && !(old_edx&0x80000000))
+	{
+		/* loc_4866212 */
+		loc_ebx = old_edx;
+		do {
+			loc_esi >>= 1;
+			loc_eax <<= 1;
+			if (loc_edi >= loc_esi)
+			{
+				loc_edi -= loc_esi;
+				loc_eax |= 1;
+			}
+			/* loc_486222 */
+			if (loc_esi < 0x1000000)
+			{
+				/* top byte of p1 is empty: shift in the next input byte */
+				if (!CLI_ISCONTAINED(src, size, p->p0, 1))
+					return 0xffffffff;
+				loc_edx = p->p0;
+				loc_edi <<= 8;
+				loc_esi <<= 8;
+				loc_edi |= (*loc_edx)&0xff; /* movzx ebp, byte ptr [edx] */
+				p->p0 = ++loc_edx;
+			}
+			loc_ebx--;
+		} while (loc_ebx);
+
+	}
+	p->p2 = loc_edi;
+	p->p1 = loc_esi;
+	*retval = loc_eax;
+	return 0;
+}
+
+static uint32_t lzma_48631a (struct lzmastate *p, char **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{	/*
+	 * Decode *old_edx bits through a table of 2 byte counters starting
+	 * at the entry value of *old_ecx.  The table index starts at 1 and
+	 * becomes 2*index + bit after each step, as in lzma_4862e0, but the
+	 * decoded bits are assembled least significant first.
+	 *
+	 * old_ecx	in: table base; clobbered by the lzma_486248 calls
+	 * old_edx	in: number of bits; out: the final table index (1 when
+	 *		no bits were requested)
+	 * retval	out: the assembled value (0 when no bits were requested)
+	 *
+	 * Returns 0 on success, 0xffffffff on a bounds failure.
+	 */
+	uint32_t copy1, copy2;
+	uint32_t loc_esi, loc_edi, ret;
+	char *loc_ebx;
+
+	copy1 = *old_edx;
+	loc_edi = 0;
+	loc_ebx = *old_ecx;
+	*old_edx = 1;
+	copy2 = (uint32_t)loc_edi;
+
+	if (copy1 <= (uint32_t)loc_edi)
+	{
+		*retval = copy2;
+		return 0;
+	}
+
+	do {
+		loc_esi = *old_edx + *old_edx;
+		*old_ecx = loc_esi + loc_ebx;
+		if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+			return 0xffffffff;
+		/* unneeded *old_ecx  = loc_edi; */
+		*old_edx = loc_esi + ret;
+		/* ret <<= (uint32_t)(*old_ecx)&0xff; */
+		ret <<= (loc_edi&0xff);	/* loc_edi is the position of this bit in the result */
+		copy2 |= ret;
+		loc_edi++;
+	} while (loc_edi < copy1);
+
+	*retval = copy2;
+	return 0;
+}
+
+
+int mew_lzma(char *orgsource, char *buf, uint32_t size_sum, uint32_t vma, uint32_t special)
+{	/*
+	 * Unpack the lzma compressed part of a MEW packed image in place.
+	 *
+	 * orgsource, size_sum	the buffer all counter and output accesses are
+	 *			bounds-checked against
+	 * buf			control data: with special set a 32 bit length
+	 *			used for the call fixup comes first; then a 32
+	 *			bit address of the counter work area; then per
+	 *			chunk a 32 bit unpacked size, a 32 bit
+	 *			destination address, a 32 bit packed length,
+	 *			one more byte and the packed data
+	 * vma			subtracted from the addresses read from buf to
+	 *			turn them into offsets into orgsource
+	 * special		non-zero: one chunk is unpacked and the e8/e9
+	 *			fixup is run over the output; zero: chunks are
+	 *			unpacked until a zero size word is found
+	 *
+	 * Returns 0 on success and -1 on failure.  mainloop is always 1, so
+	 * the outer loop is only left through a return and the final
+	 * 'return 0xbadc0de' is never reached.
+	 *
+	 * NOTE(review): the header words are read from buf without a bounds
+	 * check in this function; unmew11 checks the first 29 bytes before
+	 * calling.
+	 */
+	uint32_t var08, var0C, var10, var14, var20, var24, var28, var34;
+	struct lzmastate var40;
+	uint32_t new_eax, new_edx, temp;
+	int i, mainloop;
+
+	char var1, var30;
+	char *source = buf, *dest, *new_ebx, *new_ecx, *var0C_ecxcopy, *var2C;
+	char *pushed_esi = NULL, *pushed_ebx = NULL;
+	uint32_t pushed_edx=0;
+
+	uint32_t loc_esi, loc_edi;
+	uint8_t *var18;
+
+	if (special)
+	{
+		pushed_edx = cli_readint32(source);	/* number of bytes the call fixup will scan */
+		source += 4;
+	}
+	temp = cli_readint32(source) - vma;
+	source += 4;
+	if (!special) pushed_ebx = source;
+	new_ebx = orgsource + temp;	/* base of the counter tables used by all the lzma_* helpers */
+
+    do {
+        mainloop = 1;
+	do {
+		/* loc_486450 */
+		if (!special)
+		{
+			source = pushed_ebx;
+			if (cli_readint32(source) == 0)
+			{
+				return 0;	/* zero size word: no more chunks */
+			}
+		}
+		var28 = cli_readint32 (source);	/* unpacked size of this chunk */
+		source += 4;
+		temp = cli_readint32 (source) - vma;
+		var18 = orgsource + temp;	/* where the unpacked bytes go */
+		if (special) pushed_esi = orgsource + temp;
+		source += 4;
+		temp = cli_readint32 (source);	/* packed length */
+		source += 5; /* yes, five: the length word plus one more byte */
+		var2C = source;
+		source += temp;
+		if (special) pushed_ebx = source;
+		else pushed_ebx = source;
+		var1 = 0;
+		dest = new_ebx;
+		
+		if(!CLI_ISCONTAINED(orgsource, size_sum, dest, 0x6E6C))
+			return -1;
+		/* 0x1b9b words of 4 bytes == 0x6E6C bytes: every 16 bit counter starts out as 0x400 */
+		for (i=0; i<0x1b9b; i++)
+		{
+			cli_writeint32(dest, 0x4000400);
+			dest += 4;
+		}
+		loc_esi = 0;
+		var08 = var20 = 0;	/* var08 counts the bytes written to var18 so far */
+		loc_edi = 1;
+		var14 = var10 = var24 = 1;
+
+		lzma_bswap_4861dc(&var40, var2C);
+		new_edx = 0;
+	} while (var28 <= loc_esi); /* source = 0 */
+
+	cli_dbgmsg("MEWlzma: entering do while loop\n");
+	do {
+		/* loc_4864a5 */
+		new_eax = var08 & 3;
+		new_ecx = (((loc_esi << 4) + new_eax)*2) + new_ebx;
+		var0C = new_eax;
+		if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+			return -1;
+		if (new_eax)
+		{
+			/* loc_486549 */
+			new_ecx = new_ebx + loc_esi*2 + 0x180;
+			var20 = 1;
+			/* eax=1 */
+			if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+				return -1;
+			if (new_eax != 1)
+			{
+				/* loc_486627 */
+				/* a new distance is decoded into loc_edi; the previous ones shift down var14 -> var10 -> var24 */
+				var24 = var10;
+				var10 = var14;
+				/* xor eax,eax; cmp esi, 7; setnl al; dec eax; add eax, 0Ah */
+				/* new_eax = (((loc_esi >= 7)-1)&0xFFFFFFFD) + 0xA; */
+				new_eax = loc_esi>=7 ? 10:7;
+				new_ecx = new_ebx + 0x664;
+				var14 = loc_edi;
+				loc_esi = new_eax;
+				if (lzma_4863da (var0C, &var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+					return -1;
+				var0C = new_eax;
+				if (var0C >= 4)
+					new_eax = 3;
+
+				/* loc_486662 */
+				new_edx = 6;
+				new_eax <<= 7;
+				new_ecx = new_eax + new_ebx + 0x360;
+				if (lzma_4862e0 (&var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+					return -1;
+				if (new_eax < 4)
+				{ 
+					/* loc_4866ca */
+					loc_edi = new_eax;
+				} else {
+					/* loc_48667d */
+					uint32_t loc_ecx;
+					loc_ecx = ((int32_t)new_eax >> 1)-1; /* sar */
+					loc_edi = ((new_eax&1)|2) << (loc_ecx&0xff);
+					if (new_eax >= 0xe)
+					{
+						/* loc_4866ab */
+						new_edx = loc_ecx - 4;
+						if (lzma_486204 (&var40, new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+							return -1;
+						loc_edi += new_eax << 4;
+
+						new_edx = 4;
+						new_ecx = new_ebx + 0x644;
+					} else {
+						/* loc_486691 */
+						new_edx = loc_ecx;
+						loc_ecx = loc_edi - new_eax;
+						new_ecx =  new_ebx + loc_ecx*2 + 0x55e;
+					}
+					/* loc_4866a2 */
+					if (lzma_48631a (&var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+						return -1;
+					loc_edi += new_eax;
+				}
+				loc_edi++;
+			} else {
+				/* loc_486568 */
+				new_ecx = new_ebx + loc_esi*2 + 0x198;
+				if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+					return -1;
+				if (new_eax)
+				{
+					/* loc_4865bd */
+					/* pick one of the saved distances var14, var10 or var24 and make it the current one */
+					new_ecx = new_ebx + loc_esi*2 + 0x1B0;
+					if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+						return -1;
+					if (new_eax)
+					{
+						/* loc_4865d2 */
+						new_ecx = new_ebx + loc_esi*2 + 0x1C8;
+						if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+							return -1;
+						if (new_eax) {
+							/* loc_4865ea */
+							new_eax = var24;
+							var24 = var10;
+						} else {
+							/* loc_4865e5 */
+							new_eax = var10;
+						}
+						/* loc_4865f3 */
+						var10 = var14;
+					} else {
+						/* loc_4865cd */
+						new_eax = var14;
+					}
+					/* loc_4865f9 */
+					var14 = loc_edi;
+					loc_edi = new_eax;
+				} else {
+					/* loc_48657e */
+					new_eax = ((loc_esi + 0xf) << 4) + var0C;
+					new_ecx = new_ebx + new_eax*2;
+					if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+						return -1;
+					if (!new_eax) {
+						uint32_t loc_ecx;
+						/* loc_486593 */
+						/* copy the single byte found loc_edi bytes back in the output */
+						loc_ecx = var08;
+						loc_ecx -= loc_edi;
+						/* loc_esi = ((((loc_esi >= 7)-1)&0xFFFFFFFE) + 0xB); */
+						loc_esi = loc_esi>=7 ? 11:9;
+						if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + loc_ecx, 1))
+							return -1;
+						var1 = *(var18 + loc_ecx);
+						loc_ecx = (loc_ecx&0xffffff00) | var1;
+						/* loc_4865af */
+						new_edx = var08++;
+						if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_edx, 1))
+							return -1;
+						*(var18 + new_edx) = loc_ecx & 0xff;
+
+						/* loc_4866fe */
+						new_eax = var08;
+						continue; /* !!! jumps to the 'new_eax < var28' test of this do-while */
+					}
+
+				}
+				/* loc_4865fe */
+				new_ecx = new_ebx + 0xa68;
+				if (lzma_4863da (var0C, &var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+					return -1;
+				var0C = new_eax;
+				/* new_eax = (((loc_esi >= 7)-1)&0xFFFFFFFD) + 0xB; */
+				new_eax = loc_esi>=7 ? 11:8;
+				loc_esi = new_eax;
+			}
+			/* loc_4866cd */
+			if (!loc_edi)
+			{
+				break;
+			} else {
+				/* copy var0C + 2 bytes from loc_edi bytes back, stopping early at the end of the chunk */
+				var0C += 2;
+				new_ecx = var18;
+				new_edx = new_eax = var08;
+				new_eax -= loc_edi;
+				if ( ((var0C < var28 - new_edx) &&
+						(!CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_eax), var0C) || 
+						 !CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_edx), var0C))) ||
+						(!CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_eax), var28 - new_edx) ||
+						 !CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_edx), var28 - new_edx)) )
+					return -1;
+				do {
+					var1 = *(uint8_t *)(new_ecx + new_eax);
+					*(uint8_t *)(new_ecx + new_edx) = var1;
+
+					new_edx++;
+					new_eax++;
+					var0C--;
+					if (var0C <= 0)	/* var0C is unsigned, so this is var0C == 0 */
+						break;
+				} while (new_edx < var28);
+				var08 = new_edx;
+			}
+		} else {
+			/* loc_4864C8 */
+			/* a plain byte: the table is selected by the high nibble of the previous byte (var1) */
+			new_eax = (((var1 & 0xff) >> 4)*3) << 9;
+			new_ecx = new_eax + new_ebx + 0xe6c;
+			var0C_ecxcopy = new_ecx;
+			if (loc_esi >= 4)
+			{
+				/* loc_4864e8 */
+				if (loc_esi >= 10)
+					loc_esi -= 6;
+				else
+					loc_esi -= 3;
+
+			} else {
+				/* loc_4864e4 */
+				loc_esi = 0;
+			}
+
+			if (var20 == 0)	{
+				/* loc_48651D */
+				new_eax = 1;
+				do {
+					/* loc_486525 */
+					/*new_ecx = var0C_ecxcopy;*/
+					new_eax += new_eax;
+					new_ecx += new_eax;
+					var34 = new_eax;
+					if ((new_eax = lzma_486248(&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+						return -1;
+					new_eax |= var34;
+					/* loc_486522 */
+					/* keeping it here instead of at the top
+					 * seems to work faster
+					 */
+					if (new_eax < 0x100)
+					{
+						new_ecx = var0C_ecxcopy;
+					}
+				} while (new_eax < 0x100);
+				/* loc_48653e */
+				var1 = (uint8_t)(new_eax & 0xff);
+			} else {
+				int t;
+				/* loc_4864FB */
+				/* decode against the byte found loc_edi bytes back in the output */
+				new_eax = var08 - loc_edi;
+				if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_eax, 1))
+					return -1;
+				t = *(var18+new_eax);
+				new_eax = (new_eax&0xffffff00) | t;
+
+				var30 = t;	/* not read again in this function */
+				if (lzma_48635C (t, &new_ecx, &var40, &new_eax, orgsource, size_sum) == 0xffffffff)
+					return -1;
+				var20 = 0;
+				var1 = new_eax&0xff;
+			}
+
+			/* loc_486541 */
+
+			/* unneeded: new_ecx = (new_ecx&0xffffff00) | var1; */
+
+			/* loc_4865af */
+			new_edx = var08++;
+
+			if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_edx, 1))
+				return -1;
+			*(var18 + new_edx) = var1;
+		}
+		/* loc_4866fe */
+		new_eax = var08;
+	} while (new_eax < var28);
+
+    	if (special) {
+		uint32_t loc_ecx;
+		/* let's fix calls */
+		loc_ecx = 0;
+		cli_dbgmsg("MEWlen: %08x ? %08x\n", new_edx, pushed_edx);
+
+		if (!CLI_ISCONTAINED(orgsource, size_sum, pushed_esi, pushed_edx))
+			return -1;
+		/*
+		 * NOTE(review): an opcode in the last four bytes advances
+		 * loc_ecx past pushed_edx, so the '!=' test below never
+		 * matches and the 32 bit access leaves the checked range
+		 */
+		do {
+			/* 0xe8, 0xe9 call opcodes */
+			if (pushed_esi[loc_ecx] == '\xe8' || pushed_esi[loc_ecx] == '\xe9')
+			{
+				char *adr = (char *)(pushed_esi + loc_ecx + 1);
+				loc_ecx++;
+				
+				/* byte swap the operand and subtract the offset of the operand itself */
+				cli_writeint32(adr, EC32(CE32((uint32_t)cli_readint32(adr)))-loc_ecx);
+				loc_ecx += 4;
+			} else 
+				loc_ecx++;
+		} while (loc_ecx != pushed_edx);
+		return 0; /*pushed_edx;*/
+	}
+    } while (mainloop);
+
+    return 0xbadc0de;
+}
+
+
+/* UPack lzma */
+
+/*
+ * compare with 486248
+ *
+ * Decode one bit for the UPack unpacker with the 32 bit counter stored
+ * at old_ecx, and adapt that counter.  The next four input bytes at
+ * p->p0 are byte swapped and compared, after subtracting p->p2, against
+ * (p->p1 >> 11) * counter.
+ *
+ * bb, bl - buffer that both old_ecx and p->p0 (4 bytes each) have to lie
+ *          within
+ *
+ * Returns the decoded bit (0 or 1), or 0xffffffff after logging a
+ * "contain error!" message when one of the pointers is out of bounds.
+ */
+uint32_t lzma_upack_esi_00(struct lzmastate *p, char *old_ecx, char *bb, uint32_t bl)
+{
+	uint32_t loc_eax, ret, loc_edi;
+	loc_eax = p->p1 >> 0xb;
+	if (!CLI_ISCONTAINED(bb, bl, old_ecx, 4) || !CLI_ISCONTAINED(bb, bl, p->p0, 4))
+	{
+		/* the second test only serves to report which pointer was bad */
+		if (!CLI_ISCONTAINED(bb, bl, old_ecx, 4))
+			cli_dbgmsg("contain error! %08x %08x ecx: %08x [%08x]\n", bb, bl, old_ecx,bb+bl);
+		else
+			cli_dbgmsg("contain error! %08x %08x p0: %08x [%08x]\n", bb, bl, p->p0,bb+bl);
+		return 0xffffffff;
+	}
+	ret = cli_readint32(old_ecx);
+	loc_eax *= ret;
+	loc_edi = cli_readint32((char *)p->p0);
+	loc_edi = EC32(CE32(loc_edi)); /* bswap */
+	loc_edi -= p->p2;
+	if (loc_edi < loc_eax)
+	{
+		/* bit 0: counter grows by (0x800 - counter) >> 5 */
+		p->p1 = loc_eax;
+		loc_eax = (0x800 - ret) >> 5;
+		cli_writeint32(old_ecx, cli_readint32(old_ecx) + loc_eax);
+		ret = 0;
+	} else {
+		/* bit 1: counter shrinks by counter >> 5 */
+		p->p2 += loc_eax;
+		p->p1 -= loc_eax;
+		loc_eax = ret >> 5;
+		cli_writeint32(old_ecx, cli_readint32(old_ecx) - loc_eax);
+		ret = 1;
+	}
+	if(((p->p1)&0xff000000) == 0)
+	{
+		/* top byte of p1 is empty: scale up and move on by one input byte */
+		p->p2 <<= 8;
+		p->p1 <<= 8;
+		p->p0++;
+	}
+	return ret;
+}
+
+/*
+ * compare with lzma_4862e0
+ * lzma_upack_esi_4c 0x1 as eax!
+ *
+ * Walk a table of 4 byte counters that starts at old_ebp: the index starts
+ * at old_eax and becomes 2*index + bit after every decoded bit, until it
+ * is no longer below old_ecx.  At least one bit is always decoded.
+ *
+ * old_edx - out: address of the last counter that was used
+ * retval  - out: final index minus old_ecx
+ * bs, bl  - buffer handed to lzma_upack_esi_00 for its bounds checks
+ *
+ * Returns 0 on success, 0xffffffff if lzma_upack_esi_00 fails (*retval is
+ * not written then).
+ */
+uint32_t lzma_upack_esi_50(struct lzmastate *p, uint32_t old_eax, uint32_t old_ecx, char **old_edx, char *old_ebp, uint32_t *retval, char *bs, uint32_t bl)
+{
+	uint32_t loc_eax = old_eax, ret;
+
+	do {
+		*old_edx = old_ebp + (loc_eax<<2);
+		if ((ret = lzma_upack_esi_00(p, *old_edx, bs, bl)) == 0xffffffff)
+			return 0xffffffff;
+		loc_eax += loc_eax;
+		loc_eax += ret;
+	} while (loc_eax < old_ecx);
+
+	*retval = loc_eax - old_ecx;
+	return 0;
+}
+
+/*
+ * Decode a value with a one or two bit selector followed by a counter
+ * table walk (lzma_upack_esi_50 started at index 1):
+ *	selector 0	table limit 8,      base value 1
+ *	selector 1,0	table limit 8,      base value 9
+ *	selector 1,1	table limit 8 << 5, base value 0x11
+ * The table sits base value * 4 bytes behind the last selector counter.
+ *
+ * old_eax - only its upper 24 bits are carried into the base value, and
+ *           only for the first two selectors
+ * old_ecx - in/out: low byte is replaced by 8, then shifted for 1,1
+ * old_edx - in: address of the first selector counter; out: address of
+ *           the last counter used
+ * retval  - out: base value plus the result of the table walk
+ *
+ * Returns 0 on success, 0xffffffff as soon as any of the bit decodes
+ * reports a bounds failure.
+ */
+uint32_t lzma_upack_esi_54(struct lzmastate *p, uint32_t old_eax, uint32_t *old_ecx, char **old_edx, uint32_t *retval, char *bs, uint32_t bl)
+{
+	uint32_t ret, loc_eax = old_eax;
+
+	*old_ecx = ((*old_ecx)&0xffffff00)|8;
+	/* a failed decode must not be mistaken for a 1 bit */
+	if ((ret = lzma_upack_esi_00 (p, *old_edx, bs, bl)) == 0xffffffff)
+		return 0xffffffff;
+	*old_edx = ((*old_edx) + 4);
+	loc_eax = (loc_eax&0xffffff00)|1;
+	if (ret)
+	{
+		if ((ret = lzma_upack_esi_00 (p, *old_edx, bs, bl)) == 0xffffffff)
+			return 0xffffffff;
+		loc_eax |= 8; /* mov al, 9 */
+		if (ret)
+		{
+			*old_ecx <<= 5;
+			loc_eax = 0x11; /* mov al, 11 */
+		}
+	}
+	ret = loc_eax;
+	if (lzma_upack_esi_50(p, 1, *old_ecx, old_edx, *old_edx + (loc_eax << 2), &loc_eax, bs, bl) == 0xffffffff)
+		return 0xffffffff;
+
+	*retval = ret + loc_eax;
+	return 0;
+}
+
+
+/*
+ * Unpack a MEW 11 packed section in place and write the rebuilt PE file
+ * to filedesc.
+ *
+ * src		buffer holding the image; ssize + dsize bytes of it are
+ *		treated as valid for all bounds checks
+ * off, dsize	the packer's control block is read at src + dsize + off:
+ *		entry point at +4, first destination address at +8, first
+ *		packed stream at +12
+ * base, vadd	base + vadd is subtracted from addresses found in the
+ *		packed data to turn them into offsets into src
+ * uselzma	zero: every unmew() pass becomes a section of its own;
+ *		non-zero: used as an offset, the byte at src + uselzma + 8
+ *		selects the 'special' lzma variant when it is 0x50, and the
+ *		result is rebuilt as a single section
+ * sectnum, endsrc, enddst
+ *		not used by this function
+ *
+ * Returns 1 on success and -1 on any failure.  The section table is
+ * released on every path.
+ */
+int unmew11(int sectnum, char *src, int off, int ssize, int dsize, uint32_t base, uint32_t vadd, int uselzma, char **endsrc, char **enddst, int filedesc)
+{
+	uint32_t entry_point, newedi, loc_ds=dsize, loc_ss=ssize;
+	char *source = src + dsize + off; /*EC32(section_hdr[sectnum].VirtualSize) + off;*/
+	char *lesi = source + 12, *ledi;
+	char *f1, *f2;
+	int i;
+	struct cli_exe_section *section = NULL;
+	uint32_t vma = base + vadd, size_sum = ssize + dsize;
+
+	entry_point  = cli_readint32(source + 4);
+	newedi = cli_readint32(source + 8);
+	ledi = src + (newedi - vma);
+
+	i = 0;
+	ssize -= 12;
+	while (1)
+	{
+  		cli_dbgmsg("MEW unpacking section %d (%08x->%08x)\n", i, lesi, ledi);
+		if (!CLI_ISCONTAINED(src, size_sum, lesi, 4) || !CLI_ISCONTAINED(src, size_sum, ledi, 4))
+		{
+			cli_dbgmsg("Possibly programmer error or hand-crafted PE file, report to clamav team\n");
+			/* sections collected by earlier passes must not leak */
+			free(section);
+			return -1;
+		}
+		if (unmew(lesi, ledi, loc_ss, loc_ds, &f1, &f2))
+		{
+			free(section);
+			return -1;
+		}
+
+		/* we don't need last section in sections since this is information for fixing imptbl */
+		if (!CLI_ISCONTAINED(src, size_sum, f1, 4))
+		{
+			free(section);
+			return -1;
+		}
+
+		/* XXX */
+		loc_ss -= (f1+4-lesi);
+		loc_ds -= (f2-ledi);
+		/* the word at f1 is the next destination address, 0 after the last stream */
+		ledi = src + (cli_readint32(f1) - vma);
+		lesi = f1+4;
+
+		if (!uselzma)
+		{
+			uint32_t val = PESALIGN(f2 - src, 0x1000);
+			void *newsect;
+
+			/* section[i].raw was stored as section[i+1] by the previous pass */
+			if (i && val < section[i].raw) {
+			  cli_dbgmsg("MEW: WTF - please report\n");
+			  free(section);
+			  return -1;
+			}
+
+			if (!(newsect=cli_realloc(section, (i+2)*sizeof(struct cli_exe_section)))) {
+			  cli_dbgmsg("MEW: Out of memory\n");
+			  free(section);
+			  return -1;
+			}
+
+			section = (struct cli_exe_section *)newsect;
+			section[0].raw = 0;
+			section[0].rva = vadd;
+			section[i+1].raw = val;
+			section[i+1].rva = val + vadd;
+			section[i].rsz = section[i].vsz = ((i)?(val - section[i].raw):val);
+		}
+		i++;
+
+		if (!cli_readint32(f1))
+			break;
+	}
+
+	/* LZMA stuff */
+	if (uselzma) {
+		/* nothing was allocated in the loop above, so this frees NULL */
+		free(section);
+
+		/* put everything in one section */
+		i = 1;
+		if (!CLI_ISCONTAINED(src, size_sum, src+uselzma+8, 1))
+		{
+			cli_dbgmsg("MEW: couldn't access lzma 'special' tag\n");
+			return -1;
+		}
+		/* 0x50 -> push eax */
+		cli_dbgmsg("MEW: lzma %swas used, unpacking\n", (*(src + uselzma+8) == '\x50')?"special ":"");
+		if (!CLI_ISCONTAINED(src, size_sum, f1+4, 20 + 4 + 5))
+		{
+			cli_dbgmsg("MEW: lzma initialization data not available!\n");
+			return -1;
+		}
+
+		if(mew_lzma(src, f1+4, size_sum, vma, *(src + uselzma+8) == '\x50'))
+		{
+			return -1;
+		}
+		loc_ds=PESALIGN(loc_ds, 0x1000);
+
+		section = cli_calloc(1, sizeof(struct cli_exe_section));
+		if(!section) {
+			cli_dbgmsg("MEW: Out of memory\n");
+			return -1;
+		}
+
+		section[0].raw = 0; section[0].rva = vadd;
+		section[0].rsz = section[0].vsz = dsize;
+	}
+	if (!cli_rebuildpe(src, section, i, base, entry_point - base, 0, 0, filedesc))
+	{
+		cli_dbgmsg("MEW: Rebuilding failed\n");
+		free(section);
+		return -1;
+	}
+	free(section);
+	return 1;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,146 @@
+/*
+ *  msexpand: Microsoft "compress.exe/expand.exe" compatible decompressor
+ *
+ *  Copyright (c) 2000 Martin Hinner <mhi at penguin.cz>
+ *  Algorithm & data structures by M. Winterhoff <100326.2776 at compuserve.com>
+ *
+ *  Corrected and adapted to ClamAV by Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+#include "cltypes.h"
+#include "others.h"
+#include "msexpand.h"
+
+/*
+ * Expand a file packed by Microsoft compress.exe from 'in' to 'out'.
+ *
+ * The header is identified by its first 32 bit word: 0x44445A53 (the
+ * bytes "SZDD") is expanded, 0x4A41574B (the bytes "KWAJ") is recognised
+ * but rejected as unsupported version 6.22.
+ *
+ * The data is a sequence of control bytes, each followed by up to eight
+ * items, one per control bit starting with the least significant one:
+ *   bit set   - one literal byte
+ *   bit clear - two bytes naming a 12 bit position in the 4096 byte
+ *               window and a length of 3 to 18 bytes to copy from there
+ * The window starts out zeroed with the write position at 4096 - 16.
+ *
+ * Returns 0 when the input has been consumed (a file that ends in the
+ * middle of an item is expanded up to that point), -1 on a bad header, an
+ * allocation failure or a write error.
+ */
+int cli_msexpand(FILE *in, FILE *out)
+{
+	int bits, ch, i, j, len, mask;
+	unsigned char *buffer;
+	uint32_t magic1, magic2, magic3, filesize;
+	uint16_t reserved;
+
+
+    if(fread(&magic1, sizeof(magic1), 1, in) != 1) {
+	return -1;
+    }
+
+    if(magic1 == le32_to_host(0x44445A53L))
+    {
+	if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
+	    return -1;
+	}
+
+	if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
+	    return -1;
+	}
+
+	/* only read to get past the header, the value is not used */
+	if(fread(&filesize, sizeof(filesize), 1, in) != 1) {
+	    return -1;
+	}
+
+	if(magic2 != le32_to_host(0x3327F088L))
+	{
+	    cli_warnmsg("msexpand: Not a MS-compressed file\n");
+	    return -1;
+	}
+
+    } else
+    if(magic1 == le32_to_host(0x4A41574BL))
+    {
+	if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
+	    return -1;
+	}
+
+	if(fread(&magic3, sizeof(magic3), 1, in) != 1) {
+	    return -1;
+	}
+
+	if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
+	    return -1;
+	}
+
+	if(magic2 != le32_to_host(0xD127F088L) || magic3 != le32_to_host(0x00120003L))
+	{
+	    cli_warnmsg("msexpand: Not a MS-compressed file\n");
+	    return -1;
+	}
+
+	cli_warnmsg("msexpand: unsupported version 6.22\n");
+	return -1;
+
+    } else {
+	cli_warnmsg("msexpand: Not a MS-compressed file\n");
+	return -1;
+    }
+
+    if((buffer = (unsigned char *) cli_calloc(4096, sizeof(char))) == NULL) {
+	cli_errmsg("msexpand: Can't allocate memory\n");
+	return -1;
+    }
+
+    i = 4096 - 16;
+
+    while (1) {
+	if((bits = fgetc(in)) == EOF)
+	    break;
+
+	/*
+	 * the breaks below only leave this for loop; the fgetc above then
+	 * sees the end of file again and ends the outer loop
+	 */
+	for(mask = 0x01; mask & 0xFF; mask <<= 1) {
+	    if(!(bits & mask)) {
+		if((j = fgetc(in)) == EOF)
+		    break;
+		/*
+		 * a match needs both of its bytes: without this check a
+		 * file cut off here had EOF (-1) decoded as an 18 byte
+		 * copy
+		 */
+		if((len = fgetc(in)) == EOF)
+		    break;
+		j += (len & 0xF0) << 4;
+		len = (len & 15) + 3;
+		while(len--) {
+		    buffer[i] = buffer[j];
+		    if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
+			free(buffer);
+			return -1;
+		    }
+		    j++;
+		    j %= 4096;
+		    i++;
+		    i %= 4096;
+		}
+	    } else {
+		if((ch = fgetc(in)) == EOF)
+		    break;
+
+		buffer[i] = ch;
+		if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
+		    free(buffer);
+		    return -1;
+		}
+		i++;
+		i %= 4096;
+	    }
+	}
+    }
+
+    free(buffer);
+    return 0;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1959 @@
+/*
+ * This file includes code from libmspack adapted for libclamav by
+ * tkojm at clamav.net
+ *
+ * Copyright (C) 2003-2004 Stuart Caie
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "others.h"
+#include "clamav.h"
+#include "mspack.h"
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+#ifndef CHAR_BIT
+# define CHAR_BIT (8)
+#endif
+
+
+/***************************************************************************
+ *			 MS-ZIP decompression implementation 
+ ***************************************************************************
+ * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
+ * by Microsoft Corporation.
+ *
+ * The deflate method was created by Phil Katz. MSZIP is equivalent to the
+ * deflate method.
+ *
+ */
+
+/* base match lengths for literal/length codes 257 .. 285, indexed by
+ * (code - 257); mszip_inflate() adds the value of the extra bits to this */
+static const unsigned short mszip_lit_lengths[29] = {
+  3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
+  31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
+};
+
+/* base match offsets for distance codes 0 .. 29; mszip_inflate() adds the
+ * value of the extra bits to this */
+static const unsigned short mszip_dist_offsets[30] = {
+  1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
+  513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+};
+
+/* number of extra bits to read for literal/length codes 257 .. 285,
+ * indexed by (code - 257) */
+static const unsigned char mszip_lit_extrabits[29] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
+  2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
+};
+
+/* number of extra bits to read for distance codes 0 .. 29 */
+static const unsigned char mszip_dist_extrabits[30] = {
+  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+  6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+};
+
+/* the order in which mszip_read_lens() reads the bit length Huffman code
+ * lengths from a dynamic block header */
+static const unsigned char mszip_bitlen_order[19] = {
+  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
+/* ANDing with mszip_bit_mask_tab[n] keeps only the lower n bits (n = 0 .. 16) */
+static const unsigned short mszip_bit_mask_tab[17] = {
+ 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
+
+/* MSZIP bit-buffer macros
+ *
+ * These expand in place and use the enclosing function's locals bit_buffer,
+ * bits_left, i_ptr and i_end together with its 'zip' stream pointer. Bits
+ * are taken from the least significant end of bit_buffer.
+ *
+ * MSZIP_STORE_BITS          saves the local bitstream state into *zip
+ * MSZIP_RESTORE_BITS        loads the local bitstream state from *zip
+ * MSZIP_ENSURE_BITS(n)      refills one byte at a time until at least n bits
+ *                           are buffered; if mszip_read_input() fails it
+ *                           executes 'return zip->error' in the caller
+ * MSZIP_PEEK_BITS(n)        the low n bits, mask computed with a shift
+ * MSZIP_PEEK_BITS_T(n)      the low n bits, mask taken from
+ *                           mszip_bit_mask_tab[]
+ * MSZIP_REMOVE_BITS(n)      discards the low n bits
+ * MSZIP_READ_BITS(v, n)     ensure + peek + remove, result stored in v
+ * MSZIP_READ_BITS_T(v, n)   as MSZIP_READ_BITS but using MSZIP_PEEK_BITS_T
+ */
+#define MSZIP_STORE_BITS do {                                                 \
+  zip->i_ptr      = i_ptr;                                              \
+  zip->i_end      = i_end;                                              \
+  zip->bit_buffer = bit_buffer;                                         \
+  zip->bits_left  = bits_left;                                          \
+} while (0)
+
+#define MSZIP_RESTORE_BITS do {                                               \
+  i_ptr      = zip->i_ptr;                                              \
+  i_end      = zip->i_end;                                              \
+  bit_buffer = zip->bit_buffer;                                         \
+  bits_left  = zip->bits_left;                                          \
+} while (0)
+
+#define MSZIP_ENSURE_BITS(nbits) do {                                         \
+  while (bits_left < (nbits)) {                                         \
+    if (i_ptr >= i_end) {                                               \
+      if (mszip_read_input(zip)) return zip->error;                      \
+      i_ptr = zip->i_ptr;                                               \
+      i_end = zip->i_end;                                               \
+    }                                                                   \
+    bit_buffer |= *i_ptr++ << bits_left; bits_left  += 8;               \
+  }                                                                     \
+} while (0)
+
+#define MSZIP_PEEK_BITS(nbits)   (bit_buffer & ((1<<(nbits))-1))
+#define MSZIP_PEEK_BITS_T(nbits) (bit_buffer & mszip_bit_mask_tab[(nbits)])
+
+#define MSZIP_REMOVE_BITS(nbits) ((bit_buffer >>= (nbits)), (bits_left -= (nbits)))
+
+#define MSZIP_READ_BITS(val, nbits) do {                                      \
+  MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS(nbits); MSZIP_REMOVE_BITS(nbits);     \
+} while (0)
+
+#define MSZIP_READ_BITS_T(val, nbits) do {                                    \
+  MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS_T(nbits); MSZIP_REMOVE_BITS(nbits);   \
+} while (0)
+
+/* Refill the MSZIP input buffer.
+ *
+ * Up to zip->inbuf_size bytes are fetched through the zip->read callback
+ * when one is set, otherwise from zip->fd with cli_readn(), and i_ptr/i_end
+ * are pointed at the fresh data. The first time the source yields nothing a
+ * single zero byte is supplied instead and input_end is set; a second empty
+ * read is an error.
+ *
+ * Returns CL_SUCCESS, or CL_EIO (also stored in zip->error) on failure.
+ */
+static int mszip_read_input(struct mszip_stream *zip) {
+  int nread;
+
+  if (zip->read) {
+    nread = zip->read(zip->file, zip->inbuf, (int)zip->inbuf_size);
+  }
+  else {
+    nread = cli_readn(zip->fd, zip->inbuf, (int)zip->inbuf_size);
+  }
+
+  if (nread < 0) {
+    zip->error = CL_EIO;
+    return zip->error;
+  }
+
+  if (nread == 0) {
+    if (zip->input_end) {
+      /* the padding byte has already been handed out once */
+      cli_dbgmsg("mszip_read_input: out of input bytes\n");
+      zip->error = CL_EIO;
+      return zip->error;
+    }
+
+    /* end of input: pretend one zero byte was read */
+    zip->inbuf[0] = 0;
+    zip->input_end = 1;
+    nread = 1;
+  }
+
+  zip->i_end = &zip->inbuf[nread];
+  zip->i_ptr = &zip->inbuf[0];
+
+  return CL_SUCCESS;
+}
+
+/* inflate() error codes
+ *
+ * All of these are negative. mszip_decompress() looks at the sign of the
+ * value returned by mszip_inflate(): a positive value is passed through
+ * unchanged, anything else is reported to the caller as CL_EFORMAT.
+ */
+#define INF_ERR_BLOCKTYPE   (-1)  /* unknown block type                      */
+#define INF_ERR_COMPLEMENT  (-2)  /* block size complement mismatch          */
+#define INF_ERR_FLUSH       (-3)  /* error from flush_window() callback      */
+#define INF_ERR_BITBUF      (-4)  /* too many bits in bit buffer             */
+#define INF_ERR_SYMLENS     (-5)  /* too many symbols in blocktype 2 header  */
+#define INF_ERR_BITLENTBL   (-6)  /* failed to build bitlens huffman table   */
+#define INF_ERR_LITERALTBL  (-7)  /* failed to build literals huffman table  */
+#define INF_ERR_DISTANCETBL (-8)  /* failed to build distance huffman table  */
+#define INF_ERR_BITOVERRUN  (-9)  /* bitlen RLE code goes over table size    */
+#define INF_ERR_BADBITLEN   (-10) /* invalid bit-length code                 */
+#define INF_ERR_LITCODE     (-11) /* out-of-range literal code               */
+#define INF_ERR_DISTCODE    (-12) /* out-of-range distance code              */
+#define INF_ERR_DISTANCE    (-13) /* somehow, distance is beyond 32k         */
+#define INF_ERR_HUFFSYM     (-14) /* out of bits decoding huffman symbol     */
+
+/* mszip_make_decode_table(nsyms, nbits, length[], table[])
+ *
+ * This function was coded by David Tritscher. It builds a fast huffman
+ * decoding table out of just a canonical huffman code lengths table.
+ *
+ * NOTE: this is NOT identical to lzx_make_decode_table() further down in
+ * this file. This one reverses the quick-lookup bit pattern. Bits are read
+ * MSB to LSB in LZX, but LSB to MSB in MSZIP.
+ *
+ * nsyms  = total number of symbols in this huffman tree.
+ * nbits  = any symbols with a code length of nbits or less can be decoded
+ *          in one lookup of the table.
+ * length = A table to get code lengths from [0 to nsyms-1]
+ * table  = The table to fill up with decoded symbols and pointers. Codes
+ *          longer than nbits are stored as pairs of entries placed after
+ *          the direct-lookup area, so the caller has to provide room for
+ *          them.
+ *
+ * Returns 0 for OK or 1 for error (the code lengths either overrun the
+ * code space or do not fill it completely).
+ */
+static int mszip_make_decode_table(unsigned int nsyms, unsigned int nbits,
+			     unsigned char *length, unsigned short *table)
+{
+  register unsigned int leaf, reverse, fill;
+  register unsigned short sym, next_sym;
+  register unsigned char bit_num;
+  unsigned int pos         = 0; /* the current position in the decode table */
+  unsigned int table_mask  = 1 << nbits;
+  unsigned int mszip_bit_mask    = table_mask >> 1; /* don't do 0 length codes */
+
+  /* fill entries for codes short enough for a direct mapping */
+  for (bit_num = 1; bit_num <= nbits; bit_num++) {
+    for (sym = 0; sym < nsyms; sym++) {
+      if (length[sym] != bit_num) continue;
+
+      /* reverse the significant bits */
+      fill = length[sym]; reverse = pos >> (nbits - fill); leaf = 0;
+      do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill);
+
+      if((pos += mszip_bit_mask) > table_mask) return 1; /* table overrun */
+
+      /* fill all possible lookups of this symbol with the symbol itself */
+      fill = mszip_bit_mask; next_sym = 1 << bit_num;
+      do { table[leaf] = sym; leaf += next_sym; } while (--fill);
+    }
+    mszip_bit_mask >>= 1;
+  }
+
+  /* exit with success if table is now complete */
+  if (pos == table_mask) return 0;
+
+  /* mark all remaining table entries as unused */
+  for (sym = pos; sym < table_mask; sym++) {
+    reverse = sym; leaf = 0; fill = nbits;
+    do { leaf <<= 1; leaf |= reverse & 1; reverse >>= 1; } while (--fill);
+    table[leaf] = 0xFFFF;
+  }
+
+  /* where should the longer codes be allocated from? */
+  next_sym = ((table_mask >> 1) < nsyms) ? nsyms : (table_mask >> 1);
+
+  /* give ourselves room for codes to grow by up to 16 more bits.
+   * codes now start at bit nbits+16 and end at (nbits+16-codelength) */
+  pos <<= 16;
+  table_mask <<= 16;
+  mszip_bit_mask = 1 << 15;
+
+  for (bit_num = nbits+1; bit_num <= MSZIP_MAX_HUFFBITS; bit_num++) {
+    for (sym = 0; sym < nsyms; sym++) {
+      if (length[sym] != bit_num) continue;
+
+      /* leaf = the first nbits of the code, reversed */
+      reverse = pos >> 16; leaf = 0; fill = nbits;
+      do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill);
+
+      for (fill = 0; fill < (bit_num - nbits); fill++) {
+	/* if this path hasn't been taken yet, 'allocate' two entries */
+	if (table[leaf] == 0xFFFF) {
+	  table[(next_sym << 1)     ] = 0xFFFF;
+	  table[(next_sym << 1) + 1 ] = 0xFFFF;
+	  table[leaf] = next_sym++;
+	}
+	/* follow the path and select either left or right for next bit */
+	leaf = (table[leaf] << 1) | ((pos >> (15 - fill)) & 1);
+      }
+      table[leaf] = sym;
+
+      if ((pos += mszip_bit_mask) > table_mask) return 1; /* table overflow */
+    }
+    mszip_bit_mask >>= 1;
+  }
+
+  /* full table? */
+  return (pos != table_mask) ? 1 : 0;
+}
+
+/* MSZIP_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
+ * bitstream using the stated table and puts it in var.
+ *
+ * Besides the bit-buffer locals it uses the enclosing function's 'sym' and
+ * 'i' variables as scratch. On failure it returns from the enclosing
+ * function: INF_ERR_HUFFSYM if the tree walk runs out of bits or leaves the
+ * table, or zip->error if MSZIP_ENSURE_BITS cannot refill the buffer.
+ */
+#define MSZIP_READ_HUFFSYM(tbl, var) do {                                     \
+  /* buffer enough bits for the longest possible huffman code */        \
+  MSZIP_ENSURE_BITS(MSZIP_MAX_HUFFBITS);                                      \
+  /* immediate table lookup of [tablebits] bits of the code */          \
+  sym = zip->tbl##_table[MSZIP_PEEK_BITS(MSZIP_##tbl##_TABLEBITS)];		\
+  /* is the symbol longer than [tablebits] bits? (sym = node index) */  \
+  if (sym >= MSZIP_##tbl##_MAXSYMBOLS) {                                \
+    /* decode remaining bits by tree traversal (i = bit position) */    \
+    i = MSZIP_##tbl##_TABLEBITS - 1;					\
+    do {                                                                \
+      /* next bit: node index doubles, plus 0 (left) or 1 (right) */	\
+      if (i++ > MSZIP_MAX_HUFFBITS) {					\
+        cli_dbgmsg("zip_inflate: out of bits in huffman decode\n");	\
+        return INF_ERR_HUFFSYM;                                         \
+      }                                                                 \
+      sym = (sym << 1) | ((bit_buffer >> i) & 1);			\
+      if(sym >= MSZIP_##tbl##_TABLESIZE) {				\
+	cli_dbgmsg("zip_inflate: index out of table\n");		\
+        return INF_ERR_HUFFSYM;                                         \
+      }									\
+      /* hop to next node index / decoded symbol */			\
+      sym = zip->tbl##_table[sym];					\
+      /* while we are still in node indices, not decoded symbols */     \
+    } while (sym >= MSZIP_##tbl##_MAXSYMBOLS);                          \
+  }                                                                     \
+  /* result */                                                          \
+  (var) = sym;                                                          \
+  /* look up the code length of that symbol and discard those bits */   \
+  i = zip->tbl##_len[sym];                                              \
+  MSZIP_REMOVE_BITS(i);                                                       \
+} while (0)
+
+/* Read the header of a dynamic Huffman block.
+ *
+ * Reads the three code counts, builds the 19-symbol bit-length decoding
+ * table, then uses it to decode the literal and distance code lengths
+ * (including the run-length codes 16, 17 and 18) into zip->LITERAL_len[]
+ * and zip->DISTANCE_len[]. Entries beyond the counts given in the header
+ * are set to zero.
+ *
+ * Returns 0 on success, a negative INF_ERR_* code on malformed data, or
+ * zip->error if the input cannot be refilled.
+ */
+static int mszip_read_lens(struct mszip_stream *zip) {
+  /* bitstream state used by the MSZIP_* macros */
+  register unsigned int bit_buffer;
+  register int bits_left;
+  unsigned char *i_ptr, *i_end;
+
+  /* bitlen Huffman codes -- immediate lookup, 7 bit max code length */
+  unsigned short bl_table[(1 << 7)];
+  unsigned char bl_len[19];
+
+  /* literal lengths first, distance lengths directly behind them */
+  unsigned char lens[MSZIP_LITERAL_MAXSYMBOLS + MSZIP_DISTANCE_MAXSYMBOLS];
+
+  unsigned int lit_codes, dist_codes, bitlen_codes, total_codes;
+  unsigned int code, last_code = 0, i, run;
+
+  MSZIP_RESTORE_BITS;
+
+  /* the three counts are stored biased */
+  MSZIP_READ_BITS(lit_codes, 5);
+  MSZIP_READ_BITS(dist_codes, 5);
+  MSZIP_READ_BITS(bitlen_codes, 4);
+  lit_codes += 257;
+  dist_codes += 1;
+  bitlen_codes += 4;
+
+  if (lit_codes > MSZIP_LITERAL_MAXSYMBOLS) {
+    return INF_ERR_SYMLENS;
+  }
+  if (dist_codes > MSZIP_DISTANCE_MAXSYMBOLS) {
+    return INF_ERR_SYMLENS;
+  }
+  total_codes = lit_codes + dist_codes;
+
+  /* bit lengths arrive in their unusual order; absent ones are zero */
+  for (i = 0; i < 19; i++) {
+    if (i < bitlen_codes) {
+      MSZIP_READ_BITS(bl_len[mszip_bitlen_order[i]], 3);
+    }
+    else {
+      bl_len[mszip_bitlen_order[i]] = 0;
+    }
+  }
+
+  /* every bit-length code fits the 7-bit immediate lookup */
+  if (mszip_make_decode_table(19, 7, &bl_len[0], &bl_table[0])) {
+    return INF_ERR_BITLENTBL;
+  }
+
+  /* decode the literal / distance code lengths */
+  i = 0;
+  while (i < total_codes) {
+    MSZIP_ENSURE_BITS(7);
+    code = bl_table[MSZIP_PEEK_BITS(7)];
+    MSZIP_REMOVE_BITS(bl_len[code]);
+
+    if (code < 16) {
+      /* a plain length; remember it for a following repeat code */
+      last_code = code;
+      lens[i++] = code;
+      continue;
+    }
+
+    if (code == 16) {
+      /* repeat the previous length */
+      MSZIP_READ_BITS(run, 2);
+      run += 3;
+      code = last_code;
+    }
+    else if (code == 17) {
+      /* short run of zeros */
+      MSZIP_READ_BITS(run, 3);
+      run += 3;
+      code = 0;
+    }
+    else if (code == 18) {
+      /* long run of zeros */
+      MSZIP_READ_BITS(run, 7);
+      run += 11;
+      code = 0;
+    }
+    else {
+      cli_dbgmsg("zip_read_lens: bad code!: %u\n", code);
+      return INF_ERR_BADBITLEN;
+    }
+
+    if ((i + run) > total_codes) {
+      return INF_ERR_BITOVERRUN;
+    }
+    while (run--) {
+      lens[i++] = code;
+    }
+  }
+
+  /* copy LITERAL code lengths and clear any remaining */
+  memcpy(&zip->LITERAL_len[0], &lens[0], lit_codes);
+  for (i = lit_codes; i < MSZIP_LITERAL_MAXSYMBOLS; i++) {
+    zip->LITERAL_len[i] = 0;
+  }
+
+  /* same for the DISTANCE code lengths */
+  memcpy(&zip->DISTANCE_len[0], &lens[lit_codes], dist_codes);
+  for (i = dist_codes; i < MSZIP_DISTANCE_MAXSYMBOLS; i++) {
+    zip->DISTANCE_len[i] = 0;
+  }
+
+  MSZIP_STORE_BITS;
+  return 0;
+}
+
+/* a clean implementation of RFC 1951 / inflate
+ *
+ * Decodes blocks from the bitstream held in *zip into zip->window until a
+ * block with the "last block" bit set has been processed. zip->flush_window()
+ * is called every time the window fills up and once more at the end for any
+ * bytes still in the window.
+ *
+ * Returns 0 on success, one of the negative INF_ERR_* codes on malformed
+ * data, or zip->error when the input buffer cannot be refilled.
+ */
+static int mszip_inflate(struct mszip_stream *zip) {
+  unsigned int last_block, block_type, distance, length, this_run, i;
+
+  /* for the bit buffer and huffman decoding */
+  register unsigned int bit_buffer;
+  register int bits_left;
+  register unsigned short sym;
+  unsigned char *i_ptr, *i_end;
+
+  MSZIP_RESTORE_BITS;
+
+  do {
+    /* read in last block bit */
+    MSZIP_READ_BITS(last_block, 1);
+
+    /* read in block type */
+    MSZIP_READ_BITS(block_type, 2);
+
+    if (block_type == 0) {
+      /* uncompressed block */
+      unsigned char lens_buf[4];
+
+      /* go to byte boundary */
+      i = bits_left & 7; MSZIP_REMOVE_BITS(i);
+
+      /* read 4 bytes of data, emptying the bit-buffer if necessary */
+      for (i = 0; (bits_left >= 8); i++) {
+	if (i == 4) return INF_ERR_BITBUF;
+	lens_buf[i] = MSZIP_PEEK_BITS(8);
+	MSZIP_REMOVE_BITS(8);
+      }
+      if (bits_left != 0) return INF_ERR_BITBUF;
+      while (i < 4) {
+	if (i_ptr >= i_end) {
+	  if (mszip_read_input(zip)) return zip->error;
+	  i_ptr = zip->i_ptr;
+	  i_end = zip->i_end;
+	}
+	lens_buf[i++] = *i_ptr++;
+      }
+
+      /* get the length and its complement */
+      length = lens_buf[0] | (lens_buf[1] << 8);
+      i      = lens_buf[2] | (lens_buf[3] << 8);
+      if (length != (~i & 0xFFFF)) return INF_ERR_COMPLEMENT;
+
+      /* read and copy the uncompressed data into the window */
+      while (length > 0) {
+	if (i_ptr >= i_end) {
+	  if (mszip_read_input(zip)) return zip->error;
+	  i_ptr = zip->i_ptr;
+	  i_end = zip->i_end;
+	}
+
+	/* copy no more than the input buffer holds or the window has room for */
+	this_run = length;
+	if (this_run > (unsigned int)(i_end - i_ptr)) this_run = i_end - i_ptr;
+	if (this_run > (MSZIP_FRAME_SIZE - zip->window_posn))
+	  this_run = MSZIP_FRAME_SIZE - zip->window_posn;
+
+	memcpy(&zip->window[zip->window_posn], i_ptr, this_run);
+	zip->window_posn += this_run;
+	i_ptr    += this_run;
+	length   -= this_run;
+
+	if (zip->window_posn == MSZIP_FRAME_SIZE) {
+	  if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH;
+	  zip->window_posn = 0;
+	}
+      }
+    }
+    else if ((block_type == 1) || (block_type == 2)) {
+      /* Huffman-compressed LZ77 block */
+      unsigned int window_posn, match_posn, code;
+
+      if (block_type == 1) {
+	/* block with fixed Huffman codes */
+	i = 0;
+	while (i < 144) zip->LITERAL_len[i++] = 8;
+	while (i < 256) zip->LITERAL_len[i++] = 9;
+	while (i < 280) zip->LITERAL_len[i++] = 7;
+	while (i < 288) zip->LITERAL_len[i++] = 8;
+	for (i = 0; i < 32; i++) zip->DISTANCE_len[i] = 5;
+      }
+      else {
+	/* block with dynamic Huffman codes */
+	MSZIP_STORE_BITS;
+	if ((i = mszip_read_lens(zip))) return i;
+	MSZIP_RESTORE_BITS;
+      }
+
+      /* now huffman lengths are read for either kind of block, 
+       * create huffman decoding tables */
+      if (mszip_make_decode_table(MSZIP_LITERAL_MAXSYMBOLS, MSZIP_LITERAL_TABLEBITS,
+			    &zip->LITERAL_len[0], &zip->LITERAL_table[0]))
+      {
+	return INF_ERR_LITERALTBL;
+      }
+
+      if (mszip_make_decode_table(MSZIP_DISTANCE_MAXSYMBOLS,MSZIP_DISTANCE_TABLEBITS,
+			    &zip->DISTANCE_len[0], &zip->DISTANCE_table[0]))
+      {
+	return INF_ERR_DISTANCETBL;
+      }
+
+      /* decode forever until end of block code */
+      window_posn = zip->window_posn;
+      while (1) {
+	MSZIP_READ_HUFFSYM(LITERAL, code);
+	if (code < 256) {
+	  zip->window[window_posn++] = (unsigned char) code;
+	  if (window_posn == MSZIP_FRAME_SIZE) {
+	    if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH;
+	    window_posn = 0;
+	  }
+	}
+	else if (code == 256) {
+	  /* END OF BLOCK CODE: loop break point */
+	  break;
+	}
+	else {
+	  /* codes 257 .. 285 are matches. The length tables only have 29
+	   * entries (index 0 .. 28), so the decodable but illegal codes 286
+	   * and 287 must be rejected before they are used as an index. */
+	  code -= 257;
+	  if (code >= 29) return INF_ERR_LITCODE;
+	  MSZIP_READ_BITS_T(length, mszip_lit_extrabits[code]);
+	  length += mszip_lit_lengths[code];
+
+	  /* likewise only distance codes 0 .. 29 have table entries; codes
+	   * 30 and 31 are illegal */
+	  MSZIP_READ_HUFFSYM(DISTANCE, code);
+	  if (code >= 30) return INF_ERR_DISTCODE;
+	  MSZIP_READ_BITS_T(distance, mszip_dist_extrabits[code]);
+	  distance += mszip_dist_offsets[code];
+
+	  /* match position is window position minus distance. If distance
+	   * is more than window position numerically, it must 'wrap
+	   * around' the frame size. */ 
+	  match_posn = ((distance > window_posn) ? MSZIP_FRAME_SIZE : 0)
+	    + window_posn - distance;
+
+	  /* copy match */
+	  if (length < 12) {
+	    /* short match, use slower loop but no loop setup code */
+	    while (length--) {
+	      zip->window[window_posn++] = zip->window[match_posn++];
+	      match_posn &= MSZIP_FRAME_SIZE - 1;
+
+	      if (window_posn == MSZIP_FRAME_SIZE) {
+		if (zip->flush_window(zip, MSZIP_FRAME_SIZE))
+		  return INF_ERR_FLUSH;
+		window_posn = 0;
+	      }
+	    }
+	  }
+	  else {
+	    /* longer match, use faster loop but with setup expense */
+	    unsigned char *runsrc, *rundest;
+	    do {
+	      /* copy up to whichever of source or destination wraps first */
+	      this_run = length;
+	      if ((match_posn + this_run) > MSZIP_FRAME_SIZE)
+		this_run = MSZIP_FRAME_SIZE - match_posn;
+	      if ((window_posn + this_run) > MSZIP_FRAME_SIZE)
+		this_run = MSZIP_FRAME_SIZE - window_posn;
+
+	      rundest = &zip->window[window_posn]; window_posn += this_run;
+	      runsrc  = &zip->window[match_posn];  match_posn  += this_run;
+	      length -= this_run;
+	      while (this_run--) *rundest++ = *runsrc++;
+
+	      /* flush if necessary */
+	      if (window_posn == MSZIP_FRAME_SIZE) {
+		if (zip->flush_window(zip, MSZIP_FRAME_SIZE))
+		  return INF_ERR_FLUSH;
+		window_posn = 0;
+	      }
+	      if (match_posn == MSZIP_FRAME_SIZE) match_posn = 0;
+	    } while (length > 0);
+	  }
+
+	} /* else (code >= 257) */
+
+      } /* while (forever) -- break point at 'code == 256' */
+      zip->window_posn = window_posn;
+    }
+    else {
+      /* block_type == 3 -- bad block type */
+      return INF_ERR_BLOCKTYPE;
+    }
+  } while (!last_block);
+
+  /* flush the remaining data */
+  if (zip->window_posn) {
+    if (zip->flush_window(zip, zip->window_posn)) return INF_ERR_FLUSH;
+  }
+  MSZIP_STORE_BITS;
+
+  /* return success */
+  return 0;
+}
+
+/* Flush callback installed by mszip_init() and invoked by mszip_inflate().
+ *
+ * No data is moved here. The function only adds data_flushed to
+ * zip->bytes_output and refuses the flush once that running total is larger
+ * than MSZIP_FRAME_SIZE.
+ *
+ * Returns 0 while the total still fits in one frame, 1 on overflow.
+ */
+static int mszip_flush_window(struct mszip_stream *zip,
+			       unsigned int data_flushed)
+{
+  zip->bytes_output += data_flushed;
+
+  if (zip->bytes_output <= MSZIP_FRAME_SIZE) {
+    return 0;
+  }
+
+  cli_dbgmsg("mszip_flush_window: overflow: %u bytes flushed, total is now %u\n", data_flushed, zip->bytes_output);
+  return 1;
+}
+
+/* Allocate and initialise an MSZIP decompression stream.
+ *
+ * fd                = descriptor read with cli_readn() when no read callback
+ *                     is supplied
+ * ofd               = descriptor the decompressed data is written to
+ * input_buffer_size = requested size of the input buffer in bytes; rounded
+ *                     up to an even number
+ * repair_mode       = non-zero makes mszip_decompress() zero-fill frames
+ *                     that fail to inflate instead of giving up
+ * file, read        = optional input callback and the cab_file handed to it
+ *
+ * Returns the new stream, or NULL if input_buffer_size is not positive or
+ * an allocation fails. Release the stream with mszip_free().
+ */
+struct mszip_stream *mszip_init(int fd,
+				  int ofd,
+				  int input_buffer_size,
+				  int repair_mode,
+				  struct cab_file *file,
+			          int (*read)(struct cab_file *, unsigned char *, int))
+{
+  struct mszip_stream *zip;
+
+  /* reject non-positive sizes up front: a negative value would survive the
+   * rounding below and reach the allocator converted to a huge size_t */
+  if (input_buffer_size <= 0) return NULL;
+
+  /* round up to an even number of bytes */
+  input_buffer_size = (input_buffer_size + 1) & -2;
+
+  /* allocate decompression state */
+  if (!(zip = cli_malloc(sizeof(struct mszip_stream)))) {
+    return NULL;
+  }
+
+  /* allocate input buffer */
+  zip->inbuf  = cli_malloc((size_t) input_buffer_size);
+  if (!zip->inbuf) {
+    free(zip);
+    return NULL;
+  }
+
+  /* initialise decompression state */
+  zip->fd	       = fd;
+  zip->ofd	       = ofd;
+  zip->wflag	       = 1;
+  zip->inbuf_size      = input_buffer_size;
+  zip->error           = CL_SUCCESS;
+  zip->repair_mode     = repair_mode;
+  zip->flush_window    = &mszip_flush_window;
+  zip->input_end       = 0;
+
+  /* the stream comes from cli_malloc(), so give the output counters a
+   * defined value instead of leaving them indeterminate */
+  zip->window_posn     = 0;
+  zip->bytes_output    = 0;
+
+  /* empty input buffer, no pending output, empty bit buffer */
+  zip->i_ptr = zip->i_end = &zip->inbuf[0];
+  zip->o_ptr = zip->o_end = NULL;
+  zip->bit_buffer = 0; zip->bits_left = 0;
+
+  zip->file = file;
+  zip->read = read;
+
+  return zip;
+}
+
+/* Decompress MSZIP data until out_bytes bytes have been delivered.
+ *
+ * Output left over from a previous call is delivered first. After that the
+ * input is scanned for 'CK' block signatures; each block is inflated into
+ * zip->window and as much of it as is still wanted is written to zip->ofd
+ * (the write is skipped when zip->wflag is zero).
+ *
+ * In repair mode a frame that fails to inflate is filled with zero bytes up
+ * to MSZIP_FRAME_SIZE instead of aborting; if the failure code is positive
+ * it is returned once that frame has been written.
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG for a NULL stream or a negative out_bytes,
+ * CL_EIO on a failed write, CL_EFORMAT on undecodable data, or the error
+ * already stored in zip->error.
+ */
+int mszip_decompress(struct mszip_stream *zip, off_t out_bytes) {
+  /* bitstream state used by the MSZIP_* macros */
+  register unsigned int bit_buffer;
+  register int bits_left;
+  unsigned char *i_ptr, *i_end;
+
+  int n, seen_c, error;
+
+  /* easy answers */
+  if (!zip) return CL_ENULLARG;
+  if (out_bytes < 0) return CL_ENULLARG;
+  if (zip->error) return zip->error;
+
+  /* hand over whatever is still pending from the previous frame */
+  n = zip->o_end - zip->o_ptr;
+  if ((off_t) n > out_bytes) n = (int) out_bytes;
+  if (n != 0) {
+    if (zip->wflag && cli_writen(zip->ofd, zip->o_ptr, n) != n) {
+      zip->error = CL_EIO;
+      return zip->error;
+    }
+    out_bytes  -= n;
+    zip->o_ptr += n;
+  }
+  if (out_bytes == 0) return CL_SUCCESS;
+
+  while (out_bytes > 0) {
+    MSZIP_RESTORE_BITS;
+
+    /* drop the bits up to the next byte boundary */
+    n = bits_left & 7;
+    MSZIP_REMOVE_BITS(n);
+
+    /* consume bytes until a 'C' is directly followed by a 'K' */
+    seen_c = 0;
+    for (;;) {
+      MSZIP_READ_BITS(n, 8);
+      if (n == 'C') {
+        seen_c = 1;
+      }
+      else if (seen_c && (n == 'K')) {
+        break;
+      }
+      else {
+        seen_c = 0;
+      }
+    }
+
+    /* inflate one block into an empty window */
+    zip->bytes_output = 0;
+    zip->window_posn = 0;
+    MSZIP_STORE_BITS;
+
+    error = mszip_inflate(zip);
+    if (error != 0) {
+      cli_dbgmsg("mszip_decompress: inflate error %d\n", error);
+      if (!zip->repair_mode) {
+        zip->error = (error > 0) ? error : CL_EFORMAT;
+        return zip->error;
+      }
+
+      /* repair: zero everything that was not produced */
+      cli_dbgmsg("mszip_decompress: MSZIP error, %u bytes of data lost\n",
+                 MSZIP_FRAME_SIZE - zip->bytes_output);
+      n = zip->bytes_output;
+      while (n < MSZIP_FRAME_SIZE) {
+        zip->window[n] = '\0';
+        n++;
+      }
+      zip->bytes_output = MSZIP_FRAME_SIZE;
+    }
+
+    zip->o_ptr = &zip->window[0];
+    zip->o_end = &zip->o_ptr[zip->bytes_output];
+
+    /* write the frame, or only the part of it that is still wanted */
+    if (out_bytes < (off_t)zip->bytes_output) {
+      n = (int)out_bytes;
+    }
+    else {
+      n = zip->bytes_output;
+    }
+    if (zip->wflag && cli_writen(zip->ofd, zip->o_ptr, n) != n) {
+      zip->error = CL_EIO;
+      return zip->error;
+    }
+
+    /* mspack errors (i.e. read errors) are fatal and can't be recovered */
+    if (zip->repair_mode && (error > 0)) return error;
+
+    out_bytes  -= n;
+    zip->o_ptr += n;
+  }
+
+  if (out_bytes != 0) {
+    cli_dbgmsg("mszip_decompress: bytes left to output\n");
+    zip->error = CL_EFORMAT;
+    return zip->error;
+  }
+  return CL_SUCCESS;
+}
+
+/* Release a stream created by mszip_init(): the input buffer is freed first,
+ * then the stream structure itself. A NULL zip is ignored. */
+void mszip_free(struct mszip_stream *zip) {
+  if (!zip) return;
+
+  free(zip->inbuf);
+  free(zip);
+}
+
+/***************************************************************************
+ *			 LZX decompression implementation 
+ ***************************************************************************
+ * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
+ * by Microsoft Corporation.
+ *
+ */
+
+/* LZX decompressor input macros
+ *
+ * LZX_STORE_BITS        stores bitstream state in lzx_stream structure
+ * LZX_RESTORE_BITS      restores bitstream state from lzx_stream structure
+ * LZX_READ_BITS(var,n)  takes N bits from the buffer and puts them in var
+ * LZX_ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer.
+ * LZX_PEEK_BITS(n)      extracts without removing N bits from the bit buffer
+ * LZX_REMOVE_BITS(n)    removes N bits from the bit buffer
+ *
+ * The macros expand in place and use the enclosing function's locals
+ * bit_buffer, bits_left, i_ptr and i_end together with its 'lzx' stream
+ * pointer. Unlike the MSZIP macros above, bits are taken from the most
+ * significant end of bit_buffer: LZX_PEEK_BITS shifts the top bits down and
+ * LZX_REMOVE_BITS shifts the buffer left.
+ *
+ * LZX_ENSURE_BITS adds input 16 bits at a time, built from two bytes with
+ * i_ptr[0] as the low byte, and executes 'return lzx->error' in the
+ * enclosing function if lzx_read_input() fails.
+ *
+ * NOTE(review): LZX_ENSURE_BITS expands to a bare while statement rather
+ * than a do { } while (0) block, so take care when using it as the body of
+ * an unbraced if/else.
+ */
+
+#define LZX_BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT)
+
+#define LZX_STORE_BITS do {                                                 \
+  lzx->i_ptr      = i_ptr;                                              \
+  lzx->i_end      = i_end;                                              \
+  lzx->bit_buffer = bit_buffer;                                         \
+  lzx->bits_left  = bits_left;                                          \
+} while (0)
+
+#define LZX_RESTORE_BITS do {                                               \
+  i_ptr      = lzx->i_ptr;                                              \
+  i_end      = lzx->i_end;                                              \
+  bit_buffer = lzx->bit_buffer;                                         \
+  bits_left  = lzx->bits_left;                                          \
+} while (0)
+
+#define LZX_ENSURE_BITS(nbits)                                              \
+  while (bits_left < (nbits)) {                                         \
+    if (i_ptr + 1 >= i_end) {                                               \
+      if (lzx_read_input(lzx)) return lzx->error;                      \
+      i_ptr = lzx->i_ptr;                                               \
+      i_end = lzx->i_end;                                               \
+    }                                                                   \
+    bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0])                          \
+                  << (LZX_BITBUF_WIDTH - 16 - bits_left);                   \
+    bits_left  += 16;                                                   \
+    i_ptr      += 2;                                                    \
+  }
+
+#define LZX_PEEK_BITS(nbits) (bit_buffer >> (LZX_BITBUF_WIDTH - (nbits)))
+
+#define LZX_REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits)))
+
+#define LZX_READ_BITS(val, nbits) do {                                      \
+  LZX_ENSURE_BITS(nbits);                                                   \
+  (val) = LZX_PEEK_BITS(nbits);                                             \
+  LZX_REMOVE_BITS(nbits);                                                   \
+} while (0)
+
+/* Refill the LZX input buffer.
+ *
+ * Up to lzx->inbuf_size bytes are fetched through the lzx->read callback
+ * when one is set, otherwise from lzx->fd with cli_readn(), and i_ptr/i_end
+ * are pointed at the fresh data. LZX_ENSURE_BITS consumes two bytes per
+ * step and may ask for bits that are never used, so the first time the
+ * source yields nothing two zero bytes are supplied instead and input_end
+ * is set; a second empty read is an error.
+ *
+ * Returns CL_SUCCESS, or CL_EIO (also stored in lzx->error) on failure.
+ */
+static int lzx_read_input(struct lzx_stream *lzx) {
+  int nread;
+
+  if (lzx->read) {
+    nread = lzx->read(lzx->file, &lzx->inbuf[0], (int)lzx->inbuf_size);
+  }
+  else {
+    nread = cli_readn(lzx->fd, &lzx->inbuf[0], (int)lzx->inbuf_size);
+  }
+
+  if (nread < 0) {
+    lzx->error = CL_EIO;
+    return lzx->error;
+  }
+
+  if (nread == 0) {
+    if (lzx->input_end) {
+      /* the two padding bytes have already been handed out once */
+      cli_dbgmsg("lzx_read_input: out of input bytes\n");
+      lzx->error = CL_EIO;
+      return lzx->error;
+    }
+
+    /* end of input: pretend two zero bytes were read */
+    lzx->inbuf[0] = lzx->inbuf[1] = 0;
+    lzx->input_end = 1;
+    nread = 2;
+  }
+
+  lzx->i_end = &lzx->inbuf[nread];
+  lzx->i_ptr = &lzx->inbuf[0];
+
+  return CL_SUCCESS;
+}
+
+/* Huffman decoding macros */
+
+/* LZX_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
+ * bitstream using the stated table and puts it in var.
+ *
+ * Besides the bit-buffer locals it uses the enclosing function's 'sym' and
+ * 'i' variables as scratch. On failure it returns from the enclosing
+ * function with lzx->error set: CL_EFORMAT if the tree walk runs out of
+ * bits or leaves the table, or the error left by lzx_read_input() if
+ * LZX_ENSURE_BITS cannot refill the buffer.
+ */
+#define LZX_READ_HUFFSYM(tbl, var) do {                                     \
+  /* huffman symbols can be up to 16 bits long */                       \
+  LZX_ENSURE_BITS(16);                                                      \
+  /* immediate table lookup of [tablebits] bits of the code */          \
+  sym = lzx->tbl##_table[LZX_PEEK_BITS(LZX_##tbl##_TABLEBITS)];             \
+  /* is the symbol longer than [tablebits] bits? (sym = node index) */  \
+  if (sym >= LZX_##tbl##_MAXSYMBOLS) {                                  \
+    /* decode remaining bits by tree traversal (i = mask of next bit) */ \
+    i = 1 << (LZX_BITBUF_WIDTH - LZX_##tbl##_TABLEBITS);                    \
+    do {                                                                \
+      /* one less bit. error if we run out of bits before decode */     \
+      i >>= 1;                                                          \
+      if (i == 0) {                                                     \
+        cli_dbgmsg("lzx: out of bits in huffman decode\n");             \
+        return lzx->error = CL_EFORMAT;					\
+      }                                                                 \
+      /* double node index and add 0 (left branch) or 1 (right) */      \
+      sym <<= 1; sym |= (bit_buffer & i) ? 1 : 0;                       \
+      /* hop to next node index / decoded symbol, if it is in range */  \
+      if(sym >= (1 << LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS * 2)) { \
+	cli_dbgmsg("lzx: index out of table\n");			\
+	return lzx->error = CL_EFORMAT;					\
+      }									\
+      sym = lzx->tbl##_table[sym];                                    \
+      /* while we are still in node indices, not decoded symbols */     \
+    } while (sym >= LZX_##tbl##_MAXSYMBOLS);                            \
+  }                                                                     \
+  /* result */                                                          \
+  (var) = sym;                                                          \
+  /* look up the code length of that symbol and discard those bits */   \
+  i = lzx->tbl##_len[sym];                                              \
+  LZX_REMOVE_BITS(i);                                                       \
+} while (0)
+
+/* LZX_BUILD_TABLE(tbl) builds a huffman lookup table from code lengths
+ *
+ * It passes lzx->tbl_len[] and lzx->tbl_table[] to lzx_make_decode_table().
+ * If that fails, a debug message is logged and the enclosing function
+ * returns CL_EFORMAT, which is also stored in lzx->error.
+ *
+ * NOTE(review): this expands to a bare if statement rather than a
+ * do { } while (0) block, so an 'else' placed directly after it would bind
+ * to the macro's own if.
+ */
+#define LZX_BUILD_TABLE(tbl)                                                \
+  if (lzx_make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS,  \
+			&lzx->tbl##_len[0], &lzx->tbl##_table[0]))      \
+  {                                                                     \
+    cli_dbgmsg("lzx: failed to build %s table\n", #tbl);                \
+    return lzx->error = CL_EFORMAT;					\
+  }
+
+/* lzx_make_decode_table(nsyms, nbits, length[], table[])
+ *
+ * This function was coded by David Tritscher. It builds a fast huffman
+ * decoding table from a canonical huffman code lengths table.
+ *
+ * nsyms  = total number of symbols in this huffman tree.
+ * nbits  = any symbols with a code length of nbits or less can be decoded
+ *          in one lookup of the table.
+ * length = A table to get code lengths from [0 to nsyms-1]
+ * table  = The table to fill up with decoded symbols and pointers.
+ *
+ * Table layout: entries [0, 1<<nbits) form the direct lookup area. Codes
+ * longer than nbits (up to 16 bits) are stored as a binary tree whose nodes
+ * are allocated in pairs at indices (2*n, 2*n+1), n starting at
+ * 1<<(nbits-1); a direct-lookup or node entry then holds the index n of the
+ * pair to descend into. The caller must supply a table large enough for
+ * both areas.
+ *
+ * Returns 0 for OK or 1 for error (the code lengths over- or under-subscribe
+ * the code space). A table whose lengths are all zero is accepted.
+ */
+
+static int lzx_make_decode_table(unsigned int nsyms, unsigned int nbits,
+			     unsigned char *length, unsigned short *table)
+{
+  register unsigned short sym;
+  register unsigned int leaf, fill;
+  register unsigned char bit_num;
+  unsigned int pos         = 0; /* the current position in the decode table */
+  unsigned int table_mask  = 1 << nbits;
+  unsigned int bit_mask    = table_mask >> 1; /* don't do 0 length codes */
+  unsigned int next_symbol = bit_mask; /* base of allocation for long codes */
+
+  /* fill entries for codes short enough for a direct mapping */
+  for (bit_num = 1; bit_num <= nbits; bit_num++) {
+    for (sym = 0; sym < nsyms; sym++) {
+      if (length[sym] != bit_num) continue;
+      leaf = pos;
+      if((pos += bit_mask) > table_mask) return 1; /* table overrun */
+      /* fill all possible lookups of this symbol with the symbol itself */
+      for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym;
+    }
+    bit_mask >>= 1;
+  }
+
+  /* full table already? */
+  if (pos == table_mask) return 0;
+
+  /* clear the remainder of the table; an unsigned int index is used so the
+   * loop also terminates when table_mask does not fit in 16 bits */
+  for (fill = pos; fill < table_mask; fill++) table[fill] = 0xFFFF;
+
+  /* allow codes to be up to nbits+16 long, instead of nbits */
+  pos <<= 16;
+  table_mask <<= 16;
+  bit_mask = 1 << 15;
+
+  for (bit_num = nbits+1; bit_num <= 16; bit_num++) {
+    for (sym = 0; sym < nsyms; sym++) {
+      if (length[sym] != bit_num) continue;
+
+      /* table overflow: the code space is already used up. This must be
+       * tested BEFORE walking the tree, otherwise the walk starts from an
+       * index past the direct-lookup area and overwrites a slot that was
+       * already assigned. pos is always a multiple of bit_mask, so this
+       * rejects exactly the same inputs as testing after the addition. */
+      if (pos >= table_mask) return 1;
+
+      leaf = pos >> 16;
+      for (fill = 0; fill < bit_num - nbits; fill++) {
+	/* if this path hasn't been taken yet, 'allocate' two entries */
+	if (table[leaf] == 0xFFFF) {
+	  table[(next_symbol << 1)] = 0xFFFF;
+	  table[(next_symbol << 1) + 1] = 0xFFFF;
+	  table[leaf] = next_symbol++;
+	}
+	/* follow the path and select either left or right for next bit */
+	leaf = table[leaf] << 1;
+	if ((pos >> (15-fill)) & 1) leaf++;
+      }
+      table[leaf] = sym;
+
+      pos += bit_mask;
+    }
+    bit_mask >>= 1;
+  }
+
+  /* full table? */
+  if (pos == table_mask) return 0;
+
+  /* either erroneous table, or all elements are 0 - let's find out. */
+  for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
+  return 0;
+}
+
+/* LZX_READ_LENGTHS(tbl, first, last) decodes the code lengths of symbols
+ * first .. last-1 of tree 'tbl' into lzx->tbl_len[]. The lengths are stored
+ * in their own special LZX way, which lzx_read_lens() understands.
+ *
+ * The caller's local bitstream state is saved into 'lzx' before the call
+ * and reloaded afterwards. If lzx_read_lens() fails, the ENCLOSING function
+ * returns lzx->error.
+ */
+#define LZX_READ_LENGTHS(tbl, first, last) do {                            \
+  LZX_STORE_BITS;                                                          \
+  if (lzx_read_lens(lzx, lzx->tbl##_len, (first),                          \
+                    (unsigned int)(last)) != 0)                            \
+    return lzx->error;                                                     \
+  LZX_RESTORE_BITS;                                                        \
+} while (0)
+
+/* Decodes the code lengths for symbols first .. last-1 into lens[].
+ *
+ * A 20-symbol "pretree" is read first (20 plain 4-bit lengths, stored in
+ * lzx->PRETREE_len[]); the real lengths are then huffman-coded with it:
+ *   0-16: new length = (old length - code) modulo 17
+ *   17:   run of (4 bits + 4) zeros
+ *   18:   run of (5 bits + 20) zeros
+ *   19:   run of (1 bit + 4) copies of one delta-coded length
+ *
+ * Returns CL_SUCCESS, or the error code that one of the LZX_* macros stored
+ * in lzx->error before returning from this function.
+ *
+ * NOTE(review): a run is written in full even when it extends past 'last';
+ * presumably the length arrays are declared with spare room for this -
+ * confirm against the struct lzx_stream definition.
+ */
+static int lzx_read_lens(struct lzx_stream *lzx, unsigned char *lens,
+			  unsigned int first, unsigned int last)
+{
+  /* the LZX_* bitstream and huffman macros use these locals by name */
+  register unsigned int bit_buffer;
+  register int bits_left, i;
+  register unsigned short sym;
+  unsigned char *i_ptr, *i_end;
+
+  unsigned int slot, repeat;
+  int code;
+
+  LZX_RESTORE_BITS;
+
+  /* the pretree: 20 symbols, each length a fixed 4-bit field */
+  slot = 0;
+  while (slot < 20) {
+    LZX_READ_BITS(repeat, 4);
+    lzx->PRETREE_len[slot] = repeat;
+    slot++;
+  }
+  LZX_BUILD_TABLE(PRETREE);
+
+  slot = first;
+  while (slot < last) {
+    LZX_READ_HUFFSYM(PRETREE, code);
+
+    switch (code) {
+    case 17:
+      /* run of ([read 4 bits]+4) zeros */
+      LZX_READ_BITS(repeat, 4);
+      for (repeat += 4; repeat > 0; repeat--) lens[slot++] = 0;
+      break;
+
+    case 18:
+      /* run of ([read 5 bits]+20) zeros */
+      LZX_READ_BITS(repeat, 5);
+      for (repeat += 20; repeat > 0; repeat--) lens[slot++] = 0;
+      break;
+
+    case 19:
+      /* run of ([read 1 bit]+4) copies of [read huffman symbol], the
+       * symbol being a delta against the current entry */
+      LZX_READ_BITS(repeat, 1);
+      repeat += 4;
+      LZX_READ_HUFFSYM(PRETREE, code);
+      code = lens[slot] - code;
+      if (code < 0) code += 17;
+      for (; repeat > 0; repeat--) lens[slot++] = code;
+      break;
+
+    default:
+      /* code = 0 to 16, delta current length entry */
+      code = lens[slot] - code;
+      if (code < 0) code += 17;
+      lens[slot] = code;
+      slot++;
+      break;
+    }
+  }
+
+  LZX_STORE_BITS;
+
+  return CL_SUCCESS;
+}
+
+/* Puts the decoder back into its start-of-stream state: no header read, no
+ * block in progress, repeated-offset registers R0..R2 set to 1, and the main
+ * and length tree code lengths cleared. */
+static void lzx_reset_state(struct lzx_stream *lzx) {
+  int n;
+
+  /* no block is in progress and the intel header must be read again */
+  lzx->header_read     = 0;
+  lzx->block_type      = LZX_BLOCKTYPE_INVALID;
+  lzx->block_remaining = 0;
+
+  /* repeated match offsets */
+  lzx->R0 = 1;
+  lzx->R1 = 1;
+  lzx->R2 = 1;
+
+  /* code lengths are transmitted as deltas, so they have to start at 0 */
+  for (n = LZX_MAINTREE_MAXSYMBOLS; n-- > 0; ) lzx->MAINTREE_len[n] = 0;
+  for (n = LZX_LENGTH_MAXSYMBOLS; n-- > 0; )   lzx->LENGTH_len[n]   = 0;
+}
+
+/*-------- main LZX code --------*/
+
+/* lzx_init() allocates and initialises the state for one LZX stream.
+ *
+ * fd                = input file descriptor, stored in the stream
+ * ofd               = output file descriptor; lzx_decompress() writes
+ *                     decoded data to it
+ * window_bits       = log2 of the window size, 15 to 21 inclusive
+ * reset_interval    = number of frames between decoder state resets,
+ *                     0 for no resets
+ * input_buffer_size = size of the input buffer in bytes; rounded up to an
+ *                     even number, must be positive
+ * output_length     = total length of the decompressed data, or 0 if not
+ *                     known yet (see lzx_set_output_length())
+ * file, read        = stored in the stream (presumably an alternative input
+ *                     source used by lzx_read_input() - confirm there)
+ *
+ * Returns the new stream, to be released with lzx_free(), or NULL if an
+ * argument is out of range or memory could not be allocated.
+ */
+struct lzx_stream *lzx_init(int fd,
+			      int ofd,
+			      int window_bits,
+			      int reset_interval,
+			      int input_buffer_size,
+			      off_t output_length,
+			      struct cab_file *file,
+			      int (*read)(struct cab_file *, unsigned char *, int))
+{
+  unsigned int window_size;
+  struct lzx_stream *lzx;
+  int i, j;
+
+  /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
+  if (window_bits < 15 || window_bits > 21) return NULL;
+
+  /* only shift once window_bits is known to be in range; shifting by a
+   * negative or too large count is undefined behaviour */
+  window_size = 1U << window_bits;
+
+  /* round up to an even size and reject zero or negative sizes */
+  input_buffer_size = (input_buffer_size + 1) & -2;
+  if (input_buffer_size < 2) return NULL;
+
+  /* allocate decompression state */
+  if (!(lzx = cli_calloc(1, sizeof(struct lzx_stream)))) {
+    return NULL;
+  }
+
+  for (i = 0, j = 0; i < 51; i += 2) {
+    lzx->extra_bits[i]   = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */
+    if(i < 50)
+	lzx->extra_bits[i+1] = j;
+    if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
+  }
+
+  for (i = 0, j = 0; i < 51; i++) {
+    lzx->position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
+    j += 1 << lzx->extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
+  }
+
+  /* allocate decompression window and input buffer */
+  lzx->window = cli_calloc(1, (size_t) window_size);
+  if(!lzx->window) {
+    free(lzx);
+    return NULL;
+  }
+
+  lzx->inbuf  = cli_calloc(1, (size_t) input_buffer_size);
+  if (!lzx->inbuf) {
+    free(lzx->window);
+    free(lzx);
+    return NULL;
+  }
+
+  /* initialise decompression state */
+  lzx->fd              = fd;
+  lzx->ofd	       = ofd;
+  lzx->wflag	       = 1;
+  lzx->offset          = 0;
+  lzx->length          = output_length;
+  lzx->file	       = file;
+  lzx->read	       = read;
+
+  lzx->inbuf_size      = input_buffer_size;
+  lzx->window_size     = window_size;
+  lzx->window_posn     = 0;
+  lzx->frame_posn      = 0;
+  lzx->frame           = 0;
+  lzx->reset_interval  = reset_interval;
+  lzx->intel_filesize  = 0;
+  lzx->intel_curpos    = 0;
+
+  /* window bits:    15  16  17  18  19  20  21
+   * position slots: 30  32  34  36  38  42  50  */
+  lzx->posn_slots      = ((window_bits == 21) ? 50 :
+			  ((window_bits == 20) ? 42 : (window_bits << 1)));
+  lzx->intel_started   = 0;
+  lzx->input_end       = 0;
+
+  lzx->error = CL_SUCCESS;
+
+  /* both buffers start out empty */
+  lzx->i_ptr = lzx->i_end = &lzx->inbuf[0];
+  lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
+  lzx->bit_buffer = lzx->bits_left = 0;
+
+  lzx_reset_state(lzx);
+  return lzx;
+}
+
+/* Records the total decompressed length of the stream; lzx_decompress()
+ * uses it to size the final frame. Does nothing if lzx is NULL. */
+void lzx_set_output_length(struct lzx_stream *lzx, off_t out_bytes) {
+  if (!lzx) return;
+  lzx->length = out_bytes;
+}
+
+int lzx_decompress(struct lzx_stream *lzx, off_t out_bytes) {
+  /* lzx_decompress(lzx, out_bytes) decodes the stream far enough to emit
+   * the next out_bytes bytes of output and, when lzx->wflag is set, writes
+   * them to lzx->ofd with cli_writen().
+   *
+   * Output is produced one frame (LZX_FRAME_SIZE bytes, less for the final
+   * frame once lzx->length is known) at a time; bytes of a decoded frame
+   * that were not wanted by this call are kept and flushed first by the
+   * next call.
+   *
+   * Returns CL_SUCCESS on success, CL_ENULLARG if lzx is NULL or out_bytes
+   * is negative, CL_EIO if writing fails and CL_EFORMAT for malformed data.
+   * Every failure after the argument check is also stored in lzx->error,
+   * and a stream with lzx->error set returns that error immediately on all
+   * later calls. The local bitstream / window state is only written back to
+   * lzx on the success path.
+   *
+   * bitstream reading and huffman variables (the LZX_* macros refer to
+   * these locals by name) */
+  register unsigned int bit_buffer;
+  register int bits_left, i=0;
+  register unsigned short sym;
+  unsigned char *i_ptr, *i_end;
+
+  int match_length, length_footer, extra, verbatim_bits, bytes_todo;
+  int this_run, main_element, aligned_bits, j;
+  unsigned char *window, *runsrc, *rundest, buf[12];
+  unsigned int frame_size=0, end_frame, match_offset, window_posn;
+  unsigned int R0, R1, R2;
+
+  /* easy answers */
+  if (!lzx || (out_bytes < 0)) return CL_ENULLARG;
+  if (lzx->error) return lzx->error;
+
+  /* flush out any stored-up bytes before we begin: these are the bytes
+   * between o_ptr and o_end left over from the last frame decoded */
+  i = lzx->o_end - lzx->o_ptr;
+  if ((off_t) i > out_bytes) i = (int) out_bytes;
+  if (i) {
+    if (lzx->wflag && cli_writen(lzx->ofd, lzx->o_ptr, i) != i) {
+      return lzx->error = CL_EIO;
+    }
+    lzx->o_ptr  += i;
+    lzx->offset += i;
+    out_bytes   -= i;
+  }
+  if (out_bytes == 0) return CL_SUCCESS;
+
+  /* restore local state */
+  LZX_RESTORE_BITS;
+  window = lzx->window;
+  window_posn = lzx->window_posn;
+  R0 = lzx->R0;
+  R1 = lzx->R1;
+  R2 = lzx->R2;
+
+  end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
+
+  while (lzx->frame < end_frame) {
+    /* have we reached the reset interval? (if there is one?) */
+    if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
+      if (lzx->block_remaining) {
+	cli_dbgmsg("lzx_decompress: %d bytes remaining at reset interval\n", lzx->block_remaining);
+	return lzx->error = CL_EFORMAT;
+      }
+
+      /* re-read the intel header and reset the huffman lengths */
+      lzx_reset_state(lzx);
+    }
+
+    /* read header if necessary */
+    if (!lzx->header_read) {
+      /* read 1 bit. if bit=0, intel filesize = 0.
+       * if bit=1, read intel filesize (32 bits) */
+      j = 0; LZX_READ_BITS(i, 1); if (i) { LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 16); }
+      lzx->intel_filesize = (i << 16) | j;
+      lzx->header_read = 1;
+    } 
+
+    /* calculate size of frame: all frames are 32k except the final frame
+     * which is 32kb or less. this can only be calculated when lzx->length
+     * has been filled in. */
+    frame_size = LZX_FRAME_SIZE;
+    if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
+      frame_size = lzx->length - lzx->offset;
+    }
+
+    /* decode until one more frame is available */
+    bytes_todo = lzx->frame_posn + frame_size - window_posn;
+    while (bytes_todo > 0) {
+      /* initialise new block, if one is needed */
+      if (lzx->block_remaining == 0) {
+	/* realign if previous block was an odd-sized UNCOMPRESSED block */
+	if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
+	    (lzx->block_length & 1))
+	{
+	  if (i_ptr == i_end) {
+	    if (lzx_read_input(lzx)) return lzx->error;
+	    i_ptr = lzx->i_ptr;
+	    i_end = lzx->i_end;
+	  }
+	  i_ptr++;
+	}
+
+	/* read block type (3 bits) and block length (24 bits) */
+	LZX_READ_BITS(lzx->block_type, 3);
+	LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 8);
+	lzx->block_remaining = lzx->block_length = (i << 8) | j;
+
+	/* read individual block headers */
+	switch (lzx->block_type) {
+	case LZX_BLOCKTYPE_ALIGNED:
+	  /* read lengths of and build aligned huffman decoding tree */
+	  for (i = 0; i < 8; i++) { LZX_READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
+	  LZX_BUILD_TABLE(ALIGNED);
+	  /* no break -- rest of aligned header is same as verbatim */
+	case LZX_BLOCKTYPE_VERBATIM:
+	  /* read lengths of and build main huffman decoding tree */
+	  LZX_READ_LENGTHS(MAINTREE, 0, 256);
+	  LZX_READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
+	  LZX_BUILD_TABLE(MAINTREE);
+	  /* if the literal 0xE8 is anywhere in the block... */
+	  if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
+	  /* read lengths of and build lengths huffman decoding tree */
+	  LZX_READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
+	  LZX_BUILD_TABLE(LENGTH);
+	  break;
+
+	case LZX_BLOCKTYPE_UNCOMPRESSED:
+	  /* because we can't assume otherwise */
+	  lzx->intel_started = 1;
+
+	  /* read 1-16 (not 0-15) bits to align to bytes */
+	  LZX_ENSURE_BITS(16);
+	  if (bits_left > 16) i_ptr -= 2;
+	  bits_left = 0; bit_buffer = 0;
+
+	  /* read 12 bytes of stored R0 / R1 / R2 values (three 32-bit
+	   * little-endian words, taken straight from the byte stream) */
+	  for (rundest = &buf[0], i = 0; i < 12; i++) {
+	    if (i_ptr == i_end) {
+	      if (lzx_read_input(lzx)) return lzx->error;
+	      i_ptr = lzx->i_ptr;
+	      i_end = lzx->i_end;
+	    }
+	    *rundest++ = *i_ptr++;
+	  }
+	  R0 = buf[0] | (buf[1] << 8) | (buf[2]  << 16) | (buf[3]  << 24);
+	  R1 = buf[4] | (buf[5] << 8) | (buf[6]  << 16) | (buf[7]  << 24);
+	  R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
+	  break;
+
+	default:
+	  cli_dbgmsg("lzx_decompress: bad block type (0x%x)\n", lzx->block_type);
+	  return lzx->error = CL_EFORMAT;
+	}
+      }
+
+      /* decode more of the block:
+       * run = min(what's available, what's needed) */
+      this_run = lzx->block_remaining;
+      if (this_run > bytes_todo) this_run = bytes_todo;
+
+      /* assume we decode exactly this_run bytes, for now (a final match may
+       * overshoot; that is corrected after the switch below) */
+      bytes_todo           -= this_run;
+      lzx->block_remaining -= this_run;
+
+      /* decode at least this_run bytes */
+      switch (lzx->block_type) {
+      case LZX_BLOCKTYPE_VERBATIM:
+	while (this_run > 0) {
+	  LZX_READ_HUFFSYM(MAINTREE, main_element);
+	  if (main_element < LZX_NUM_CHARS) {
+	    /* literal: 0 to LZX_NUM_CHARS-1 */
+	    window[window_posn++] = main_element;
+	    this_run--;
+	  }
+	  else {
+	    /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+	    main_element -= LZX_NUM_CHARS;
+
+	    /* get match length */
+	    match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+	    if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+	      LZX_READ_HUFFSYM(LENGTH, length_footer);
+	      match_length += length_footer;
+	    }
+	    match_length += LZX_MIN_MATCH;
+	  
+	    /* get match offset: slots 0-2 reuse R0/R1/R2 and move the chosen
+	     * offset to the front; all other slots push a new offset */
+	    switch ((match_offset = (main_element >> 3))) {
+	    case 0: match_offset = R0;                                  break;
+	    case 1: match_offset = R1; R1=R0;        R0 = match_offset; break;
+	    case 2: match_offset = R2; R2=R0;        R0 = match_offset; break;
+	    case 3: match_offset = 1;  R2=R1; R1=R0; R0 = match_offset; break;
+	    default:
+	      extra = lzx->extra_bits[match_offset];
+	      LZX_READ_BITS(verbatim_bits, extra);
+	      match_offset = lzx->position_base[match_offset] - 2 + verbatim_bits;
+	      R2 = R1; R1 = R0; R0 = match_offset;
+	    }
+
+	    if ((window_posn + match_length) > lzx->window_size) {
+	      cli_dbgmsg("lzx_decompress: match ran over window wrap\n");
+	      return lzx->error = CL_EFORMAT;
+	    }
+	    
+	    /* copy match */
+	    rundest = &window[window_posn];
+	    i = match_length;
+	    /* does match offset wrap the window? */
+	    if (match_offset > window_posn) {
+	      /* j = length from match offset to end of window */
+	      j = match_offset - window_posn;
+	      if (j > (int) lzx->window_size) {
+	        cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n");
+		return lzx->error = CL_EFORMAT;
+	      }
+	      runsrc = &window[lzx->window_size - j];
+	      if (j < i) {
+		/* if match goes over the window edge, do two copy runs */
+		i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+		runsrc = window;
+	      }
+	      while (i-- > 0) *rundest++ = *runsrc++;
+	    }
+	    else {
+	      runsrc = rundest - match_offset;
+	      while (i-- > 0) *rundest++ = *runsrc++;
+	    }
+
+	    this_run    -= match_length;
+	    window_posn += match_length;
+	  }
+	} /* while (this_run > 0) */
+	break;
+
+      case LZX_BLOCKTYPE_ALIGNED:
+	while (this_run > 0) {
+	  LZX_READ_HUFFSYM(MAINTREE, main_element);
+	  if (main_element < LZX_NUM_CHARS) {
+	    /* literal: 0 to LZX_NUM_CHARS-1 */
+	    window[window_posn++] = main_element;
+	    this_run--;
+	  }
+	  else {
+	    /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+	    main_element -= LZX_NUM_CHARS;
+
+	    /* get match length */
+	    match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+	    if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+	      LZX_READ_HUFFSYM(LENGTH, length_footer);
+	      match_length += length_footer;
+	    }
+	    match_length += LZX_MIN_MATCH;
+
+	    /* get match offset: as for verbatim blocks, except that the low
+	     * 3 bits of a long offset come from the ALIGNED huffman tree */
+	    switch ((match_offset = (main_element >> 3))) {
+	    case 0: match_offset = R0;                             break;
+	    case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
+	    case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
+	    default:
+	      extra = lzx->extra_bits[match_offset];
+	      match_offset = lzx->position_base[match_offset] - 2;
+	      if (extra > 3) {
+		/* verbatim and aligned bits */
+		extra -= 3;
+		LZX_READ_BITS(verbatim_bits, extra);
+		match_offset += (verbatim_bits << 3);
+		LZX_READ_HUFFSYM(ALIGNED, aligned_bits);
+		match_offset += aligned_bits;
+	      }
+	      else if (extra == 3) {
+		/* aligned bits only */
+		LZX_READ_HUFFSYM(ALIGNED, aligned_bits);
+		match_offset += aligned_bits;
+	      }
+	      else if (extra > 0) { /* extra==1, extra==2 */
+		/* verbatim bits only */
+		LZX_READ_BITS(verbatim_bits, extra);
+		match_offset += verbatim_bits;
+	      }
+	      else /* extra == 0 */ {
+		/* ??? not defined in LZX specification! */
+		match_offset = 1;
+	      }
+	      /* update repeated offset LRU queue */
+	      R2 = R1; R1 = R0; R0 = match_offset;
+	    }
+
+	    if ((window_posn + match_length) > lzx->window_size) {
+	      cli_dbgmsg("lzx_decompress: match ran over window wrap\n");
+	      return lzx->error = CL_EFORMAT;
+	    }
+
+	    /* copy match */
+	    rundest = &window[window_posn];
+	    i = match_length;
+	    /* does match offset wrap the window? */
+	    if (match_offset > window_posn) {
+	      /* j = length from match offset to end of window */
+	      j = match_offset - window_posn;
+	      if (j > (int) lzx->window_size) {
+	        cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n");
+		return lzx->error = CL_EFORMAT;
+	      }
+	      runsrc = &window[lzx->window_size - j];
+	      if (j < i) {
+		/* if match goes over the window edge, do two copy runs */
+		i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+		runsrc = window;
+	      }
+	      while (i-- > 0) *rundest++ = *runsrc++;
+	    }
+	    else {
+	      runsrc = rundest - match_offset;
+	      while (i-- > 0) *rundest++ = *runsrc++;
+	    }
+
+	    this_run    -= match_length;
+	    window_posn += match_length;
+	  }
+	} /* while (this_run > 0) */
+	break;
+
+      case LZX_BLOCKTYPE_UNCOMPRESSED:
+	/* as this_run is limited not to wrap a frame, this also means it
+	 * won't wrap the window (as the window is a multiple of 32k) */
+	rundest = &window[window_posn];
+	window_posn += this_run;
+	while (this_run > 0) {
+	  if ((i = i_end - i_ptr)) {
+	    if (i > this_run) i = this_run;
+	    memcpy(rundest, i_ptr, (size_t) i);
+	    rundest  += i;
+	    i_ptr    += i;
+	    this_run -= i;
+	  }
+	  else {
+	    if (lzx_read_input(lzx)) return lzx->error;
+	    i_ptr = lzx->i_ptr;
+	    i_end = lzx->i_end;
+	  }
+	}
+	break;
+
+      default:
+	return lzx->error = CL_EFORMAT; /* might as well */
+      }
+
+      /* did the final match overrun our desired this_run length? if so,
+       * charge the extra bytes to the current block as well */
+      if (this_run < 0) {
+	if ((unsigned int)(-this_run) > lzx->block_remaining) {
+	  cli_dbgmsg("lzx_decompress: overrun went past end of block by %d (%d remaining)\n", -this_run, lzx->block_remaining);
+	  return lzx->error = CL_EFORMAT;
+	}
+	lzx->block_remaining -= -this_run;
+      }
+    } /* while (bytes_todo > 0) */
+
+    /* streams don't extend over frame boundaries */
+    if ((window_posn - lzx->frame_posn) != frame_size) {
+      cli_dbgmsg("lzx_decompress: decode beyond output frame limits! %d != %d\n", window_posn - lzx->frame_posn, frame_size);
+      return lzx->error = CL_EFORMAT;
+    }
+
+    /* re-align input bitstream */
+    if (bits_left > 0) LZX_ENSURE_BITS(16);
+    if (bits_left & 15) LZX_REMOVE_BITS(bits_left & 15);
+
+    /* check that we've used all of the previous frame first */
+    if (lzx->o_ptr != lzx->o_end) {
+      cli_dbgmsg("lzx_decompress: %d avail bytes, new %d frame\n", lzx->o_end-lzx->o_ptr, frame_size);
+      return lzx->error = CL_EFORMAT;
+    }
+
+    /* does this intel block _really_ need decoding? if so the frame is
+     * copied to e8_buf and every 0xE8 byte (except in the last 10 bytes)
+     * has the 32-bit little-endian value after it translated in place */
+    if (lzx->intel_started && lzx->intel_filesize &&
+	(lzx->frame <= 32768) && (frame_size > 10))
+    {
+      unsigned char *data    = &lzx->e8_buf[0];
+      unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
+      signed int curpos      = lzx->intel_curpos;
+      signed int filesize    = lzx->intel_filesize;
+      signed int abs_off, rel_off;
+
+      /* copy e8 block to the e8 buffer and tweak if needed */
+      lzx->o_ptr = data;
+      memcpy(data, &lzx->window[lzx->frame_posn], frame_size);
+
+      while (data < dataend) {
+	if (*data++ != 0xE8) { curpos++; continue; }
+	abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
+	if ((abs_off >= -curpos) && (abs_off < filesize)) {
+	  rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
+	  data[0] = (unsigned char) rel_off;
+	  data[1] = (unsigned char) (rel_off >> 8);
+	  data[2] = (unsigned char) (rel_off >> 16);
+	  data[3] = (unsigned char) (rel_off >> 24);
+	}
+	data += 4;
+	curpos += 5;
+      }
+      lzx->intel_curpos += frame_size;
+    }
+    else {
+      lzx->o_ptr = &lzx->window[lzx->frame_posn];
+      if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
+    }
+    lzx->o_end = &lzx->o_ptr[frame_size];
+
+    /* write a frame (only as much of it as the caller still wants; the
+     * rest stays between o_ptr and o_end for the next call) */
+    i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
+    if (lzx->wflag && cli_writen(lzx->ofd, lzx->o_ptr, i) != i) {
+      return lzx->error = CL_EIO;
+    }
+    lzx->o_ptr  += i;
+    lzx->offset += i;
+    out_bytes   -= i;
+
+    /* advance frame start position */
+    lzx->frame_posn += frame_size;
+    lzx->frame++;
+
+    /* wrap window / frame position pointers */
+    if (window_posn == lzx->window_size)     window_posn = 0;
+    if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;
+
+  } /* while (lzx->frame < end_frame) */
+
+  if (out_bytes) {
+    cli_dbgmsg("lzx_decompress: bytes left to output\n");
+    return lzx->error = CL_EFORMAT;
+  }
+
+  /* store local state */
+  LZX_STORE_BITS;
+  lzx->window_posn = window_posn;
+  lzx->R0 = R0;
+  lzx->R1 = R1;
+  lzx->R2 = R2;
+
+  return CL_SUCCESS;
+}
+
+/* Releases a stream created by lzx_init(): its input buffer, its window and
+ * the state structure itself. Passing NULL is allowed and does nothing. */
+void lzx_free(struct lzx_stream *lzx) {
+  if (!lzx) return;
+
+  free(lzx->inbuf);
+  free(lzx->window);
+  free(lzx);
+}
+
+/***************************************************************************
+ *			 Quantum decompression implementation 
+ ***************************************************************************
+ * The Quantum method was created by David Stafford, adapted by Microsoft
+ * Corporation.
+ *
+ * This decompressor is based on an implementation by Matthew Russotto, used
+ * with permission.
+ *
+ * This decompressor was researched and implemented by Matthew Russotto. It
+ * has since been tidied up by Stuart Caie. More information can be found at
+ * http://www.speakeasy.org/~russotto/quantumcomp.html
+ */
+
+/* Quantum decompressor bitstream reading macros
+ *
+ * QTM_STORE_BITS        stores bitstream state in qtm_stream structure
+ * QTM_RESTORE_BITS      restores bitstream state from qtm_stream structure
+ * QTM_READ_BITS(var,n)  takes N bits from the buffer and puts them in var
+ * QTM_FILL_BUFFER       if there is room for another 16 bits, reads another
+ *                   16 bits from the input stream.
+ * QTM_PEEK_BITS(n)      extracts without removing N bits from the bit buffer
+ * QTM_REMOVE_BITS(n)    removes N bits from the bit buffer
+ *
+ * These bit access routines work by using the area beyond the MSB and the
+ * LSB as a free source of zeroes. This avoids having to mask any bits.
+ * So we have to know the bit width of the bitbuffer variable.
+ */
+
+/* number of bits in the bit buffer variable. The buffer is kept
+ * left-aligned: the next unread bit is always the most significant one. */
+#define QTM_BITBUF_WIDTH (sizeof(unsigned int) * CHAR_BIT)
+
+/* copy the local bitstream state (i_ptr, i_end, bit_buffer, bits_left) of
+ * the enclosing function into the qtm_stream structure */
+#define QTM_STORE_BITS do {                                                 \
+  qtm->i_ptr      = i_ptr;                                              \
+  qtm->i_end      = i_end;                                              \
+  qtm->bit_buffer = bit_buffer;                                         \
+  qtm->bits_left  = bits_left;                                          \
+} while (0)
+
+/* load the local bitstream state back from the qtm_stream structure */
+#define QTM_RESTORE_BITS do {                                               \
+  i_ptr      = qtm->i_ptr;                                              \
+  i_end      = qtm->i_end;                                              \
+  bit_buffer = qtm->bit_buffer;                                         \
+  bits_left  = qtm->bits_left;                                          \
+} while (0)
+
+/* adds 16 bits to bit buffer, if there's space for the new bits.
+ *
+ * The two input bytes are combined big-endian (first byte is the high one)
+ * and placed directly below the bits already buffered. When the input
+ * buffer is used up, qtm_read_input() refills it; if that fails, the
+ * ENCLOSING function returns qtm->error.
+ *
+ * The 16-bit word is converted to unsigned int before it is shifted: as a
+ * plain int, shifting it left by 16 could move a 1 into the sign bit,
+ * which is undefined behaviour.
+ */
+#define QTM_FILL_BUFFER do {                                                \
+  if (bits_left <= (QTM_BITBUF_WIDTH - 16)) {                               \
+    if (i_ptr >= i_end) {                                               \
+      if (qtm_read_input(qtm)) return qtm->error;                      \
+      i_ptr = qtm->i_ptr;                                               \
+      i_end = qtm->i_end;                                               \
+    }                                                                   \
+    bit_buffer |= ((unsigned int)((i_ptr[0] << 8) | i_ptr[1]))          \
+                  << (QTM_BITBUF_WIDTH - 16 - bits_left);                   \
+    bits_left  += 16;                                                   \
+    i_ptr      += 2;                                                    \
+  }                                                                     \
+} while (0)
+
+/* the top n bits of the buffer, without consuming them; n must be between
+ * 1 and QTM_BITBUF_WIDTH-1, since shifting by the full width is undefined */
+#define QTM_PEEK_BITS(n)   (bit_buffer >> (QTM_BITBUF_WIDTH - (n)))
+/* discard the top n bits of the buffer */
+#define QTM_REMOVE_BITS(n) ((bit_buffer <<= (n)), (bits_left -= (n)))
+
+/* read 'bits' bits into val, most significant bit first. The value is put
+ * together in runs of as many bits as the buffer holds, refilling between
+ * runs. Uses the locals bits_needed and bit_run of the enclosing function. */
+#define QTM_READ_BITS(val, bits) do {                                       \
+  (val) = 0;                                                            \
+  for (bits_needed = (bits); bits_needed > 0; bits_needed -= bit_run) { \
+    QTM_FILL_BUFFER;                                                        \
+    bit_run = (bits_left < bits_needed) ? bits_left : bits_needed;      \
+    (val) = ((val) << bit_run) | QTM_PEEK_BITS(bit_run);                    \
+    QTM_REMOVE_BITS(bit_run);                                               \
+  }                                                                     \
+} while (0)
+
+/* Refills the input buffer of a Quantum stream.
+ *
+ * Data comes from the stream's read callback if one was supplied, otherwise
+ * from cli_readn() on qtm->fd. On success i_ptr / i_end are set to delimit
+ * the bytes just read and CL_SUCCESS is returned; a negative read result
+ * stores CL_EIO in qtm->error and returns it.
+ */
+static int qtm_read_input(struct qtm_stream *qtm) {
+  int nbytes;
+
+  if (qtm->read) {
+    nbytes = qtm->read(qtm->file, &qtm->inbuf[0], (int)qtm->inbuf_size);
+  }
+  else {
+    nbytes = cli_readn(qtm->fd, &qtm->inbuf[0], (int)qtm->inbuf_size);
+  }
+
+  if (nbytes < 0) {
+    qtm->error = CL_EIO;
+    return qtm->error;
+  }
+
+  qtm->i_end = &qtm->inbuf[nbytes];
+  qtm->i_ptr = &qtm->inbuf[0];
+  return CL_SUCCESS;
+}
+
+/* Arithmetic decoder:
+ * 
+ * QTM_GET_SYMBOL(model, var) fetches the next symbol from the stated model
+ * and puts it in var.
+ *
+ * If necessary, qtm_update_model() is called.
+ *
+ * 'model' must be a struct qtm_model lvalue (its members are accessed with
+ * '.' and its address is passed to qtm_update_model()). The macro works on
+ * these locals of the enclosing function: H, L and C (high bound, low bound
+ * and current code value of the coder), range, symf and i, plus the
+ * bitstream locals and 'qtm' needed by QTM_FILL_BUFFER.
+ *
+ * Steps:
+ *  1. scale the code value into the model's frequency range (symf) and scan
+ *     syms[] for the first entry whose cumfreq is <= symf; the decoded
+ *     symbol is the entry before it.
+ *  2. narrow [L, H] to the sub-interval of that symbol.
+ *  3. add 8 to the cumfreq of every entry before the one found; once
+ *     syms[0].cumfreq exceeds 3800 the model is rescaled by
+ *     qtm_update_model().
+ *  4. renormalise: while L and H agree in their top bit (or in the underflow
+ *     case, which is adjusted first), shift L, H and C left by one, taking
+ *     the new low bit of C from the input.
+ *
+ * If input cannot be read, QTM_FILL_BUFFER makes the ENCLOSING function
+ * return qtm->error.
+ */
+#define QTM_GET_SYMBOL(model, var) do {                                     \
+  range = ((H - L) & 0xFFFF) + 1;                                       \
+  symf = ((((C - L + 1) * model.syms[0].cumfreq)-1) / range) & 0xFFFF;  \
+                                                                        \
+  for (i = 1; i < model.entries; i++) {                                 \
+    if (model.syms[i].cumfreq <= symf) break;                           \
+  }                                                                     \
+  (var) = model.syms[i-1].sym;                                          \
+                                                                        \
+  range = (H - L) + 1;                                                  \
+  symf = model.syms[0].cumfreq;                                         \
+  H = L + ((model.syms[i-1].cumfreq * range) / symf) - 1;               \
+  L = L + ((model.syms[i].cumfreq   * range) / symf);                   \
+                                                                        \
+  do { model.syms[--i].cumfreq += 8; } while (i > 0);                   \
+  if (model.syms[0].cumfreq > 3800) qtm_update_model(&model);          \
+                                                                        \
+  while (1) {                                                           \
+    if ((L & 0x8000) != (H & 0x8000)) {                                 \
+      if ((L & 0x4000) && !(H & 0x4000)) {                              \
+        /* underflow case */                                            \
+        C ^= 0x4000; L &= 0x3FFF; H |= 0x4000;                          \
+      }                                                                 \
+      else break;                                                       \
+    }                                                                   \
+    L <<= 1; H = (H << 1) | 1;                                          \
+    QTM_FILL_BUFFER;                                                        \
+    C  = (C << 1) | QTM_PEEK_BITS(1);                                       \
+    QTM_REMOVE_BITS(1);                                                     \
+  }                                                                     \
+} while (0)
+
+/* Rescales the frequency table of a model.
+ *
+ * syms[] holds cumulative frequencies in decreasing order, with one extra
+ * entry at index 'entries' that closes the table. Most calls just halve
+ * every cumulative frequency, keeping each strictly above its successor.
+ * When the shiftsleft countdown reaches zero, the counter is reset to 50
+ * and the table is rebuilt: cumulative values are turned into per-symbol
+ * frequencies (halved, rounding up), the symbols are re-sorted by
+ * decreasing frequency and the cumulative form is restored.
+ */
+static void qtm_update_model(struct qtm_model *model) {
+  struct qtm_modelsym *entry = model->syms;
+  struct qtm_modelsym held;
+  int a, b;
+
+  model->shiftsleft--;
+  if (model->shiftsleft != 0) {
+    /* quick rescale; runs from the last real entry (entries-1) down, the
+     * extra entry at index 'entries' is only read */
+    for (a = model->entries - 1; a >= 0; a--) {
+      entry[a].cumfreq >>= 1;
+      if (entry[a].cumfreq <= entry[a + 1].cumfreq) {
+	entry[a].cumfreq = entry[a + 1].cumfreq + 1;
+      }
+    }
+    return;
+  }
+
+  /* full rebuild */
+  model->shiftsleft = 50;
+
+  /* cumulative frequency -> individual frequency, then halve; the +1 keeps
+   * a symbol from dropping out entirely */
+  for (a = 0; a < model->entries; a++) {
+    entry[a].cumfreq -= entry[a + 1].cumfreq;
+    entry[a].cumfreq++;
+    entry[a].cumfreq >>= 1;
+  }
+
+  /* order by decreasing frequency. This has to stay an in-place selection
+   * sort (or something with exactly the same instability), because the
+   * resulting order of equal frequencies matters */
+  for (a = 0; a < model->entries - 1; a++) {
+    for (b = a + 1; b < model->entries; b++) {
+      if (entry[a].cumfreq < entry[b].cumfreq) {
+	held     = entry[a];
+	entry[a] = entry[b];
+	entry[b] = held;
+      }
+    }
+  }
+
+  /* individual frequency -> cumulative frequency again */
+  for (a = model->entries - 1; a >= 0; a--) {
+    entry[a].cumfreq += entry[a + 1].cumfreq;
+  }
+}
+
+/* Initialises a model to decode symbols from [start] to [start]+[len]-1.
+ *
+ * 'syms' is the storage for the model and must have room for len+1 entries:
+ * entry i gets symbol start+i and cumulative frequency len-i, so all symbols
+ * begin equally likely and the extra last entry has cumulative frequency 0.
+ */
+static void qtm_init_model(struct qtm_model *model,
+			    struct qtm_modelsym *syms, int start, int len)
+{
+  struct qtm_modelsym *slot = syms;
+  int remaining;
+
+  for (remaining = len; remaining >= 0; remaining--, slot++) {
+    slot->sym     = start + (len - remaining); /* actual symbol */
+    slot->cumfreq = remaining;   /* current frequency of that symbol */
+  }
+
+  model->syms       = syms;
+  model->entries    = len;
+  model->shiftsleft = 4;
+}
+
+
+/*-------- main Quantum code --------*/
+
+/* qtm_init() allocates and initialises the state for one Quantum stream.
+ *
+ * fd                = input file descriptor, read with cli_readn() when no
+ *                     read callback is given
+ * ofd               = output file descriptor, stored in the stream
+ * window_bits       = log2 of the window size; only 15 to 21 are accepted
+ * input_buffer_size = size of the input buffer in bytes; rounded up to an
+ *                     even number, must be positive
+ * file, read        = optional read callback and the cab_file it is called
+ *                     with; used instead of fd when read is not NULL
+ *
+ * Returns the new stream, or NULL if an argument is out of range or memory
+ * could not be allocated.
+ */
+struct qtm_stream *qtm_init(int fd, int ofd,
+			      int window_bits, int input_buffer_size,
+			      struct cab_file *file,
+			      int (*read)(struct cab_file *, unsigned char *, int))
+{
+  unsigned int window_size;
+  struct qtm_stream *qtm;
+  unsigned offset;
+  int i;
+
+  /* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */
+
+  /* tk: temporary fix: only process 32KB+ window sizes */
+  if (window_bits < 15 || window_bits > 21) return NULL;
+
+  /* only shift once window_bits is known to be in range; shifting by a
+   * negative or too large count is undefined behaviour */
+  window_size = 1U << window_bits;
+
+  /* round up to an even size: input is consumed two bytes at a time */
+  input_buffer_size = (input_buffer_size + 1) & -2;
+  if (input_buffer_size < 2) return NULL;
+
+  /* allocate decompression state */
+  if (!(qtm = cli_malloc(sizeof(struct qtm_stream)))) {
+    return NULL;
+  }
+
+  /* match position slots: base offset and number of extra bits of each */
+  for (i = 0, offset = 0; i < 42; i++) {
+    qtm->position_base[i] = offset;
+    qtm->extra_bits[i] = ((i < 2) ? 0 : (i - 2)) >> 1;
+    offset += 1 << qtm->extra_bits[i];
+  }
+
+  /* match length slots: base length and number of extra bits of each */
+  for (i = 0, offset = 0; i < 26; i++) {
+    qtm->length_base[i] = offset;
+    qtm->length_extra[i] = (i < 2 ? 0 : i - 2) >> 2;
+    offset += 1 << qtm->length_extra[i];
+  }
+  qtm->length_base[26] = 254; qtm->length_extra[26] = 0;
+
+  /* allocate decompression window and input buffer */
+  qtm->window = cli_malloc((size_t) window_size);
+  if (!qtm->window) {
+    free(qtm);
+    return NULL;
+  }
+
+  qtm->inbuf  = cli_malloc((size_t) input_buffer_size);
+  if (!qtm->inbuf) {
+    free(qtm->window);
+    free(qtm);
+    return NULL;
+  }
+
+  /* initialise decompression state */
+  qtm->fd	   = fd;
+  qtm->ofd	   = ofd;
+  qtm->wflag	   = 1;
+  qtm->inbuf_size  = input_buffer_size;
+  qtm->window_size = window_size;
+  qtm->window_posn = 0;
+  qtm->frame_start = 0;
+  qtm->header_read = 0;
+  qtm->error       = CL_SUCCESS;
+
+  /* both buffers start out empty */
+  qtm->i_ptr = qtm->i_end = &qtm->inbuf[0];
+  qtm->o_ptr = qtm->o_end = &qtm->window[0];
+  qtm->bits_left = 0;
+  qtm->bit_buffer = 0;
+
+  /* initialise arithmetic coding models
+   * - model 4    depends on window size, ranges from 20 to 24
+   * - model 5    depends on window size, ranges from 20 to 36
+   * - model 6pos depends on window size, ranges from 20 to 42
+   */
+  i = window_bits * 2;
+  qtm_init_model(&qtm->model0,    &qtm->m0sym[0],   0, 64);
+  qtm_init_model(&qtm->model1,    &qtm->m1sym[0],  64, 64);
+  qtm_init_model(&qtm->model2,    &qtm->m2sym[0], 128, 64);
+  qtm_init_model(&qtm->model3,    &qtm->m3sym[0], 192, 64);
+  qtm_init_model(&qtm->model4,    &qtm->m4sym[0],   0, (i > 24) ? 24 : i);
+  qtm_init_model(&qtm->model5,    &qtm->m5sym[0],   0, (i > 36) ? 36 : i);
+  qtm_init_model(&qtm->model6,    &qtm->m6sym[0],   0, i);
+  qtm_init_model(&qtm->model6len, &qtm->m6lsym[0],  0, 27);
+  qtm_init_model(&qtm->model7,    &qtm->m7sym[0],   0, 7);
+
+  qtm->file = file;
+  qtm->read = read;
+
+  /* all ok */
+  return qtm;
+}
+
+/*
+ * Decodes Quantum data until [out_bytes] further bytes of output have been
+ * produced.  Decoded bytes are written to qtm->ofd with cli_writen() when
+ * qtm->wflag is set.
+ *
+ * Returns CL_ENULLARG if [qtm] is NULL or [out_bytes] is negative, the stored
+ * qtm->error if an earlier call failed, CL_EIO on a short write, CL_EFORMAT on
+ * malformed data and CL_SUCCESS otherwise.  CL_EIO and CL_EFORMAT are also
+ * recorded in qtm->error.
+ *
+ * NOTE(review): the QTM_* macros are defined outside this function.  They
+ * presumably use the locals that are never touched directly below (i_ptr,
+ * i_end, range, symf, bits_needed, bit_run) and may contain returns of their
+ * own -- confirm against their definitions.
+ */
+int qtm_decompress(struct qtm_stream *qtm, off_t out_bytes) {
+  unsigned int frame_start, frame_end, window_posn, match_offset, range;
+  unsigned char *window, *i_ptr, *i_end, *runsrc, *rundest;
+  int i, j, selector, extra, sym, match_length;
+  unsigned short H, L, C, symf;
+
+  register unsigned int bit_buffer;
+  register unsigned char bits_left;
+  unsigned char bits_needed, bit_run;
+
+  /* easy answers */
+  if (!qtm || (out_bytes < 0)) return CL_ENULLARG;
+  if (qtm->error) return qtm->error;
+
+  /* flush out any stored-up bytes before we begin */
+  i = qtm->o_end - qtm->o_ptr;
+  /* never hand out more than the caller asked for */
+  if ((off_t) i > out_bytes) i = (int) out_bytes;
+  if (i) {
+    if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+      return qtm->error = CL_EIO;
+    }
+    qtm->o_ptr  += i;
+    out_bytes   -= i;
+  }
+  if (out_bytes == 0) return CL_SUCCESS;
+
+  /* restore local state */
+  QTM_RESTORE_BITS;
+  window = qtm->window;
+  window_posn = qtm->window_posn;
+  frame_start = qtm->frame_start;
+  H = qtm->H;
+  L = qtm->L;
+  C = qtm->C;
+
+  /* while we do not have enough decoded bytes in reserve: */
+  while ((qtm->o_end - qtm->o_ptr) < out_bytes) {
+
+    /* read header if necessary. Initialises H, L and C */
+    if (!qtm->header_read) {
+      H = 0xFFFF; L = 0; QTM_READ_BITS(C, 16);
+      qtm->header_read = 1;
+    }
+
+    /* decode more, at most up to the frame boundary */
+    frame_end = window_posn + (out_bytes - (qtm->o_end - qtm->o_ptr));
+    if ((frame_start + QTM_FRAME_SIZE) < frame_end) {
+      frame_end = frame_start + QTM_FRAME_SIZE;
+    }
+
+    /* each iteration decodes one item; model7 selects what kind of item it is */
+    while (window_posn < frame_end) {
+      QTM_GET_SYMBOL(qtm->model7, selector);
+      if (selector < 4) {
+	/* selectors 0-3: a single literal byte, decoded with the model that
+	 * belongs to the selector */
+	struct qtm_model *mdl = (selector == 0) ? &qtm->model0 :
+	                        ((selector == 1) ? &qtm->model1 :
+				((selector == 2) ? &qtm->model2 :
+                                                   &qtm->model3));
+	QTM_GET_SYMBOL((*mdl), sym);
+	window[window_posn++] = sym;
+      }
+      else {
+	/* selectors 4-6: a match; work out its offset and length */
+	switch (selector) {
+	case 4: /* selector 4 = fixed length match (3 bytes) */
+	  QTM_GET_SYMBOL(qtm->model4, sym);
+	  QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+	  match_offset = qtm->position_base[sym] + extra + 1;
+	  match_length = 3;
+	  break;
+
+	case 5: /* selector 5 = fixed length match (4 bytes) */
+	  QTM_GET_SYMBOL(qtm->model5, sym);
+	  QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+	  match_offset = qtm->position_base[sym] + extra + 1;
+	  match_length = 4;
+	  break;
+
+	case 6: /* selector 6 = variable length match */
+	  QTM_GET_SYMBOL(qtm->model6len, sym);
+	  QTM_READ_BITS(extra, qtm->length_extra[sym]);
+	  match_length = qtm->length_base[sym] + extra + 5;
+
+	  QTM_GET_SYMBOL(qtm->model6, sym);
+	  QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+	  match_offset = qtm->position_base[sym] + extra + 1;
+	  break;
+
+	default:
+	  /* should be impossible, model7 can only return 0-6 */
+	  return qtm->error = CL_EFORMAT;
+	}
+
+	/* copy match_length bytes, starting match_offset bytes behind the
+	 * current window position.
+	 * NOTE(review): nothing visible here checks that window_posn +
+	 * match_length stays within window_size -- confirm whether malformed
+	 * input can make the copy run past the end of the window. */
+	rundest = &window[window_posn];
+	i = match_length;
+	/* does match offset wrap the window? */
+	if (match_offset > window_posn) {
+	  /* j = length from match offset to end of window */
+	  j = match_offset - window_posn;
+	  if (j > (int) qtm->window_size) {
+	    cli_dbgmsg("qtm_decompress: match offset beyond window boundaries\n");
+	    return qtm->error = CL_EFORMAT;
+	  }
+	  runsrc = &window[qtm->window_size - j];
+	  if (j < i) {
+	    /* if match goes over the window edge, do two copy runs */
+	    i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+	    runsrc = window;
+	  }
+	  while (i-- > 0) *rundest++ = *runsrc++;
+	}
+	else {
+	  /* source lies entirely before the current position; copy byte by byte
+	   * because source and destination may overlap */
+	  runsrc = rundest - match_offset;
+	  while (i-- > 0) *rundest++ = *runsrc++;
+	}
+	window_posn += match_length;
+      }
+    } /* while (window_posn < frame_end) */
+
+    /* everything decoded so far is now available as output */
+    qtm->o_end = &window[window_posn];
+
+    /* another frame completed? */
+    if ((window_posn - frame_start) >= QTM_FRAME_SIZE) {
+      /* a match that runs past the frame boundary is treated as corrupt data */
+      if ((window_posn - frame_start) != QTM_FRAME_SIZE) {
+	cli_dbgmsg("qtm_decompress: overshot frame alignment\n");
+	return qtm->error = CL_EFORMAT;
+      }
+
+      /* re-align input: drop the bits up to the next byte boundary, then skip
+       * bytes until a 0xFF byte has been read */
+      if (bits_left & 7) QTM_REMOVE_BITS(bits_left & 7);
+      do { QTM_READ_BITS(i, 8); } while (i != 0xFF);
+      /* the next frame starts with a fresh header */
+      qtm->header_read = 0;
+
+      /* window wrap? */
+      if (window_posn == qtm->window_size) {
+	/* flush all currently stored data */
+	i = (qtm->o_end - qtm->o_ptr);
+	if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+	  return qtm->error = CL_EIO;
+	}
+	out_bytes -= i;
+	/* start filling the window from the beginning again */
+	qtm->o_ptr = &window[0];
+	qtm->o_end = &window[0];
+	window_posn = 0;
+      }
+
+      frame_start = window_posn;
+    }
+
+  } /* while (more bytes needed) */
+
+  /* write out the bytes still owed to the caller */
+  if (out_bytes) {
+    i = (int) out_bytes;
+    if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+      return qtm->error = CL_EIO;
+    }
+    qtm->o_ptr += i;
+  }
+
+  /* store local state */
+  QTM_STORE_BITS;
+  qtm->window_posn = window_posn;
+  qtm->frame_start = frame_start;
+  qtm->H = H;
+  qtm->L = L;
+  qtm->C = C;
+
+  return CL_SUCCESS;
+}
+
+/* Releases a stream obtained from qtm_init() together with its input buffer
+ * and window.  A NULL stream is ignored. */
+void qtm_free(struct qtm_stream *qtm) {
+  if (!qtm) return;
+
+  free(qtm->inbuf);
+  free(qtm->window);
+  free(qtm);
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,557 @@
+/*
+ * LZMADecode.c
+ * 
+ * This file is a part of LZMA compression module for NSIS.
+ * 
+ * Original LZMA SDK Copyright (C) 1999-2006 Igor Pavlov
+ * Modifications Copyright (C) 2003-2007 Amir Szekely <kichik at netvision.net.il>
+ * 
+ * Licensed under the Common Public License version 1.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * 
+ * Licence details can be found in the file COPYING.nsis.
+ * 
+ * This software is provided 'as-is', without any express or implied
+ * warranty.
+ */
+
+#include <stdlib.h>
+#include "LZMADecode.h"
+
+/*
+ * Helper macros for the resumable state machine in lzmaDecode().  They only
+ * make sense inside that function: they use its saveStateAndReturn and
+ * _LZMA_C_* labels, and several of them expand to a "case" label of the
+ * switch on mode so that decoding can later resume at that exact point.
+ */
+/* stop decoding for now: jump to the code that stores the state and returns */
+#define LEAVE { goto saveStateAndReturn; }
+/* resume point [c]: if no input is left, record [c] in mode and leave */
+#define NEED_BYTE(c) case c: if (!avail_in) { mode = c; LEAVE; }
+/* same input check, but without creating a resume point or changing mode */
+#define NEED_BYTE_ if (!avail_in) LEAVE;
+/* consumes one input byte and yields its value */
+#define NEXT_BYTE (avail_in--, *next_in++)
+/* resume point [c]: if no output space is left, record [c] in mode and leave */
+#define NEED_OUT(c) case c: if (!avail_out) { mode = c; LEAVE; }
+/* stores byte [b] in the output buffer */
+#define PUT_BYTE_(b) { *next_out = b; next_out++; avail_out--; }
+/* stores byte [b] in the output buffer and counts it in totalOut */
+#define PUT_BYTE(b) { totalOut++; PUT_BYTE_(b) }
+/* decodes one bit with probability [x]: jumps to the shared bit decoder, which
+   continues at case [c] (remembered in last) once the bit is available */
+#define DECODE_BIT(c, x) prob = x; last = c; goto _LZMA_C_RDBD; case c:
+/* decodes a length with the length coder at [x]; continues at case [c]
+   (remembered in last2) */
+#define DECODE_LEN(c, x) probs = x; last2 = c; goto _LZMA_C_LEND; case c:
+/* decodes a [y]-level bit tree with the probabilities at [x]; continues at
+   case [c] (remembered in last3) */
+#define DECODE_BIT_TREE(c, x, y) probs = x; numLevels = y; last3 = c; goto _LZMA_C_BTD; case c:
+
+/*
+ * Resume points of the decoder.  These values are used as case labels in the
+ * switch on mode inside lzmaDecode() and are stored in mode, last, last2 and
+ * last3 to record where decoding has to continue.
+ */
+enum {
+  /*  0 */ LZMA_C_INIT = 0,
+  /*  1 */ LZMA_C_GETDICT,
+  /*  2 */ LZMA_C_BLOCK,
+  /*  3 */ LZMA_C_RDI, /* RangeDecoderInit */
+  /*  4 */ LZMA_C_RDBD, /* RangeDecoderBitDecode */
+  /*  5 */ LZMA_C_RDBD_IN, /* RangeDecoderBitDecode */
+  /*  6 */ LZMA_C_TYPE,
+  /*  7 */ LZMA_C_ISREP,
+  /*  8 */ LZMA_C_ISREPG0,
+  /*  9 */ LZMA_C_ISREP0LONG,
+  /* 10 */ LZMA_C_ISREPG1,
+  /* 11 */ LZMA_C_ISREPG2,
+  /* 12 */ LZMA_C_NORM,
+  /* 13 */ LZMA_C_LITDM1, /* LzmaLiteralDecodeMatch */
+  /* 14 */ LZMA_C_LITDM2, /* LzmaLiteralDecodeMatch */
+  /* 15 */ LZMA_C_LITD, /* LzmaLiteralDecode */
+  /* 16 */ LZMA_C_RDRBTD, /* RangeDecoderReverseBitTreeDecode */
+  /* 17 */ LZMA_C_LEND, /* LzmaLenDecode */
+  /* 18 */ LZMA_C_LEND1, /* LzmaLenDecode */
+  /* 19 */ LZMA_C_LEND2, /* LzmaLenDecode */
+  /* 20 */ LZMA_C_LEND_RES, /* LzmaLenDecode */
+  /* 21 */ LZMA_C_LEND_C1,
+  /* 22 */ LZMA_C_LEND_C2,
+  /* 23 */ LZMA_C_BTD, /* RangeDecoderBitTreeDecode */
+  /* 24 */ LZMA_C_BTD_LOOP,
+  /* 25 */ LZMA_C_BTD_C1,
+  /* 26 */ LZMA_C_OUTPUT_1,
+  /* 27 */ LZMA_C_OUTPUT_2,
+  /* 28 */ LZMA_C_OUTPUT_3
+};
+
+/* range coder: the range is renormalised whenever it drops below kTopValue */
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+/* bit probabilities are kNumBitModelTotalBits-bit values; each decoded bit
+   moves its probability by 1/2^kNumMoveBits of the remaining distance */
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+/* if the range has become too small, shift one more input byte into code;
+   [c] is the resume point used when no input byte is available */
+#define RC_NORMALIZE(c) if (range < kTopValue) { NEED_BYTE(c); range <<= 8; code = (code << 8) | NEXT_BYTE; }
+
+/* decodes one bit with probability *prob, adapts the probability and appends
+   the bit to [mi] (mi = 2*mi + bit).  [A0] is executed when the bit is 0,
+   [A1] when it is 1; [c] is the resume point for the renormalisation. */
+#define RC_GET_BIT2(c, prob, mi, A0, A1) { \
+  UInt32 bound = (range >> kNumBitModelTotalBits) * *prob; \
+  if (code < bound) \
+    { A0; range = bound; *prob += (kBitModelTotal - *prob) >> kNumMoveBits; mi <<= 1; } \
+  else \
+    { A1; range -= bound; code -= bound; *prob -= (*prob) >> kNumMoveBits; mi = (mi + mi) + 1; } \
+  RC_NORMALIZE(c) \
+}
+
+/* RC_GET_BIT2 without any extra per-bit action */
+#define RC_GET_BIT(c, prob, mi) RC_GET_BIT2(c, prob, mi, ; , ;)
+
+/* upper limit for the number of position-state bits the tables are laid out for */
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+/* a length is coded with a low, mid or high bit tree of this many bits */
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+/* offsets of the parts of one length coder, relative to the start of that
+   coder's probabilities; kNumLenProbs is the size of one length coder */
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+/* number of values the decoder's state variable can take */
+#define kNumStates 12
+
+/* position slots from kStartPosModelIndex upwards carry extra bits; below
+   kEndPosModelIndex these are decoded with the SpecPos probabilities, from
+   kEndPosModelIndex on with direct bits plus the Align probabilities */
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+/* added to every decoded match length */
+#define kMatchMinLen 2
+
+/* offsets of the individual probability tables inside the probability array
+   used by lzmaDecode(); the literal probabilities start at Literal */
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+/* expected value of Literal, and the number of probabilities per literal coder */
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+/* compile-time consistency check: if the layout above does not add up to
+   LZMA_BASE_SIZE, the stray identifier below makes the compilation fail */
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif
+
+/* Resets a stream for a new decode run without touching the two allocated
+   buffer pointers. */
+void lzmaInit(lzma_stream *s)
+{
+  /* Clear everything in lzma_stream except the trailing space taken by the two
+     allocated buffer pointers: wiping those would lose the pointers, and then
+     the buffers could never be freed. */
+  lzByte *raw = (lzByte *)s;
+  size_t remaining = sizeof(lzma_stream) - (sizeof(unsigned char *) * 2);
+
+  for (; remaining > 0; remaining--)
+    raw[remaining - 1] = 0;
+
+  s->range = (0xFFFFFFFF);
+  s->rep3 = 1;
+  s->rep2 = 1;
+  s->rep1 = 1;
+  s->rep0 = 1;
+}
+
+/*
+ * Resumable LZMA decoder.
+ *
+ * Works on a local copy (_s) of the stream: it consumes input from
+ * next_in/avail_in and produces output at next_out/avail_out.  The copy is
+ * written back to *s before every return except the immediate
+ * LZMA_STREAM_END return, so *s always describes the buffers that are
+ * currently allocated.
+ *
+ * Returns
+ *   LZMA_OK             - input or output ran out (call again with more), or
+ *                         the end marker has just been decoded
+ *   LZMA_STREAM_END     - the end marker was decoded by an earlier call
+ *   LZMA_DATA_ERROR     - the stream is malformed
+ *   LZMA_NOT_ENOUGH_MEM - a table or the dictionary could not be allocated
+ *
+ * After LZMA_DATA_ERROR or LZMA_NOT_ENOUGH_MEM the stream must not be decoded
+ * any further; it can only be shut down or re-initialised.
+ *
+ * The function is a state machine: mode selects the case at which decoding
+ * resumes, and the DECODE_* / NEED_* / RC_* macros plant further case labels
+ * in the middle of the code.  The macros defined below map the decoder's
+ * variables onto fields of _s and stay defined after the function.
+ */
+int lzmaDecode(lzma_stream *s)
+{
+  /* restore decoder state */
+  lzma_stream _s = *s;
+  /* value returned once the state has been stored back into *s */
+  int retCode = LZMA_OK;
+
+#define mode _s.mode
+#define last _s.last
+#define last2 _s.last2
+#define last3 _s.last3
+
+#define p (*(CProb **) &_s.dynamicData)
+#define dynamicDataSize _s.dynamicDataSize
+
+#define state _s.state
+#define isPreviousMatch _s.isPreviousMatch
+#define previousByte _s.previousByte
+#define rep0 _s.rep0
+#define rep1 _s.rep1
+#define rep2 _s.rep2
+#define rep3 _s.rep3
+#define lc _s.lc
+#define len _s.len
+#define totalOut _s.totalOut
+
+#define dictionary _s.dictionary
+#define dictionarySize _s.dictionarySize
+#define dictionaryPos _s.dictionaryPos
+
+#define posStateMask _s.posStateMask
+#define literalPosMask _s.literalPosMask
+
+#define avail_in _s.avail_in
+#define next_in _s.next_in
+#define avail_out _s.avail_out
+#define next_out _s.next_out
+
+#define range _s.range
+#define code _s.code
+
+#define probs _s.probs
+#define prob _s.prob
+
+/* the temp fields are shared by variables that are never live at the same time */
+#define symbol _s.temp2
+#define bit _s.temp3
+#define matchBit _s.temp1
+#define i _s.temp1
+#define result _s.temp2
+#define numLevels _s.temp3
+#define posSlot _s.temp2
+#define newDictionarySize (*(UInt32*) &_s.temp3)
+
+#define matchByte _s.matchByte
+#define mi _s.mi
+#define posState _s.posState
+
+  /* len == -1 marks a stream whose end marker has already been decoded */
+  if (len == -1)
+    return LZMA_STREAM_END;
+
+  for (;;) switch (mode)
+  {
+  case LZMA_C_INIT:
+    {
+      lzByte firstByte;
+      UInt32 newDynamicDataSize;
+      UInt32 numProbs;
+      int lp;
+      int pb;
+
+      NEED_BYTE_;
+
+      firstByte = NEXT_BYTE;
+
+      /* the properties byte encodes (pb * 5 + lp) * 9 + lc, so only values
+         below 9*5*5 are valid; 9*5*5 itself would already give pb == 5, more
+         than the tables are laid out for */
+      if (firstByte >= (9*5*5))
+      {
+        retCode = LZMA_DATA_ERROR;
+        LEAVE;
+      }
+
+      pb = firstByte / (9*5);
+      firstByte %= (9*5);
+      lp = firstByte / 9;
+      firstByte %= 9;
+      lc = firstByte;
+
+      posStateMask = (1 << (pb)) - 1;
+      literalPosMask = (1 << (lp)) - 1;
+
+      /* the literal coders are indexed with lp position bits and lc bits of the
+         previous byte (see LZMA_C_BLOCK), so there are 1 << (lc + lp) of them */
+      numProbs = Literal + (LZMA_LIT_SIZE << (lc + lp));
+      newDynamicDataSize = numProbs * sizeof(CProb);
+
+      if (newDynamicDataSize != dynamicDataSize)
+      {
+        if (p)
+          lzmafree(p);
+        p = lzmaalloc(newDynamicDataSize);
+        if (!p)
+        {
+          /* the old table is gone; store that fact so that *s does not keep
+             a stale pointer or size */
+          dynamicDataSize = 0;
+          retCode = LZMA_NOT_ENOUGH_MEM;
+          LEAVE;
+        }
+        dynamicDataSize = newDynamicDataSize;
+      }
+
+      /* every probability starts out at one half */
+      while (numProbs--)
+        p[numProbs] = kBitModelTotal >> 1;
+
+      /* dictionary size: 4 bytes, least significant byte first */
+      for (i = 0, newDictionarySize = 0; i < 4; i++)
+      {
+        NEED_BYTE(LZMA_C_GETDICT);
+        /* widen before shifting so that a byte >= 0x80 is never shifted into
+           the sign bit of an int */
+        newDictionarySize |= (UInt32)NEXT_BYTE << (i * 8);
+      }
+
+      /* an empty dictionary cannot be indexed or used as a modulus below */
+      if (newDictionarySize == 0)
+      {
+        retCode = LZMA_DATA_ERROR;
+        LEAVE;
+      }
+
+      if (newDictionarySize != dictionarySize)
+      {
+        if (dictionary)
+          lzmafree(dictionary);
+        dictionary = lzmaalloc(newDictionarySize);
+        if (!dictionary)
+        {
+          /* as above: never leave a size behind without a buffer */
+          dictionarySize = 0;
+          retCode = LZMA_NOT_ENOUGH_MEM;
+          LEAVE;
+        }
+        dictionarySize = newDictionarySize;
+      }
+
+      dictionary[dictionarySize - 1] = 0;
+
+      /* range decoder init: shift the first 5 input bytes into code */
+      i = 5;
+      while (i--)
+      {
+        NEED_BYTE(LZMA_C_RDI);
+        code = (code << 8) | NEXT_BYTE;
+      }
+    }
+    /* fall through */
+  case LZMA_C_BLOCK:
+    /* decode one literal or one match */
+    posState = (int)(totalOut & posStateMask);
+    DECODE_BIT(LZMA_C_TYPE, p + IsMatch + (state << kNumPosBitsMax) + posState);
+    if (bit == 0)
+    {
+      /* literal: pick the literal coder from the output position and the
+         high bits of the previous byte */
+      probs = p + Literal + (LZMA_LIT_SIZE *
+        (((totalOut & literalPosMask) << lc) + (previousByte >> (8 - lc))));
+
+      if (state < 4) state = 0;
+      else if (state < 10) state -= 3;
+      else state -= 6;
+      if (isPreviousMatch)
+      {
+        /* directly after a match the literal is decoded against the byte
+           found rep0 positions back in the dictionary */
+        UInt32 pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        matchByte = dictionary[pos];
+        {
+          symbol = 1;
+          do
+          {
+            matchBit = (matchByte >> 7) & 1;
+            matchByte <<= 1;
+            {
+              prob = probs + ((1 + matchBit) << 8) + symbol;
+              RC_GET_BIT2(LZMA_C_LITDM1, prob, symbol, bit = 0, bit = 1)
+            }
+            if (matchBit != bit)
+            {
+              /* first mismatch: decode the remaining bits like a plain literal */
+              while (symbol < 0x100)
+              {
+                prob = probs + symbol;
+                RC_GET_BIT(LZMA_C_LITDM2, prob, symbol)
+              }
+              break;
+            }
+          }
+          while (symbol < 0x100);
+          previousByte = symbol;
+        }
+        isPreviousMatch = 0;
+      }
+      else
+      {
+        symbol = 1;
+        do
+        {
+          prob = probs + symbol;
+          RC_GET_BIT(LZMA_C_LITD, prob, symbol)
+        }
+        while (symbol < 0x100);
+        previousByte = symbol;
+      }
+      NEED_OUT(LZMA_C_OUTPUT_1);
+      PUT_BYTE(previousByte);
+      dictionary[dictionaryPos] = previousByte;
+      dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+    }
+    /* bit == 1 */
+    else
+    {
+      isPreviousMatch = 1;
+      DECODE_BIT(LZMA_C_ISREP, p + IsRep + state);
+      if (bit == 1)
+      {
+        /* match that reuses one of the four most recent distances */
+        DECODE_BIT(LZMA_C_ISREPG0, p + IsRepG0 + state);
+        if (bit == 0)
+        {
+          DECODE_BIT(LZMA_C_ISREP0LONG, p + IsRep0Long + (state << kNumPosBitsMax) + posState);
+          if (bit == 0)
+          {
+            /* single byte copied from distance rep0 */
+            UInt32 pos;
+            if (totalOut == 0)
+            {
+              /* nothing has been output yet, so there is nothing to copy */
+              retCode = LZMA_DATA_ERROR;
+              LEAVE;
+            }
+            state = state < 7 ? 9 : 11;
+            NEED_OUT(LZMA_C_OUTPUT_2);
+            pos = dictionaryPos - rep0;
+            if (pos >= dictionarySize)
+              pos += dictionarySize;
+            previousByte = dictionary[pos];
+            dictionary[dictionaryPos] = previousByte;
+            dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+            PUT_BYTE(previousByte);
+            mode = LZMA_C_BLOCK;
+            break;
+          }
+        }
+        else
+        {
+          /* use rep1, rep2 or rep3 and move it to the front of the list */
+          UInt32 distance;
+          DECODE_BIT(LZMA_C_ISREPG1, p + IsRepG1 + state);
+          if (bit == 0)
+          {
+            distance = rep1;
+          }
+          else
+          {
+            DECODE_BIT(LZMA_C_ISREPG2, p + IsRepG2 + state);
+            if (bit == 0)
+              distance = rep2;
+            else
+            {
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        DECODE_LEN(LZMA_C_LEND_C1, p + RepLenCoder);
+        state = state < 7 ? 8 : 11;
+      }
+      else
+      {
+        /* match with a new distance: decode the length, then the distance */
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        state = state < 7 ? 7 : 10;
+        DECODE_LEN(LZMA_C_LEND_C2, p + LenCoder);
+        DECODE_BIT_TREE(
+          LZMA_C_BTD_C1,
+          p + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits),
+          kNumPosSlotBits
+        );
+        if (posSlot >= kStartPosModelIndex)
+        {
+          int numDirectBits = ((posSlot >> 1) - 1);
+          rep0 = ((2 | ((UInt32)posSlot & 1)) << numDirectBits);
+          if (posSlot < kEndPosModelIndex)
+          {
+            probs = p + SpecPos + rep0 - posSlot - 1;
+            numLevels = numDirectBits;
+          }
+          else
+          {
+            /* the upper bits are read directly from the range coder, the
+               lowest kNumAlignBits bits through the Align probabilities */
+            int numTotalBits = numDirectBits - kNumAlignBits;
+            result = 0;
+            for (i = numTotalBits; i > 0; i--)
+            {
+              /* UInt32 t; */
+              range >>= 1;
+
+              result <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                result |= 1;
+              }
+              /*
+              t = (code - range) >> 31;
+              t &= 1;
+              code -= range & (t - 1);
+              result = (result + result) | (1 - t);
+              */
+              RC_NORMALIZE(LZMA_C_NORM)
+            }
+            rep0 += result << kNumAlignBits;
+            probs = p + Align;
+            numLevels = kNumAlignBits;
+          }
+          /* reverse bit tree: the first decoded bit is the least significant */
+          mi = 1;
+          symbol = 0;
+          for(i = 0; i < numLevels; i++)
+          {
+            prob = probs + mi;
+            RC_GET_BIT2(LZMA_C_RDRBTD, prob, mi, ; , symbol |= (1 << i));
+          }
+          rep0 += symbol;
+        }
+        else
+          rep0 = posSlot;
+        rep0++;
+      }
+      /* a distance that wrapped around to 0 is the end-of-stream marker */
+      if (rep0 == (UInt32)(0))
+      {
+        len = -1;
+        LEAVE;
+      }
+      /* the match must lie within the data produced so far and within the
+         dictionary, otherwise the copy below would read outside of it */
+      if (rep0 > totalOut || rep0 > dictionarySize)
+      {
+        retCode = LZMA_DATA_ERROR;
+        LEAVE;
+      }
+      len += kMatchMinLen;
+      totalOut += len;
+      /* copy the match byte by byte; the source may overlap the destination */
+      do
+      {
+        UInt32 pos;
+        NEED_OUT(LZMA_C_OUTPUT_3);
+        pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        previousByte = dictionary[pos];
+        dictionary[dictionaryPos] = previousByte;
+        dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+        /* totalOut was already advanced by the whole match above */
+        PUT_BYTE_(previousByte);
+        len--;
+      }
+      while(len > 0);
+    }
+    mode = LZMA_C_BLOCK;
+    break;
+  case LZMA_C_RDBD:
+  _LZMA_C_RDBD:
+    /* shared bit decoder behind DECODE_BIT: decodes one bit with probability
+       *prob into bit, then continues at the state stored in last */
+    {
+      UInt32 bound = (range >> kNumBitModelTotalBits) * *prob;
+      if (code < bound)
+      {
+        range = bound;
+        *prob += (kBitModelTotal - *prob) >> kNumMoveBits;
+        bit = 0;
+      }
+      else
+      {
+        range -= bound;
+        code -= bound;
+        *prob -= (*prob) >> kNumMoveBits;
+        bit = 1;
+      }
+      RC_NORMALIZE(LZMA_C_RDBD_IN);
+    }
+    mode = last;
+    break;
+  case LZMA_C_LEND:
+  _LZMA_C_LEND:
+      /* length decoder behind DECODE_LEN: two choice bits select the low, mid
+         or high bit tree and the base length that goes with it */
+      DECODE_BIT(LZMA_C_LEND1, probs + LenChoice);
+      if (bit == 0)
+      {
+        len = 0;
+        probs += LenLow + (posState << kLenNumLowBits);
+        numLevels = kLenNumLowBits;
+      }
+      else {
+        DECODE_BIT(LZMA_C_LEND2, probs + LenChoice2);
+        if (bit == 0)
+        {
+          len = kLenNumLowSymbols;
+          probs += LenMid + (posState << kLenNumMidBits);
+          numLevels = kLenNumMidBits;
+        }
+        else
+        {
+          len = kLenNumLowSymbols + kLenNumMidSymbols;
+          probs += LenHigh;
+          numLevels = kLenNumHighBits;
+        }
+      }
+
+      last3 = LZMA_C_LEND_RES;
+    /* fall through: decode the selected tree */
+  case LZMA_C_BTD:
+  _LZMA_C_BTD:
+    /* bit tree decoder behind DECODE_BIT_TREE: decodes numLevels bits into
+       result, then continues at the state stored in last3 */
+    mi = 1;
+    for(i = numLevels; i > 0; i--)
+    {
+      prob = probs + mi;
+      RC_GET_BIT(LZMA_C_BTD_LOOP, prob, mi)
+    }
+    result = mi - (1 << numLevels);
+    mode = last3;
+    break;
+  case LZMA_C_LEND_RES:
+    /* add the tree result to the base length and return to the DECODE_LEN caller */
+    len += result;
+    mode = last2;
+    break;
+  default:
+    /* mode holds a value that is not a resume point */
+    retCode = LZMA_DATA_ERROR;
+    LEAVE;
+  }
+
+saveStateAndReturn:
+
+  /* save decoder state */
+  *s = _s;
+
+  return retCode;
+}
+
+
+/* aCaB */
+/*
+ * Frees the probability table and the dictionary owned by [s] and clears the
+ * matching pointers and sizes in the stream.  A NULL stream is ignored.
+ * (p, dictionary, dynamicDataSize and dictionarySize are the field macros
+ * defined inside lzmaDecode(); they refer to the local copy _s.)
+ */
+void lzmaShutdown(lzma_stream *s) {
+  lzma_stream _s;
+
+  if (!s) return;
+  _s = *s;
+  if (p) lzmafree(p);
+  if (dictionary) lzmafree(dictionary);
+  p = NULL;
+  dictionary = NULL;
+  /* forget the sizes too: lzmaDecode() only allocates when the requested size
+     differs from the stored one, so a stale size would make it use NULL */
+  dynamicDataSize = 0;
+  dictionarySize = 0;
+  *s = _s;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1152 @@
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions.                          ---*/
+/*---                                               bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.4 of 20 December 2006
+   Copyright (C) 1996-2006 Julian Seward <jseward at bzip.org>
+   This file was modified for ClamAV by aCaB <acab at clamav.net>
+
+   This program is released under the terms of the license contained
+   in the file COPYING.nsis.
+   ------------------------------------------------------------------ */
+
+/* CHANGES
+   0.9.0    -- original version.
+   0.9.0a/b -- no changes in this file.
+   0.9.0c   -- made zero-length BZ_FLUSH work correctly in bzCompress().
+     fixed bzWrite/bzRead to ignore zero-length requests.
+     fixed bzread to correctly handle read requests after EOF.
+     wrong parameter order in call to bzDecompressInit in
+     bzBuffToBuffDecompress.  Fixed.
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "bzlib_private.h"
+#include "others.h"
+
+const Int32 BZ2_rNums[512] = { /* 512-entry constant table; NOTE(review): presumably backs the BZ_RAND_* macros, whose uses in this file are commented out - confirm it is still referenced */
+   619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 
+   985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 
+   733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 
+   419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 
+   878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 
+   862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 
+   150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 
+   170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 
+   73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 
+   909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 
+   641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 
+   161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 
+   382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 
+   98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 
+   227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 
+   469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 
+   184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 
+   715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 
+   951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 
+   652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 
+   645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 
+   609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 
+   653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 
+   411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 
+   170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 
+   857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 
+   669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 
+   944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 
+   344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 
+   897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 
+   433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 
+   686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 
+   946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 
+   978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 
+   680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 
+   707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 
+   297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 
+   134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 
+   343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 
+   140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 
+   170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 
+   369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 
+   804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 
+   896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 
+   661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 
+   768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 
+   61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 
+   372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 
+   780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 
+   920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 
+   645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 
+   936, 638
+};
+
+/*--- Pack the byte values flagged in s->inUse[] into s->seqToUnseq[]; s->nInUse receives their count. ---*/
+static
+void makeMaps_d ( DState* s )
+{
+   Int32 byteVal;
+   Int32 nUsed = 0;
+   for (byteVal = 0; byteVal < 256; byteVal++) {
+      if (!s->inUse[byteVal]) continue;      /* flag clear: value gets no slot */
+      s->seqToUnseq[nUsed++] = byteVal;      /* next dense slot -> byte value */
+   }
+   s->nInUse = nUsed;
+}
+
+
+/*--- RETURN(rrr): store rrr in retVal and jump to save_state_and_return; expands to "{...};", so keep it out of if/else chains. ---*/
+#define RETURN(rrr)                               \
+   { retVal = rrr; goto save_state_and_return; };
+
+#define GET_BITS(lll,vvv,nnn) /* read nnn bits */ \
+   case lll: s->state = lll; /* resume point */   \
+   while (True) {                                 \
+      if (s->bsLive >= nnn) { /* buffered? */     \
+         UInt32 v;                                \
+         v = (s->bsBuff >>                        \
+             (s->bsLive-nnn)) & ((1 << nnn)-1);   \
+         s->bsLive -= nnn;                        \
+         vvv = v;                                 \
+         break;                                   \
+      } /* short of bits: refill or suspend */    \
+      if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
+      s->bsBuff                                   \
+         = (s->bsBuff << 8) |                     \
+           ((UInt32)                              \
+              (*(s->strm->next_in)));		  \
+      s->bsLive += 8;                             \
+      s->strm->next_in++;                         \
+      s->strm->avail_in--;                        \
+      s->strm->total_in_lo32++;                   \
+      if (s->strm->total_in_lo32 == 0)            \
+         s->strm->total_in_hi32++; /* carry */    \
+   }
+
+#define GET_UCHAR(lll,uuu) /* 8-bit GET_BITS */   \
+   GET_BITS(lll,uuu,8)
+
+#define GET_BIT(lll,uuu) /* 1-bit GET_BITS */     \
+   GET_BITS(lll,uuu,1)
+
+/*--- GET_MTF_VAL: decode one symbol into lval with the current group's limit/base/perm tables; label1/label2 are the resume labels handed to GET_BITS/GET_BIT. ---*/
+#define GET_MTF_VAL(label1,label2,lval)           \
+{                                                 \
+   if (groupPos == 0) { /* group used up */       \
+      groupNo++;                                  \
+      if (groupNo >= nSelectors)                  \
+         RETURN(BZ_DATA_ERROR);                   \
+      groupPos = BZ_G_SIZE;                       \
+      gSel = s->selector[groupNo];                \
+      gMinlen = s->minLens[gSel];                 \
+      gLimit = &(s->limit[gSel][0]);              \
+      gPerm = &(s->perm[gSel][0]);                \
+      gBase = &(s->base[gSel][0]);                \
+   }                                              \
+   groupPos--;                                    \
+   zn = gMinlen; /* start at shortest length */   \
+   GET_BITS(label1, zvec, zn);                    \
+   while (1) {                                    \
+      if (zn > 20 /* the longest code */)         \
+         RETURN(BZ_DATA_ERROR);                   \
+      if (zvec <= gLimit[zn]) break;              \
+      zn++; /* else extend code by one bit */     \
+      GET_BIT(label2, zj);                        \
+      zvec = (zvec << 1) | zj;                    \
+   };                                             \
+   if (zvec - gBase[zn] < 0                       \
+       || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE)  \
+      RETURN(BZ_DATA_ERROR);                      \
+   lval = gPerm[zvec - gBase[zn]];                \
+}
+
+/*--- Binary search over cftab[0..256]: returns nb with cftab[nb] <= indx < cftab[nb+1], assuming cftab is ascending and cftab[0] <= indx < cftab[256]. ---*/
+
+static inline Int32 indexIntoF ( Int32 indx, Int32 *cftab )
+{
+   Int32 nb, na, mid;
+   nb = 0;     /* lower end of the bracket (kept at a slot whose value is <= indx) */
+   na = 256;   /* upper end of the bracket (kept at a slot whose value is > indx) */
+   do {
+      mid = (nb + na) >> 1;
+      if (indx >= cftab[mid]) nb = mid; else na = mid;
+   }
+   while (na - nb != 1);   /* stop once the bracket is exactly one slot wide */
+   return nb;
+}
+
+/*---------------------------------------------------*/
+/* Writes the pending run and newly fetched runs to s->strm->next_out while avail_out lasts.
+   Returns True iff data corruption is discovered, False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+   UChar k1;   /* byte most recently fetched through BZ_GET_FAST_C */
+
+/*    if (s->blockRandomised) { */
+
+/*       while (True) { */
+/*          /\* try to finish existing run *\/ */
+/*          while (True) { */
+/*             if (s->strm->avail_out == 0) return False; */
+/*             if (s->state_out_len == 0) break; */
+/*             *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; */
+/* 	       BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+/*             s->state_out_len--; */
+/*             s->strm->next_out++; */
+/*             s->strm->avail_out--; */
+/*             s->strm->total_out_lo32++; */
+/*             if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; */
+/*          } */
+
+/*          /\* can a new run be started? *\/ */
+/*          if (s->nblock_used == s->save_nblock+1) return False; */
+               
+/*          /\* Only caused by corrupt data stream? *\/ */
+/*          if (s->nblock_used > s->save_nblock+1) */
+/*             return True; */
+   
+/*          s->state_out_len = 1; */
+/*          s->state_out_ch = s->k0; */
+/*          BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;  */
+
+
+
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          s->state_out_len = 2; */
+/*          BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          s->state_out_len = 3; */
+/*          BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          s->state_out_len = ((Int32)k1) + 4; */
+/*          BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK;  */
+/*          s->k0 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*       } */
+
+/*    } else */ {
+
+      /* restore: work on local copies of the decoder and stream fields */
+      UInt32        c_calculatedBlockCRC = s->calculatedBlockCRC;   /* never modified here: the CRC updates below are commented out */
+      UChar         c_state_out_ch       = s->state_out_ch;
+      Int32         c_state_out_len      = s->state_out_len;
+      Int32         c_nblock_used        = s->nblock_used;
+      Int32         c_k0                 = s->k0;
+      UInt32*       c_tt                 = s->tt;     /* NOTE(review): c_tt/c_tPos are presumably used by name inside BZ_GET_FAST_C - confirm */
+      UInt32        c_tPos               = s->tPos;
+      UChar*        cs_next_out          = s->strm->next_out;
+      unsigned int  cs_avail_out         = s->strm->avail_out;
+      /* end restore */
+
+      UInt32       avail_out_INIT = cs_avail_out;      /* starting space, used at return_notr to count bytes written */
+      Int32        s_save_nblockPP = s->save_nblock+1;
+      unsigned int total_out_lo32_old;
+
+      while (True) {
+
+         /* try to finish existing run */
+         if (c_state_out_len > 0) {
+            while (True) {
+               if (cs_avail_out == 0) goto return_notr;   /* output full: remaining run length is saved for the next call */
+               if (c_state_out_len == 1) break;           /* the final copy is written by the block below */
+               *cs_next_out = c_state_out_ch;
+	       /* aCaB BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); */
+               c_state_out_len--;
+               cs_next_out++;
+               cs_avail_out--;
+            }
+            s_state_out_len_eq_one:   /* also a goto target from below: write exactly one c_state_out_ch */
+            {
+               if (cs_avail_out == 0) { 
+                  c_state_out_len = 1; goto return_notr;
+               };
+               *cs_next_out = c_state_out_ch;
+	       /* aCaB BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); */
+               cs_next_out++;
+               cs_avail_out--;
+            }
+         }   
+         /* Only caused by corrupt data stream? */
+         if (c_nblock_used > s_save_nblockPP)
+            return True;   /* NOTE: returns directly, skipping the save step after return_notr */
+
+         /* can a new run be started? */
+         if (c_nblock_used == s_save_nblockPP) {
+            c_state_out_len = 0; goto return_notr;
+         };   
+         c_state_out_ch = c_k0;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (k1 != c_k0) { 
+            c_k0 = k1; goto s_state_out_len_eq_one; 
+         };
+         if (c_nblock_used == s_save_nblockPP) 
+            goto s_state_out_len_eq_one;
+   
+         c_state_out_len = 2;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };
+   
+         c_state_out_len = 3;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };
+   
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         c_state_out_len = ((Int32)k1) + 4;   /* after four equal bytes, the next byte is an extra repeat count */
+         BZ_GET_FAST_C(c_k0); c_nblock_used++;
+      }
+
+      return_notr:   /* common non-error exit: account for bytes written, then save the locals */
+      total_out_lo32_old = s->strm->total_out_lo32;
+      s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+      if (s->strm->total_out_lo32 < total_out_lo32_old)   /* low word wrapped: carry into hi32 */
+         s->strm->total_out_hi32++;
+
+      /* save */
+      s->calculatedBlockCRC = c_calculatedBlockCRC;
+      s->state_out_ch       = c_state_out_ch;
+      s->state_out_len      = c_state_out_len;
+      s->nblock_used        = c_nblock_used;
+      s->k0                 = c_k0;
+      s->tt                 = c_tt;
+      s->tPos               = c_tPos;
+      s->strm->next_out     = cs_next_out;
+      s->strm->avail_out    = cs_avail_out;
+      /* end save */
+   }
+   return False;
+}
+
+
+/*---------------------------------------------------*/
+/* Writes the pending run and newly fetched runs to s->strm->next_out, fetching via BZ_GET_SMALL and updating s in place.
+   Returns True iff data corruption is discovered, False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+   UChar k1;   /* byte most recently fetched through BZ_GET_SMALL */
+
+/*    if (s->blockRandomised) { */
+
+/*       while (True) { */
+/*          /\* try to finish existing run *\/ */
+/*          while (True) { */
+/*             if (s->strm->avail_out == 0) return False; */
+/*             if (s->state_out_len == 0) break; */
+/*             *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; */
+/* 	       BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+/*             s->state_out_len--; */
+/*             s->strm->next_out++; */
+/*             s->strm->avail_out--; */
+/*             s->strm->total_out_lo32++; */
+/*             if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; */
+/*          } */
+   
+/*          /\* can a new run be started? *\/ */
+/*          if (s->nblock_used == s->save_nblock+1) return False; */
+
+/*          /\* Only caused by corrupt data stream? *\/ */
+/*          if (s->nblock_used > s->save_nblock+1) */
+/*             return True; */
+   
+/*          s->state_out_len = 1; */
+/*          s->state_out_ch = s->k0; */
+/*          BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          s->state_out_len = 2; */
+/*          BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          s->state_out_len = 3; */
+/*          BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          if (s->nblock_used == s->save_nblock+1) continue; */
+/*          if (k1 != s->k0) { s->k0 = k1; continue; }; */
+   
+/*          BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;  */
+/*          k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*          s->state_out_len = ((Int32)k1) + 4; */
+/*          BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK;  */
+/*          s->k0 ^= BZ_RAND_MASK; s->nblock_used++; */
+/*       } */
+
+/*    } else */ {
+
+      while (True) {
+         /* try to finish existing run */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;   /* output full: the pending run stays in s for the next call */
+            if (s->state_out_len == 0) break;
+            *(s->strm->next_out) = s->state_out_ch;
+	    /* aCaB BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;   /* low word wrapped: carry into hi32 */
+         }
+   
+         /* can a new run be started? */
+         if (s->nblock_used == s->save_nblock+1) return False;
+
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };   /* byte differs: the run ends at the current length */
+   
+         s->state_out_len = 2;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 3;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;   /* after four equal bytes, the next byte is an extra repeat count */
+         BZ_GET_SMALL(s->k0); s->nblock_used++;
+      }
+
+   }
+}
+/*--- Build the limit/base/perm Huffman decoding tables for one coding table from its per-symbol code lengths. ---*/
+
+static void CreateDecodeTables ( Int32 *limit, Int32 *base, Int32 *perm,
+                                UChar *length,
+                                Int32 minLen, Int32 maxLen, Int32 alphaSize )
+{
+   Int32 nPerm = 0, codeLen, sym, k, code = 0;
+
+   /* perm[]: symbols ordered by code length, ties kept in symbol order */
+   for (codeLen = minLen; codeLen <= maxLen; codeLen++) {
+      for (sym = 0; sym < alphaSize; sym++) {
+         if (length[sym] != codeLen) continue;
+         perm[nPerm++] = sym;
+      }
+   }
+
+   /* base[n+1] counts the codes of length n, then becomes a running total */
+   for (k = 0; k < BZ_MAX_CODE_LEN; k++) base[k] = 0;
+   for (sym = 0; sym < alphaSize; sym++) base[length[sym] + 1] += 1;
+   for (k = 1; k < BZ_MAX_CODE_LEN; k++) base[k] = base[k] + base[k-1];
+
+   /* limit[n]: largest code value of length n; other entries stay 0 */
+   for (k = 0; k < BZ_MAX_CODE_LEN; k++) limit[k] = 0;
+   for (codeLen = minLen; codeLen <= maxLen; codeLen++) {
+      code += base[codeLen+1] - base[codeLen];
+      limit[codeLen] = code - 1;
+      code <<= 1;
+   }
+
+   /* rebias base[n] so that (code - base[n]) is the index into perm[] */
+   for (codeLen = minLen + 1; codeLen <= maxLen; codeLen++)
+      base[codeLen] = ((limit[codeLen-1] + 1) << 1) - base[codeLen];
+}
+
+/*---------------------------------------------------*/
+static Int32 BZ2_decompress ( DState* s )
+{
+   UChar      uc;
+   Int32      retVal;
+   Int32      minLen, maxLen;
+   nsis_bzstream* strm = s->strm;
+
+   /* stuff that needs to be saved/restored */
+   Int32  i;
+   Int32  j;
+   Int32  t;
+   Int32  alphaSize;
+   Int32  nGroups;
+   Int32  nSelectors;
+   Int32  EOB;
+   Int32  groupNo;
+   Int32  groupPos;
+   Int32  nextSym;
+   Int32  nblockMAX;
+   Int32  nblock;
+   Int32  es;
+   Int32  N;
+   Int32  curr;
+   Int32  zt;
+   Int32  zn; 
+   Int32  zvec;
+   Int32  zj;
+   Int32  gSel;
+   Int32  gMinlen;
+   Int32* gLimit;
+   Int32* gBase;
+   Int32* gPerm;
+
+   if (s->state == BZ_X_MAGIC_1) {
+      /*initialise the save area*/
+      s->save_i           = 0;
+      s->save_j           = 0;
+      s->save_t           = 0;
+      s->save_alphaSize   = 0;
+      s->save_nGroups     = 0;
+      s->save_nSelectors  = 0;
+      s->save_EOB         = 0;
+      s->save_groupNo     = 0;
+      s->save_groupPos    = 0;
+      s->save_nextSym     = 0;
+      s->save_nblockMAX   = 0;
+      s->save_nblock      = 0;
+      s->save_es          = 0;
+      s->save_N           = 0;
+      s->save_curr        = 0;
+      s->save_zt          = 0;
+      s->save_zn          = 0;
+      s->save_zvec        = 0;
+      s->save_zj          = 0;
+      s->save_gSel        = 0;
+      s->save_gMinlen     = 0;
+      s->save_gLimit      = NULL;
+      s->save_gBase       = NULL;
+      s->save_gPerm       = NULL;
+   }
+
+   /*restore from the save area*/
+   i           = s->save_i;
+   j           = s->save_j;
+   t           = s->save_t;
+   alphaSize   = s->save_alphaSize;
+   nGroups     = s->save_nGroups;
+   nSelectors  = s->save_nSelectors;
+   EOB         = s->save_EOB;
+   groupNo     = s->save_groupNo;
+   groupPos    = s->save_groupPos;
+   nextSym     = s->save_nextSym;
+   nblockMAX   = s->save_nblockMAX;
+   nblock      = s->save_nblock;
+   es          = s->save_es;
+   N           = s->save_N;
+   curr        = s->save_curr;
+   zt          = s->save_zt;
+   zn          = s->save_zn; 
+   zvec        = s->save_zvec;
+   zj          = s->save_zj;
+   gSel        = s->save_gSel;
+   gMinlen     = s->save_gMinlen;
+   gLimit      = s->save_gLimit;
+   gBase       = s->save_gBase;
+   gPerm       = s->save_gPerm;
+
+   retVal = BZ_OK;
+
+   switch (s->state) {
+
+     /* aCaB
+      GET_UCHAR(BZ_X_MAGIC_1, uc);
+      if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_2, uc);
+      if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_3, uc)
+      if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
+      if (s->blockSize100k < (BZ_HDR_0 + 1) || 
+          s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+      s->blockSize100k -= BZ_HDR_0;
+     */
+
+   case BZ_X_MAGIC_1:
+
+      s->blockSize100k = 9;
+
+      if (s->smallDecompress) {
+         s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+         s->ll4  = BZALLOC( 
+                      ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 
+                   );
+         if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+      } else {
+         s->tt  = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+         if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+      }
+
+      GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+      if (uc == 0x17) goto endhdr_2;
+      if (uc != 0x31) RETURN(BZ_DATA_ERROR);
+
+      /* aCaB
+      GET_UCHAR(BZ_X_BLKHDR_2, uc);
+      if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_3, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_4, uc);
+      if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_5, uc);
+      if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_6, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+      s->currBlockNo++;
+      if (s->verbosity >= 2)
+         VPrintf1 ( "\n    [%d: huff+mtf ", s->currBlockNo );
+ 
+      s->storedBlockCRC = 0;
+      GET_UCHAR(BZ_X_BCRC_1, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_2, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_3, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_4, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+
+      GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+      */
+
+      s->origPtr = 0;
+      GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+      if (s->origPtr < 0)
+         RETURN(BZ_DATA_ERROR);
+      if (s->origPtr > 10 + 100000*s->blockSize100k) 
+         RETURN(BZ_DATA_ERROR);
+
+      /*--- Receive the mapping table ---*/
+      for (i = 0; i < 16; i++) {
+         GET_BIT(BZ_X_MAPPING_1, uc);
+         if (uc == 1) 
+            s->inUse16[i] = True; else 
+            s->inUse16[i] = False;
+      }
+
+      for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+      for (i = 0; i < 16; i++)
+         if (s->inUse16[i])
+            for (j = 0; j < 16; j++) {
+               GET_BIT(BZ_X_MAPPING_2, uc);
+               if (uc == 1) s->inUse[i * 16 + j] = True;
+            }
+      makeMaps_d ( s );
+      if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+      alphaSize = s->nInUse+2;
+
+      /*--- Now the selectors ---*/
+      GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+      if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
+      GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+      if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
+      for (i = 0; i < nSelectors; i++) {
+         j = 0;
+         while (True) {
+            GET_BIT(BZ_X_SELECTOR_3, uc);
+            if (uc == 0) break;
+            j++;
+            if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+         }
+         s->selectorMtf[i] = j;
+      }
+
+      /*--- Undo the MTF values for the selectors. ---*/
+      {
+         UChar pos[BZ_N_GROUPS], tmp, v;
+         for (v = 0; v < nGroups; v++) pos[v] = v;
+   
+         for (i = 0; i < nSelectors; i++) {
+            v = s->selectorMtf[i];
+            tmp = pos[v];
+            while (v > 0) { pos[v] = pos[v-1]; v--; }
+            pos[0] = tmp;
+            s->selector[i] = tmp;
+         }
+      }
+
+      /*--- Now the coding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         GET_BITS(BZ_X_CODING_1, curr, 5);
+         for (i = 0; i < alphaSize; i++) {
+            while (True) {
+               if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+               GET_BIT(BZ_X_CODING_2, uc);
+               if (uc == 0) break;
+               GET_BIT(BZ_X_CODING_3, uc);
+               if (uc == 0) curr++; else curr--;
+            }
+            s->len[t][i] = curr;
+         }
+      }
+
+      /*--- Create the Huffman decoding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         minLen = 32;
+         maxLen = 0;
+         for (i = 0; i < alphaSize; i++) {
+            if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+            if (s->len[t][i] < minLen) minLen = s->len[t][i];
+         }
+         CreateDecodeTables ( 
+            &(s->limit[t][0]), 
+            &(s->base[t][0]), 
+            &(s->perm[t][0]), 
+            &(s->len[t][0]),
+            minLen, maxLen, alphaSize
+         );
+         s->minLens[t] = minLen;
+      }
+
+      /*--- Now the MTF values ---*/
+
+      EOB      = s->nInUse+1;
+      nblockMAX = 100000 * s->blockSize100k;
+      groupNo  = -1;
+      groupPos = 0;
+
+      for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+      /*-- MTF init --*/
+      {
+         Int32 ii, jj, kk;
+         kk = MTFA_SIZE-1;
+         for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+            for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+               s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+               kk--;
+            }
+            s->mtfbase[ii] = kk + 1;
+         }
+      }
+      /*-- end MTF init --*/
+
+      nblock = 0;
+      GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+      while (True) {
+
+         if (nextSym == EOB) break;
+
+         if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+            es = -1;
+            N = 1;
+            do {
+               if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+               if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+               N = N * 2;
+               GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+            }
+               while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+            es++;
+            uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
+            s->unzftab[uc] += es;
+
+            if (s->smallDecompress)
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->ll16[nblock] = (UInt16)uc;
+                  nblock++;
+                  es--;
+               }
+            else
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->tt[nblock] = (UInt32)uc;
+                  nblock++;
+                  es--;
+               };
+
+            continue;
+
+         } else {
+
+            if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+            /*-- uc = MTF ( nextSym-1 ) --*/
+            {
+               Int32 ii, jj, kk, pp, lno, off;
+               UInt32 nn;
+               nn = (UInt32)(nextSym - 1);
+
+               if (nn < MTFL_SIZE) {
+                  /* avoid general-case expense */
+                  pp = s->mtfbase[0];
+                  uc = s->mtfa[pp+nn];
+                  while (nn > 3) {
+                     Int32 z = pp+nn;
+                     s->mtfa[(z)  ] = s->mtfa[(z)-1];
+                     s->mtfa[(z)-1] = s->mtfa[(z)-2];
+                     s->mtfa[(z)-2] = s->mtfa[(z)-3];
+                     s->mtfa[(z)-3] = s->mtfa[(z)-4];
+                     nn -= 4;
+                  }
+                  while (nn > 0) { 
+                     s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 
+                  };
+                  s->mtfa[pp] = uc;
+               } else { 
+                  /* general case */
+                  lno = nn / MTFL_SIZE;
+                  off = nn % MTFL_SIZE;
+                  pp = s->mtfbase[lno] + off;
+                  uc = s->mtfa[pp];
+                  while (pp > s->mtfbase[lno]) { 
+                     s->mtfa[pp] = s->mtfa[pp-1]; pp--; 
+                  };
+                  s->mtfbase[lno]++;
+                  while (lno > 0) {
+                     s->mtfbase[lno]--;
+                     s->mtfa[s->mtfbase[lno]] 
+                        = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+                     lno--;
+                  }
+                  s->mtfbase[0]--;
+                  s->mtfa[s->mtfbase[0]] = uc;
+                  if (s->mtfbase[0] == 0) {
+                     kk = MTFA_SIZE-1;
+                     for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+                        for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+                           s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+                           kk--;
+                        }
+                        s->mtfbase[ii] = kk + 1;
+                     }
+                  }
+               }
+            }
+            /*-- end uc = MTF ( nextSym-1 ) --*/
+
+            s->unzftab[s->seqToUnseq[uc]]++;
+            if (s->smallDecompress)
+               s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+               s->tt[nblock]   = (UInt32)(s->seqToUnseq[uc]);
+            nblock++;
+
+            GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+            continue;
+         }
+      }
+
+      /* Now we know what nblock is, we can do a better sanity
+         check on s->origPtr.
+      */
+      if (s->origPtr < 0 || s->origPtr >= nblock)
+         RETURN(BZ_DATA_ERROR);
+
+      /*-- Set up cftab to facilitate generation of T^(-1) --*/
+      s->cftab[0] = 0;
+      for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+      for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
+      for (i = 0; i <= 256; i++) {
+         if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+      s->state_out_len = 0;
+      s->state_out_ch  = 0;
+      /* BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); */
+      s->state = BZ_X_OUTPUT;
+      if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+      if (s->smallDecompress) {
+
+         /*-- Make a copy of cftab, used in generation of T --*/
+         for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+         /*-- compute the T vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->ll16[i]);
+            SET_LL(i, s->cftabCopy[uc]);
+            s->cftabCopy[uc]++;
+         }
+
+         /*-- Compute T^(-1) by pointer reversal on T --*/
+         i = s->origPtr;
+         j = GET_LL(i);
+         do {
+            Int32 tmp = GET_LL(j);
+            SET_LL(j, i);
+            i = j;
+            j = tmp;
+         }
+            while (i != s->origPtr);
+
+         s->tPos = s->origPtr;
+         s->nblock_used = 0;
+	 /* aCaB
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+	    } else */{
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+         }
+
+      } else {
+
+         /*-- compute the T^(-1) vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->tt[i] & 0xff);
+            s->tt[s->cftab[uc]] |= (i << 8);
+            s->cftab[uc]++;
+         }
+
+         s->tPos = s->tt[s->origPtr] >> 8;
+         s->nblock_used = 0;
+	 /* aCaB
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+	    } else */{
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+         }
+
+      }
+
+      RETURN(BZ_OK);
+
+
+
+    endhdr_2:
+      /* aCaB
+      GET_UCHAR(BZ_X_ENDHDR_2, uc);
+      if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_3, uc);
+      if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_4, uc);
+      if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_5, uc);
+      if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_6, uc);
+      if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+      s->storedCombinedCRC = 0;
+      GET_UCHAR(BZ_X_CCRC_1, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_2, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_3, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_4, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      */
+      s->state = BZ_X_IDLE;
+      RETURN(BZ_STREAM_END);
+
+   default: /* aCaB AssertH ( False, 4001 ); */
+     RETURN(BZ_DATA_ERROR);
+   }
+
+   /* aCaB AssertH ( False, 4002 ); */
+   RETURN(BZ_DATA_ERROR);
+
+   save_state_and_return:
+
+   s->save_i           = i;
+   s->save_j           = j;
+   s->save_t           = t;
+   s->save_alphaSize   = alphaSize;
+   s->save_nGroups     = nGroups;
+   s->save_nSelectors  = nSelectors;
+   s->save_EOB         = EOB;
+   s->save_groupNo     = groupNo;
+   s->save_groupPos    = groupPos;
+   s->save_nextSym     = nextSym;
+   s->save_nblockMAX   = nblockMAX;
+   s->save_nblock      = nblock;
+   s->save_es          = es;
+   s->save_N           = N;
+   s->save_curr        = curr;
+   s->save_zt          = zt;
+   s->save_zn          = zn;
+   s->save_zvec        = zvec;
+   s->save_zj          = zj;
+   s->save_gSel        = gSel;
+   s->save_gMinlen     = gMinlen;
+   s->save_gLimit      = gLimit;
+   s->save_gBase       = gBase;
+   s->save_gPerm       = gPerm;
+
+   return retVal;   
+}
+
+
+/*-- Check the platform's basic type sizes: returns 1 if int/short/char are 4/2/1 bytes, otherwise 0. --*/
+static
+int bz_config_ok ( void )
+{
+   if (sizeof(int)   != 4) return 0;   /* int must be 4 bytes */
+   if (sizeof(short) != 2) return 0;   /* short must be 2 bytes */
+   if (sizeof(char)  != 1) return 0;   /* char must be 1 byte */
+   return 1;                           /* all sizes are as expected */
+}
+
+
+/*-- Default allocator: returns items*size bytes from cli_malloc, or NULL if an argument is negative or the product would not fit in 32 signed bits; opaque is unused. --*/
+static
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+   if (items < 0 || size < 0 || (size > 0 && items > 0x7fffffff / size)) return NULL; /* refuse instead of overflowing the signed product (Int32 taken as 32-bit, cf. bz_config_ok) */
+   return cli_malloc ( items * size );   /* product is now known to be in range; a zero-sized request is passed through unchanged */
+}
+/*-- Default deallocator: hands addr to free() unless it is NULL; opaque is unused. --*/
+static
+void default_bzfree ( void* opaque, void* addr )
+{
+   if (addr != NULL) free ( addr );
+}
+
+/*-- Prepare *strm for decompression: validate arguments, install default allocators if unset, allocate and reset a DState. Returns BZ_OK or a BZ_*_ERROR code. --*/
+int BZ_API(nsis_BZ2_bzDecompressInit) 
+                     ( nsis_bzstream* strm, 
+                       int        verbosity,
+                       int        small )
+{
+   DState* s;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;   /* basic type sizes are not the expected ones */
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   if (small != 0 && small != 1) return BZ_PARAM_ERROR;   /* small is a 0/1 flag */
+   if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;   /* accepted range is 0..4 */
+
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;   /* caller supplied no allocator: use the defaults */
+   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+   s = BZALLOC( sizeof(DState) );
+   if (s == NULL) return BZ_MEM_ERROR;
+   s->strm                  = strm;   /* back-pointer; the other entry points verify it */
+   strm->state              = s;
+   s->state                 = BZ_X_MAGIC_1;   /* first state of the decoder state machine */
+   s->bsLive                = 0;
+   s->bsBuff                = 0;
+   s->calculatedCombinedCRC = 0;
+   strm->total_in_lo32      = 0;
+   strm->total_in_hi32      = 0;
+   strm->total_out_lo32     = 0;
+   strm->total_out_hi32     = 0;
+   s->smallDecompress       = (Bool)small;   /* later selects the SMALL or FAST output routine */
+   s->ll4                   = NULL;   /* ll4, ll16 and tt start out unallocated */
+   s->ll16                  = NULL;
+   s->tt                    = NULL;
+   s->currBlockNo           = 0;
+   s->verbosity             = verbosity;
+
+   return BZ_OK;
+}
+
+/*-- Drive decompression of *strm: alternately emit decoded output (state BZ_X_OUTPUT) and call BZ2_decompress for the next block. Returns a BZ_* status code. --*/
+int BZ_API(nsis_BZ2_bzDecompress) ( nsis_bzstream *strm )
+{
+   Bool    corrupt;
+   DState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;   /* state does not belong to this stream */
+
+   while (True) {
+      if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;   /* BZ2_decompress leaves the state at BZ_X_IDLE when it returns BZ_STREAM_END */
+      if (s->state == BZ_X_OUTPUT) {
+         if (s->smallDecompress)
+            corrupt = unRLE_obuf_to_output_SMALL ( s ); else
+            corrupt = unRLE_obuf_to_output_FAST  ( s );
+         if (corrupt) return BZ_DATA_ERROR;
+         if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {   /* whole block has been emitted */
+	   /* BZ_FINALISE_CRC ( s->calculatedBlockCRC );
+            if (s->verbosity >= 3)
+               VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC,
+                          s->calculatedBlockCRC );
+            if (s->verbosity >= 2) VPrintf0 ( "]" );
+            if (s->calculatedBlockCRC != s->storedBlockCRC)
+               return BZ_DATA_ERROR;
+            s->calculatedCombinedCRC
+               = (s->calculatedCombinedCRC << 1) |
+                    (s->calculatedCombinedCRC >> 31);
+            s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
+	   */
+            s->state = BZ_X_BLKHDR_1;   /* block CRC check above is disabled; go on to the next block header */
+         } else {
+            return BZ_OK;   /* block not fully emitted yet: hand control back to the caller */
+         }
+      }
+      if (s->state >= BZ_X_MAGIC_1) {
+         Int32 r = BZ2_decompress ( s );
+         if (r == BZ_STREAM_END) {
+	   /* aCaB
+            if (s->verbosity >= 3)
+               VPrintf2 ( "\n    combined CRCs: stored = 0x%08x, computed = 0x%08x", 
+                          s->storedCombinedCRC, s->calculatedCombinedCRC );
+            if (s->calculatedCombinedCRC != s->storedCombinedCRC)
+               return BZ_DATA_ERROR;
+	   */
+            return r;   /* combined CRC check above is disabled; end of stream is reported as is */
+         }
+         if (s->state != BZ_X_OUTPUT) return r;   /* no block ready for output: pass BZ2_decompress's status through */
+      }
+   }
+
+   /* AssertH ( 0, 6001 ); */
+   return BZ_DATA_ERROR;
+   /* return 0; */  /*NOTREACHED*/
+}
+
+
+/*-- Release what *strm's decompression state owns (tt, ll16, ll4, then the DState) and clear strm->state. Returns BZ_OK, or BZ_PARAM_ERROR for an invalid stream. --*/
+int BZ_API(nsis_BZ2_bzDecompressEnd)  ( nsis_bzstream *strm )
+{
+   DState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;   /* state does not belong to this stream */
+
+   if (s->tt   != NULL) BZFREE(s->tt);
+   if (s->ll16 != NULL) BZFREE(s->ll16);
+   if (s->ll4  != NULL) BZFREE(s->ll4);
+
+   BZFREE(strm->state);
+   strm->state = NULL;   /* a later call on this stream now fails the s == NULL check */
+
+   return BZ_OK;
+}
+
+/*-------------------------------------------------------------*/
+/*--- end                                           bzlib.c ---*/
+/*-------------------------------------------------------------*/

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,715 @@
+/*
+ * This file is a part of the zlib compression module for NSIS.
+ * 
+ * Copyright and license information can be found below.
+ * Modifications Copyright (C) 1999-2007 Nullsoft and Contributors
+ * 
+ * The original zlib source code is available at
+ * http://www.zlib.net/
+ * 
+ * This software is provided 'as-is', without any express or implied
+ * warranty.
+ */
+
+/*
+ * Copyright (C) 1995-1998 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in COPYING.nsis
+ */
+
+#include "nsis_zutil.h"
+#include <string.h>
+
+#ifndef min
+#  define min(x,y) ((x<y)?x:y)
+#endif
+/* NOTE(review): min() does not parenthesize its arguments and may evaluate one of them twice; pass simple, side-effect-free operands only */
+/* defines for inflate input/output; they use the locals b, k, p, n, q, m and the pointers s and z of the enclosing function */
+/*   update pointers and return: UPDBITS/UPDIN/UPDOUT store the locals back, LEAVE(r) also flushes the window and returns r */
+#define UPDBITS {s->bitb=b;s->bitk=k;}
+#define UPDIN {z->avail_in=n;z->next_in=p;}
+#define UPDOUT {s->write=q;}
+#define UPDATE {UPDBITS UPDIN UPDOUT}
+#define LEAVE(r) {UPDATE inflate_flush(z); return r;}
+
+/*   get bytes and bits: LOADIN reloads the input pointer/count and the bit buffer/count into the locals */
+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
+
+/*   NEEDBYTE returns Z_OK when no input is left; NEEDBITS(j) pulls bytes into b until it holds at least j bits */
+#define NEEDBYTE {if(!n)LEAVE(Z_OK)}
+#define NEXTBYTE (n--,*p++)
+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+/*   DUMPBITS(j) discards the j low bits of the bit buffer */
+#define DUMPBITS(j) {b>>=(j);k-=(j);}
+/*   output bytes: WAVAIL is the room at q (up to one byte before the read pointer, else up to the window end); NEEDOUT wraps, then flushes, and returns Z_OK if there is still no room */
+#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
+#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
+#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
+#define FLUSH {UPDOUT inflate_flush(z); LOADOUT}
+#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE(Z_OK)}}}
+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
+/*   load local pointers */
+#define LOAD {LOADIN LOADOUT}
+/*   LAST is true when the current block was flagged as the final one (s->last is set to DRY for it) */
+#define LAST (s->last == DRY)
+
+#define FIXEDH 544      /* number of hufts used by fixed tables */
+
+
+/* exop and bits are shorthand for the word.what.Exop / word.what.Bits members of a huft table entry */
+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* And'ing with inflate_mask[n] masks the lower n bits (n = 0..16) */
+local unsigned short inflate_mask[17] = {
+    0x0000,
+    0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+    0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+}; /* use to reduce .data #define INFLATE_MASK(x, n) (x & (~((unsigned short) 0xFFFF << n))) */
+local const char border[] = { /* Order of the bit length code lengths */
+        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+local const unsigned short  cplens[31] = { /* Copy lengths for literal codes 257..285 */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+        /* see note #13 above about 258 (NOTE(review): that note is not part of this file) */
+local const unsigned short  cplext[31] = { /* Extra bits for literal codes 257..285 */
+        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
+local const unsigned short  cpdist[30] = { /* Copy offsets for distance codes 0..29 */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577};
+local const unsigned short  cpdext[30] = { /* Extra bits for distance codes */
+        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+        12, 12, 13, 13};
+
+/* build fixed tables only once--keep them here (NOTE(review): shared mutable state with no locking visible in this file) */
+local char fixed_built = 0;            /* becomes nonzero once the fixed tables have been built */
+local inflate_huft fixed_mem[FIXEDH];  /* storage handed to huft_build for the fixed tables */
+local uInt fixed_bl=9;                 /* lookup bits for the fixed literal/length table */
+local uInt fixed_bd=5;                 /* lookup bits for the fixed distance table */
+local inflate_huft *fixed_tl;          /* fixed literal/length table, set by huft_build */
+local inflate_huft *fixed_td;          /* fixed distance table, set by huft_build */
+
+/* copy as much as possible from the sliding window (starting at s->read) to z->next_out, limited by z->avail_out */
+local void ZEXPORT inflate_flush(nsis_z_streamp z)
+{
+  inflate_blocks_statef *s = &z->blocks;
+  uInt n;
+  Bytef *q;
+
+  /* q walks the window starting from the current read position */
+  q = s->read;
+
+again:
+  /* bytes available up to the write pointer, or up to the window end when the write pointer is behind q */
+  n = (uInt)((q <= s->write ? s->write : s->end) - q);
+  n = min(n, z->avail_out);     /* never copy more than the output area can take */
+
+  /* update counters */
+  z->avail_out -= n;
+  /* z->total_out += n; */
+
+  /* copy as far as end of window */
+  zmemcpy(z->next_out, q, n);
+  z->next_out += n;
+  q += n;
+
+  /* see if more to copy at beginning of window */
+  if (q == s->end)
+  {
+    /* wrap pointers; a write pointer sitting at the window end is wrapped as well */
+    q = s->window;
+    if (s->write == s->end)
+      s->write = s->window;
+
+    /* do the same for the beginning of the window */
+    goto again;
+  }
+
+  /* publish the new read position */
+  s->read = q;
+}
+
+#define BMAX 15         /* maximum bit length of any code */
+/* Build Huffman decoding tables from the code lengths in b[]. Returns Z_OK, Z_DATA_ERROR (over-subscribed lengths), Z_MEM_ERROR (more than MANY hufts needed) or Z_BUF_ERROR (incomplete code). */
+local int ZEXPORT huft_build(
+uIntf *b,               /* code lengths in bits (all assumed <= BMAX) */
+uInt n,                 /* number of codes (assumed <= 288) */
+uInt s,                 /* number of simple-valued codes (0..s-1) */
+const unsigned short *d,         /* list of base values for non-simple codes */
+const unsigned short *e,         /* list of extra bits for non-simple codes */
+inflate_huft * FAR *t,  /* result: starting table */
+uIntf *m,               /* maximum lookup bits, returns actual */
+inflate_huft *hp,       /* space for trees */
+uInt *hn)               /* in/out: number of hufts already used in hp; advanced by each table allocated here */
+{
+  static uIntf v[288];             /* work area for huft_build: values in order of bit length (NOTE(review): static, so shared by all calls) */
+  uInt a;                       /* counter for codes of length k */
+  uInt c[BMAX+1];               /* bit length count table */
+  uInt f;                       /* i repeats in table every f entries */
+  int g;                        /* maximum code length */
+  int h;                        /* table level */
+  uInt i;              /* counter, current code */
+  uInt j;              /* counter */
+  int k;               /* number of bits in current code */
+  int l;                        /* bits per table (returned in m) */
+  uIntf *p;            /* pointer into c[], b[], or v[] */
+  inflate_huft *q;              /* points to current table */
+  struct inflate_huft_s r;      /* table entry for structure assignment */
+  inflate_huft *u[BMAX];        /* table stack */
+  int w;               /* bits before this table == (l * h) */
+  uInt x[BMAX+1];               /* bit offsets, then code stack */
+  uIntf *xp;                    /* pointer into x */
+  int y;                        /* number of dummy codes added */
+  uInt z;                       /* number of entries in current table */
+
+
+  /* Generate counts for each bit length */
+  p=c;
+  y=16; while (y--) *p++ = 0;   /* clear all BMAX+1 (16) counters */
+  p = b;
+  i = n;
+  do {
+    c[*p++]++;                  /* assume all entries <= BMAX */
+  } while (--i);
+  if (c[0] == n)                /* null input--all zero length codes */
+  {
+    *t = (inflate_huft *)Z_NULL;
+    *m = 0;
+    return Z_OK;
+  }
+
+
+  /* Find minimum and maximum length, bound *m by those */
+  l = *m;
+  for (j = 1; j <= BMAX; j++)
+    if (c[j])
+      break;
+  k = j;                        /* minimum code length */
+  if ((uInt)l < j)
+    l = j;
+  for (i = BMAX; i; i--)
+    if (c[i])
+      break;
+  g = i;                        /* maximum code length */
+  if ((uInt)l > i)
+    l = i;
+  *m = l;
+
+
+  /* Adjust last length count to fill out codes, if needed */
+  for (y = 1 << j; j < i; j++, y <<= 1)
+    if ((y -= c[j]) < 0)
+      return Z_DATA_ERROR;      /* more codes of this length than patterns left */
+  if ((y -= c[i]) < 0)
+    return Z_DATA_ERROR;
+  c[i] += y;
+
+
+  /* Generate starting offsets into the value table for each length */
+  x[1] = j = 0;
+  p = c + 1;  xp = x + 2;
+  while (--i) {                 /* note that i == g from above */
+    *xp++ = (j += *p++);
+  }
+
+
+  /* Make a table of values in order of bit lengths */
+  p = b;  i = 0;
+  do {
+    if ((j = *p++) != 0)
+      v[x[j]++] = i;
+  } while (++i < n);
+  n = x[g];                     /* set n to length of v */
+
+
+  /* Generate the Huffman codes and for each, make the table entries */
+  x[0] = i = 0;                 /* first Huffman code is zero */
+  p = v;                        /* grab values in bit order */
+  h = -1;                       /* no tables yet--level -1 */
+  w = -l;                       /* bits decoded == (l * h) */
+  u[0] = (inflate_huft *)Z_NULL;        /* just to keep compilers happy */
+  q = (inflate_huft *)Z_NULL;   /* ditto */
+  z = 0;                        /* ditto */
+
+  r.base = 0;
+
+  /* go through the bit lengths (k already is bits in shortest code) */
+  for (; k <= g; k++)
+  {
+    a = c[k];
+    while (a--)
+    {
+      int nextw=w;
+      /* here i is the Huffman code of length k bits for value *p */
+      /* make tables up to required level */
+      while (k > (nextw=w + l))
+      {
+        h++;
+
+        /* compute minimum size table less than or equal to l bits */
+        z = g - nextw;
+        z = z > (uInt)l ? (uInt)l : z;        /* table size upper limit */
+        if ((f = 1 << (j = k - nextw)) > a + 1)     /* try a k-w bit table */
+        {                       /* too few codes for k-w bit table */
+          f -= a + 1;           /* deduct codes from patterns left */
+          xp = c + k;
+          if (j < z)
+            while (++j < z && (f <<= 1) > *++xp)     /* try smaller tables up to z bits */
+            {
+              f -= *xp;         /* else deduct codes from patterns */
+            }
+        }
+        z = 1 << j;             /* table entries for j-bit table */
+
+        /* allocate new table */
+        if (*hn + z > MANY)     /* (note: doesn't matter for fixed) */
+          return Z_MEM_ERROR;   /* not enough memory */
+        u[h] = q = hp + *hn;    /* new table starts right after the hufts already in use */
+        *hn += z;
+
+        /* connect to last table, if there is one */
+        if (h)
+        {
+          x[h] = i;             /* save pattern for backing up */
+          r.bits = (Byte)l;     /* bits to dump before this table */
+          r.exop = (Byte)j;     /* bits in this table */
+          j = i >> w;
+          r.base = (uInt)(q - u[h-1] - j);   /* offset to this table */
+          u[h-1][j] = r;        /* connect to last table */
+        }
+        else
+          *t = q;               /* first table is returned result */
+        w=nextw;                 /* previous table always l bits */
+      }
+
+      /* set up table entry in r */
+      r.bits = (Byte)(k - w);
+      if (p >= v + n)
+        r.exop = 128 + 64;      /* out of values--invalid code */
+      else if (*p < s)
+      {
+        r.exop = (Byte)(*p < 256 ? 0 : 32 + 64);     /* 256 is end-of-block */
+        r.base = *p++;          /* simple code is just the value */
+      }
+      else
+      {
+        r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
+        r.base = d[*p++ - s];
+      }
+
+      /* fill code-like entries with r */
+      f = 1 << (k - w);
+      for (j = i >> w; j < z; j += f)
+        q[j] = r;
+
+      /* backwards increment the k-bit code i */
+      for (j = 1 << (k - 1); i & j; j >>= 1)
+        i ^= j;
+      i ^= j;
+
+      /* backup over finished tables */
+      while ((i & ((1 << w) - 1)) != x[h])
+      {
+        h--;                    /* don't need to update q */
+        w -= l;
+      }
+    }
+  }
+
+
+  /* Return Z_BUF_ERROR if we were given an incomplete table */
+  return (y != 0 && g != 1) ? Z_BUF_ERROR : Z_OK;
+}
+/* Inflate state machine for z: returns Z_OK when input or output space runs out, Z_STREAM_END once the last block is flushed, Z_DATA_ERROR for bad dynamic tables, Z_STREAM_ERROR for other invalid data or an unknown mode. */
+int ZEXPORT nsis_inflate(nsis_z_streamp z)
+{
+  inflate_blocks_statef *s = &z->blocks;
+  inflate_codes_statef *c = &s->sub.decode.t_codes;  /* codes state */
+
+  /* lousy two bytes saved by doing this */
+  struct
+  {
+    uInt t;               /* temporary storage */
+    uLong b;              /* bit buffer */
+    uInt k;               /* bits in bit buffer */
+    Bytef *p;             /* input data pointer */
+    uInt n;               /* bytes available there */
+    Bytef *q;             /* output window write pointer */
+    uInt m;               /* bytes to end of window or read pointer */
+
+    /* CODES variables */
+
+    inflate_huft *j;      /* temporary pointer */
+    uInt e;               /* extra bits or operation */
+    Bytef *f;             /* pointer to copy strings from */
+  } _state;
+
+#define t _state.t
+#define b _state.b
+#define k _state.k
+#define p _state.p
+#define n _state.n
+#define q _state.q
+#define m _state.m
+
+  /* copy input/output information to locals (UPDATE macro restores) */
+  LOAD
+
+  /* process input based on current state */
+  for (;;) switch (s->mode)
+  {
+    case TYPE:                          /* read the 3-bit block header: bit 0 = last-block flag, bits 1..2 = block type */
+      NEEDBITS(3)
+      t = (uInt)b & 7;
+      DUMPBITS(3)
+      s->last = (t & 1) ? DRY : TYPE;   /* mode to enter once this block is finished */
+      switch (t >> 1)
+      {
+        case 0:                         /* stored */
+          Tracev((stderr, "inflate:     stored block%s\n",
+                 LAST ? " (last)" : ""));
+          DUMPBITS(k&7)                 /* drop the bits left in the current byte */
+          s->mode = LENS;               /* get length of stored block */
+          break;
+        case 1:                         /* fixed */
+          Tracev((stderr, "inflate:     fixed codes block%s\n",
+                 LAST ? " (last)" : ""));
+          {
+            if (!fixed_built)
+            {
+              int _k;              /* temporary variable */
+              uInt f = 0;         /* number of hufts used in fixed_mem */
+              static uIntf lc[288];           /* length list for huft_build */
+
+              /* literal table: lengths 8 (0..143), 9 (144..255), 7 (256..279), 8 (280..287) */
+              for (_k = 0; _k < 288; _k++)
+              {
+                char v=8;
+                if (_k > 143)
+                {
+                  if (_k < 256) v++;
+                  else if (_k < 280) v--;
+                }
+                lc[_k] = v;
+              }
+
+              huft_build(lc, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, fixed_mem, &f);
+
+              /* distance table */
+              for (_k = 0; _k < 30; _k++) lc[_k] = 5;
+
+              huft_build(lc, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, fixed_mem, &f);
+
+              /* done */
+              fixed_built++;
+            }
+
+            /* s->sub.decode.t_codes.mode = CODES_START; */
+            s->sub.decode.t_codes.lbits = (Byte)fixed_bl;
+            s->sub.decode.t_codes.dbits = (Byte)fixed_bd;
+            s->sub.decode.t_codes.ltree = fixed_tl;
+            s->sub.decode.t_codes.dtree = fixed_td;
+          }
+          s->mode = CODES_START;
+          break;
+        case 2:                         /* dynamic */
+          Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                 LAST ? " (last)" : ""));
+          s->mode = TABLE;
+          break;
+        case 3:                         /* illegal */
+          /* the only illegal value possible is 3 because we check only 2 bits */
+          goto bad;
+      }
+      break;
+    case LENS:                        /* only the 16-bit stored length is read here; no complement check is made */
+      NEEDBITS(16)
+      s->sub.left = (uInt)b & 0xffff;
+      b = k = 0;                      /* dump bits */
+      Tracev((stderr, "inflate:       stored length %u\n", s->sub.left));
+      s->mode = s->sub.left ? STORED : (inflate_mode)s->last;
+      break;
+    case STORED:                      /* copy stored bytes straight from input to the window */
+    {
+      uInt mn;
+
+      if (n == 0)
+        LEAVE(Z_OK)
+      NEEDOUT
+      mn = min(m, n);
+      t = min(s->sub.left, mn);       /* limited by bytes left in block, window room and input */
+      zmemcpy(q, p, t);
+      p += t;  n -= t;
+      q += t;  m -= t;
+      if (!(s->sub.left -= t))
+        s->mode = (inflate_mode)s->last;
+      break;
+    }
+    case TABLE:                       /* 14 bits of table sizes for a dynamic block */
+      NEEDBITS(14)
+      s->sub.trees.table = t = (uInt)b & 0x3fff;
+      if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
+      {
+        s->mode = NZ_BAD;
+        LEAVE(Z_DATA_ERROR);
+      }
+      /* t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); */
+      DUMPBITS(14)
+      s->sub.trees.index = 0;
+      Tracev((stderr, "inflate:       table sizes ok\n"));
+      s->mode = BTREE;                /* fall through */
+    case BTREE:                       /* read the 3-bit bit-length code lengths, stored in border[] order */
+      while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
+      {
+        NEEDBITS(3)
+        s->sub.trees.t_blens[(int)border[s->sub.trees.index++]] = (uInt)b & 7;
+        DUMPBITS(3)
+      }
+      while (s->sub.trees.index < 19)  /* lengths not transmitted are zero */
+        s->sub.trees.t_blens[(int)border[s->sub.trees.index++]] = 0;
+      s->sub.trees.bb = 7;
+
+      {
+        uInt hn = 0;          /* hufts used in space */
+
+        t = huft_build(s->sub.trees.t_blens, 19, 19, Z_NULL, Z_NULL,
+		       &s->sub.trees.tb, &s->sub.trees.bb, s->hufts, &hn);
+        if (t != Z_OK || !s->sub.trees.bb)
+        {
+          s->mode = NZ_BAD;   /* the next loop pass lands in default: and returns Z_STREAM_ERROR */
+          break;
+        }
+      }
+
+      s->sub.trees.index = 0;
+      Tracev((stderr, "inflate:       bits tree ok\n"));
+      s->mode = DTREE;                /* fall through */
+    case DTREE:                       /* decode the literal/length and distance code lengths */
+      while (t = s->sub.trees.table,
+             s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
+      {
+        inflate_huft *h;
+        uInt i, j, d;
+
+        t = s->sub.trees.bb;
+        NEEDBITS(t)
+        h = s->sub.trees.tb + ((uInt)b & (uInt)inflate_mask[t]);
+        t = h->bits;
+        d = h->base;
+        if (d < 16)           /* a literal code length */
+        {
+          DUMPBITS(t)
+          s->sub.trees.t_blens[s->sub.trees.index++] = d;
+        }
+        else /* d == 16..18 */
+        {
+          if (d == 18)        /* 7 extra bits, repeat count starts at 11 */
+          {
+            i=7;
+            j=11;
+          }
+          else                /* d == 16: 2 extra bits, d == 17: 3 extra bits; count starts at 3 */
+          {
+            i=d-14;
+            j=3;
+          }
+          NEEDBITS(t+i)
+          DUMPBITS(t)
+          j += (uInt)b & (uInt)inflate_mask[i];
+          DUMPBITS(i)
+          i = s->sub.trees.index;
+          t = s->sub.trees.table;
+          if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
+              (d == 16 && i < 1))     /* repeat runs past the table, or nothing to repeat yet */
+          {
+            s->mode = NZ_BAD;
+            LEAVE(Z_DATA_ERROR);
+          }
+          d = d == 16 ? s->sub.trees.t_blens[i - 1] : 0;   /* 16 repeats the previous length, 17/18 repeat zero */
+          do {
+            s->sub.trees.t_blens[i++] = d;
+          } while (--j);
+          s->sub.trees.index = i;
+        }
+      }
+      s->sub.trees.tb = Z_NULL;
+      {
+        uInt hn = 0;          /* hufts used in space */
+        uInt bl, bd;
+        inflate_huft *tl, *td;
+        int nl,nd;
+        t = s->sub.trees.table;
+
+        nl = 257 + (t & 0x1f);          /* number of literal/length codes */
+        nd = 1 + ((t >> 5) & 0x1f);     /* number of distance codes */
+        bl = 9;         /* must be <= 9 for lookahead assumptions */
+        bd = 6;         /* must be <= 9 for lookahead assumptions */
+
+        t = huft_build(s->sub.trees.t_blens, nl, 257, cplens, cplext, &tl, &bl, s->hufts, &hn);
+        if (bl == 0) t = Z_DATA_ERROR;
+        if (t == Z_OK)
+        {
+          /* build distance tree */
+          t = huft_build(s->sub.trees.t_blens + nl, nd, 0, cpdist, cpdext, &td, &bd, s->hufts, &hn);
+        }
+        if (t != Z_OK || (bd == 0 && nl > 257))
+        {
+          s->mode = NZ_BAD;
+          LEAVE(Z_DATA_ERROR);
+        }
+        Tracev((stderr, "inflate:       trees ok\n"));
+
+        /* s->sub.decode.t_codes.mode = CODES_START; */
+        s->sub.decode.t_codes.lbits = (Byte)bl;
+        s->sub.decode.t_codes.dbits = (Byte)bd;
+        s->sub.decode.t_codes.ltree = tl;
+        s->sub.decode.t_codes.dtree = td;
+      }
+      s->mode = CODES_START;          /* fall through */
+
+#define j (_state.j)
+#define e (_state.e)
+#define f (_state.f)
+
+    /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+
+    case CODES_START:         /* x: set up for LEN */
+      c->sub.code.need = c->lbits;
+      c->sub.code.tree = c->ltree;
+      s->mode = CODES_LEN;    /* fall through */
+    case CODES_LEN:           /* i: get length/literal/eob next */
+      t = c->sub.code.need;
+      NEEDBITS(t)
+      j = c->sub.code.tree + ((uInt)b & (uInt)inflate_mask[t]);
+      DUMPBITS(j->bits)
+      e = (uInt)(j->exop);
+      if (e == 0)               /* literal */
+      {
+        c->sub.lit = j->base;
+        s->mode = CODES_LIT;
+        break;
+      }
+      if (e & 16)               /* length */
+      {
+        c->sub.copy.get = e & 15;
+        c->len = j->base;
+        s->mode = CODES_LENEXT;
+        break;
+      }
+      if ((e & 64) == 0)        /* next table */
+      {
+        c->sub.code.need = e;
+        c->sub.code.tree = j + j->base;
+        break;
+      }
+      if (e & 32)               /* end of block */
+      {
+        s->mode = CODES_WASH;
+        break;
+      }
+    goto bad;                   /* none of the above: invalid literal/length code */
+    case CODES_LENEXT:        /* i: getting length extra (have base) */
+      t = c->sub.copy.get;
+      NEEDBITS(t)
+      c->len += (uInt)b & (uInt)inflate_mask[t];
+      DUMPBITS(t)
+      c->sub.code.need = c->dbits;
+      c->sub.code.tree = c->dtree;
+      s->mode = CODES_DIST;   /* fall through */
+    case CODES_DIST:          /* i: get distance next */
+      t = c->sub.code.need;
+      NEEDBITS(t)
+      j = c->sub.code.tree + ((uInt)b & (uInt)inflate_mask[t]);
+      DUMPBITS(j->bits)
+      e = (uInt)(j->exop);
+      if (e & 16)               /* distance */
+      {
+        c->sub.copy.get = e & 15;
+        c->sub.copy.dist = j->base;
+        s->mode = CODES_DISTEXT;
+        break;
+      }
+      if ((e & 64) == 0)        /* next table */
+      {
+        c->sub.code.need = e;
+        c->sub.code.tree = j + j->base;
+        break;
+      }
+      goto bad;        /* invalid code */
+    case CODES_DISTEXT:       /* i: getting distance extra */
+      t = c->sub.copy.get;
+      NEEDBITS(t)
+      c->sub.copy.dist += (uInt)b & (uInt)inflate_mask[t];
+      DUMPBITS(t)
+      s->mode = CODES_COPY;   /* fall through */
+    case CODES_COPY:          /* o: copying bytes in window, waiting for space */
+      f = (uInt)(q - s->window) < c->sub.copy.dist ?
+          s->end - (c->sub.copy.dist - (q - s->window)) :
+          q - c->sub.copy.dist;       /* source is dist bytes behind q, wrapping around the window end */
+
+      while (c->len)
+      {
+        NEEDOUT
+        OUTBYTE(*f++)
+        if (f == s->end)
+          f = s->window;
+        c->len--;
+      }
+      s->mode = CODES_START;
+      break;
+    case CODES_LIT:           /* o: got literal, waiting for output space */
+      NEEDOUT
+      OUTBYTE(c->sub.lit)
+      s->mode = CODES_START;
+      break;
+    case CODES_WASH:          /* o: got eob, possibly more output */
+      if (k > 7)        /* return unused byte, if any */
+      {
+        k -= 8;
+        n++;
+        p--;            /* can always return one */
+      }
+      /* flushing will be done in DRY (fall through) */
+
+#undef j
+#undef e
+#undef f
+
+    case DRY:
+      FLUSH
+      if (s->write != s->read)  /* window not fully drained to the output yet */
+        LEAVE(Z_OK)
+      if (s->mode == CODES_WASH)
+      {
+        Tracev((stderr, "inflate:       codes end, %lu total out\n",
+               z->total_out + (q >= s->read ? q - s->read :
+               (s->end - s->read) + (q - s->window))));
+      }
+      /* DRY if last, TYPE if not */
+      s->mode = (inflate_mode)s->last;
+      if (s->mode == TYPE)
+        break;
+      LEAVE(Z_STREAM_END)
+    /*case BAD:
+      r = Z_DATA_ERROR;
+      LEAVE
+    */
+    default: /* we'll call Z_STREAM_ERROR if BAD anyway */
+    bad:
+      s->mode = NZ_BAD;
+      LEAVE(Z_STREAM_ERROR)
+  }
+}
+
+#undef t
+#undef b
+#undef k
+#undef p
+#undef n
+#undef q
+#undef m

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,544 @@
+/*
+ *  Copyright (C) 2007 aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "others.h"
+#include "cltypes.h"
+#include "nsis_bzlib.h"
+#include "LZMADecode.h"
+#include "nsis_zlib.h"
+#include "matcher.h"
+#include "scanners.h"
+#include "nulsft.h" /* SHUT UP GCC -Wextra */
+
+#ifndef O_BINARY
+#define O_BINARY 0 /* not provided by this platform: becomes a no-op flag in the open() call */
+#endif
+
+#define EC32(x) le32_to_host(x) /* shorthand applied to 32-bit values read from the input file */
+
+enum { /* compression method identifiers kept in nsis_st.comp */
+  COMP_NOT_DETECTED, /* 0, the value a zero-filled nsis_st starts with */
+  COMP_BZIP2, /* handled through the nsis_BZ2_* functions */
+  COMP_LZMA, /* handled through lzmaInit/lzmaDecode/lzmaShutdown */
+  COMP_ZLIB, /* handled through nsis_inflateInit/nsis_inflate */
+  COMP_NOCOMP /* not referenced by the code in this file */
+};
+
+struct nsis_st { /* state of one NSIS extraction run; zero-filled by cli_scannulsft() */
+  int ifd; /* descriptor the archive is read from */
+  int ofd; /* descriptor of the output file currently being written */
+  off_t off; /* offset of the NSIS header inside ifd */
+  char *dir; /* temporary directory that receives the extracted files */
+  uint32_t asz; /* archive size from the header; reduced as items are consumed */
+  uint32_t hsz; /* header size field read from the NSIS header */
+  uint32_t fno; /* number of output files created so far (0 = headers not parsed yet) */
+  struct { /* compressor-independent stream cursors, mirrored into bz/lz/z by nsis_decomp() */
+    uint32_t avail_in;
+    unsigned char *next_in;
+    uint32_t avail_out;
+    unsigned char *next_out;
+  } nsis;
+  nsis_bzstream bz; /* bzip2 decoder state */
+  lzma_stream lz; /* LZMA decoder state */
+  nsis_z_stream z; /* zlib decoder state */
+  unsigned char *freeme; /* solid archives only: buffer holding the whole compressed archive */
+  uint8_t comp; /* one of the COMP_* values */
+  uint8_t solid; /* non-zero when the archive was guessed to be solid */
+  uint8_t freecomp; /* non-zero while the decoder must be released by nsis_shutdown() */
+  uint8_t eof; /* solid archives only: set once the compressed stream has ended */
+  char ofn[1024]; /* path of the current output file */
+};
+
+
+#define LINESTR(x) #x /* turn the argument, exactly as written, into a string literal */
+#define LINESTR2(x) LINESTR(x) /* extra level so that x is macro-expanded before being stringified */
+#define __AT__  " at "__FILE__":"LINESTR2(__LINE__) /* " at <file>:<line>" literal pasted onto debug messages */
+
+/*
+ * Set up the decoder selected by n->comp.
+ *
+ * n->freecomp records whether nsis_shutdown() has anything to release:
+ * it is set for bzip2 and LZMA and cleared for zlib.  Any other value of
+ * n->comp leaves the state untouched.
+ *
+ * Returns CL_EBZIP when the bzip2 decoder cannot be initialised and
+ * CL_SUCCESS in every other case.
+ */
+static int nsis_init(struct nsis_st *n) {
+  if (n->comp == COMP_BZIP2) {
+    if (nsis_BZ2_bzDecompressInit(&n->bz, 0, 0) != BZ_OK) {
+      return CL_EBZIP;
+    }
+    n->freecomp = 1;
+  } else if (n->comp == COMP_LZMA) {
+    lzmaInit(&n->lz);
+    n->freecomp = 1;
+  } else if (n->comp == COMP_ZLIB) {
+    nsis_inflateInit(&n->z);
+    n->freecomp = 0;
+  }
+
+  return CL_SUCCESS;
+}
+
+/*
+ * Release the decoder set up by nsis_init(), if n->freecomp says there is
+ * something to release, and clear the flag so that a second call is a no-op.
+ */
+static void nsis_shutdown(struct nsis_st *n) {
+  if (n->freecomp) {
+    if (n->comp == COMP_BZIP2) {
+      nsis_BZ2_bzDecompressEnd(&n->bz);
+    } else if (n->comp == COMP_LZMA) {
+      lzmaShutdown(&n->lz);
+    }
+    /* zlib (and anything else) has nothing to tear down here */
+    n->freecomp = 0;
+  }
+}
+
+/*
+ * Run one step of the decoder selected by n->comp.
+ *
+ * The generic cursors in n->nsis are copied into the decoder's own stream
+ * structure, the decoder is called once, and the updated cursors are
+ * copied back.
+ *
+ * Returns CL_SUCCESS when the decoder reports "ok", CL_BREAK when it
+ * reports end of stream, and CL_EFORMAT for any other decoder result or
+ * for an n->comp value that has no decoder.
+ */
+static int nsis_decomp(struct nsis_st *n) {
+  int status;
+  int ret = CL_EFORMAT;
+
+  if (n->comp == COMP_BZIP2) {
+    n->bz.avail_in = n->nsis.avail_in;
+    n->bz.next_in = n->nsis.next_in;
+    n->bz.avail_out = n->nsis.avail_out;
+    n->bz.next_out = n->nsis.next_out;
+
+    status = nsis_BZ2_bzDecompress(&n->bz);
+    if (status == BZ_OK)
+      ret = CL_SUCCESS;
+    else if (status == BZ_STREAM_END)
+      ret = CL_BREAK;
+
+    n->nsis.avail_in = n->bz.avail_in;
+    n->nsis.next_in = n->bz.next_in;
+    n->nsis.avail_out = n->bz.avail_out;
+    n->nsis.next_out = n->bz.next_out;
+  } else if (n->comp == COMP_LZMA) {
+    n->lz.avail_in = n->nsis.avail_in;
+    n->lz.next_in = n->nsis.next_in;
+    n->lz.avail_out = n->nsis.avail_out;
+    n->lz.next_out = n->nsis.next_out;
+
+    status = lzmaDecode(&n->lz);
+    if (status == LZMA_OK)
+      ret = CL_SUCCESS;
+    else if (status == LZMA_STREAM_END)
+      ret = CL_BREAK;
+
+    n->nsis.avail_in = n->lz.avail_in;
+    n->nsis.next_in = n->lz.next_in;
+    n->nsis.avail_out = n->lz.avail_out;
+    n->nsis.next_out = n->lz.next_out;
+  } else if (n->comp == COMP_ZLIB) {
+    n->z.avail_in = n->nsis.avail_in;
+    n->z.next_in = n->nsis.next_in;
+    n->z.avail_out = n->nsis.avail_out;
+    n->z.next_out = n->nsis.next_out;
+
+    status = nsis_inflate(&n->z);
+    if (status == Z_OK)
+      ret = CL_SUCCESS;
+    else if (status == Z_STREAM_END)
+      ret = CL_BREAK;
+
+    n->nsis.avail_in = n->z.avail_in;
+    n->nsis.next_in = n->z.next_in;
+    n->nsis.avail_out = n->z.avail_out;
+    n->nsis.next_out = n->z.next_out;
+  }
+
+  return ret;
+}
+
+/*
+ * Extract the next item of the archive into a new file inside n->dir.
+ *
+ * The output file is "<dir>/headers" for the first item and
+ * "<dir>/content.NNN" afterwards; its path is left in n->ofn and its
+ * descriptor in n->ofd.
+ *
+ * Non-solid archives: every item is a 32-bit length word (top bit set when
+ * the payload is compressed) followed by the payload, which is read from
+ * n->ifd and then either copied verbatim or decompressed.
+ * Solid archives: on the first call the whole archive is loaded into
+ * n->freeme and the decoder is initialised once; every call then pulls a
+ * 4-byte length and that many bytes out of the single compressed stream.
+ *
+ * Returns CL_SUCCESS with n->ofd still open (the caller scans and closes
+ * it), CL_BREAK when there is nothing more to extract, CL_EMAXFILES /
+ * CL_EMAXSIZE when a limit from ctx->limits is hit, or another CL_E* code
+ * on failure.  Whenever the output file was created and the result is not
+ * CL_SUCCESS, n->ofd has already been closed.
+ *
+ * In the non-solid path the per-item decoder is released with
+ * nsis_shutdown() on every exit taken after nsis_init() succeeded, so the
+ * next item's nsis_init() never re-initialises over live decoder state.
+ */
+static int nsis_unpack_next(struct nsis_st *n, cli_ctx *ctx) {
+  unsigned char *ibuf;
+  uint32_t size, loops;
+  int ret;
+  unsigned char obuf[BUFSIZ];
+
+  if (n->eof) {
+    cli_dbgmsg("NSIS: extraction complete\n");
+    return CL_BREAK;
+  }
+  if (ctx->limits && ctx->limits->maxfiles && n->fno >= ctx->limits->maxfiles) {
+    cli_dbgmsg("NSIS: Files limit reached (max: %u)\n", ctx->limits->maxfiles);
+    return CL_EMAXFILES;
+  }
+
+  if (n->fno)
+    snprintf(n->ofn, 1023, "%s/content.%.3u", n->dir, n->fno);
+  else
+    snprintf(n->ofn, 1023, "%s/headers", n->dir);
+
+  n->fno++;
+
+  if ((n->ofd=open(n->ofn, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, 0600))==-1) {
+    cli_errmsg("NSIS: unable to create output file %s - aborting.", n->ofn);
+    return CL_EIO;
+  }
+
+  if (!n->solid) {
+    /* length word of the next item */
+    if (cli_readn(n->ifd, &size, 4)!=4) {
+      cli_dbgmsg("NSIS: reached EOF - extraction complete\n");
+      close(n->ofd);
+      return CL_BREAK;
+    }
+    if (n->asz==4) {
+      cli_dbgmsg("NSIS: reached CRC - extraction complete\n");
+      close(n->ofd);
+      return CL_BREAK;
+    }
+    /* loops keeps the raw word (with the top bit), size the payload length */
+    loops = EC32(size);
+    if (!(size = (loops&~0x80000000))) {
+      cli_dbgmsg("NSIS: empty file found\n");
+      return CL_SUCCESS;
+    }
+    if (n->asz <4 || size > n->asz-4) {
+      cli_dbgmsg("NSIS: next file is outside the archive\n");
+      close(n->ofd);
+      return CL_BREAK;
+    }
+
+    n->asz -= size+4;
+
+    if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) {
+      cli_dbgmsg("NSIS: Skipping file due to size limit (%u, max: %lu)\n", size, ctx->limits->maxfilesize);
+      close(n->ofd);
+      if (lseek(n->ifd, size, SEEK_CUR)==-1) return CL_EIO;
+      return CL_EMAXSIZE;
+    }
+    if (!(ibuf= (unsigned char *) cli_malloc(size))) {
+      cli_dbgmsg("NSIS: out of memory"__AT__"\n");
+      close(n->ofd);
+      return CL_EMEM;
+    }
+    if (cli_readn(n->ifd, ibuf, size) != (ssize_t) size) {
+      cli_dbgmsg("NSIS: cannot read %u bytes"__AT__"\n", size);
+      free(ibuf);
+      close(n->ofd);
+      return CL_EIO;
+    }
+    if (loops==size) {
+      /* top bit clear: the payload is stored, copy it as is */
+      if (cli_writen(n->ofd, ibuf, size) != (ssize_t) size) {
+        cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+        free(ibuf);
+        close(n->ofd);
+        return CL_EIO;
+      }
+    } else {
+      if ((ret=nsis_init(n))!=CL_SUCCESS) {
+        cli_dbgmsg("NSIS: decompressor init failed"__AT__"\n");
+        free(ibuf);
+        close(n->ofd);
+        return ret;
+      }
+
+      n->nsis.avail_in = size;
+      n->nsis.next_in = ibuf;
+      n->nsis.next_out = obuf;
+      n->nsis.avail_out = BUFSIZ;
+      loops=0; /* from here on: consecutive decoder calls that produced no output */
+
+      while ((ret=nsis_decomp(n))==CL_SUCCESS) {
+        if ((size = n->nsis.next_out - obuf)) {
+          if (cli_writen(n->ofd, obuf, size) != (ssize_t) size) {
+            cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+            free(ibuf);
+            close(n->ofd);
+            nsis_shutdown(n);
+            return CL_EIO;
+          }
+          n->nsis.next_out = obuf;
+          n->nsis.avail_out = BUFSIZ;
+          loops=0;
+          /* NOTE(review): size is the chunk just written, not the running total -- confirm this is the intended limit check */
+          if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) {
+            cli_dbgmsg("NSIS: Skipping file due to size limit (%u, max: %lu)\n", size, ctx->limits->maxfilesize);
+            free(ibuf);
+            close(n->ofd);
+            nsis_shutdown(n);
+            return CL_EMAXSIZE;
+          }
+        } else if (++loops > 10) {
+          cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+          ret = CL_BREAK;
+          break;
+        }
+      }
+
+      if (ret != CL_BREAK) {
+        cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+        free(ibuf);
+        close(n->ofd);
+        nsis_shutdown(n);
+        return CL_EFORMAT;
+      }
+
+      /* flush whatever the last decoder call left in obuf */
+      if (cli_writen(n->ofd, obuf, n->nsis.next_out - obuf) != n->nsis.next_out - obuf) {
+        cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+        free(ibuf);
+        close(n->ofd);
+        nsis_shutdown(n);
+        return CL_EIO;
+      }
+      nsis_shutdown(n);
+    }
+
+    free(ibuf);
+    return CL_SUCCESS;
+
+  } else {
+    if (!n->freeme) {
+      /* first call on a solid archive: start the decoder and load the archive;
+         both are released later by cli_nsis_free() */
+      if ((ret=nsis_init(n))!=CL_SUCCESS) {
+        cli_dbgmsg("NSIS: decompressor init failed\n");
+        close(n->ofd);
+        return ret;
+      }
+      if (!(n->freeme= (unsigned char *) cli_malloc(n->asz))) {
+        cli_dbgmsg("NSIS: out of memory\n");
+        close(n->ofd);
+        return CL_EMEM;
+      }
+      if (cli_readn(n->ifd, n->freeme, n->asz) != (ssize_t) n->asz) {
+        cli_dbgmsg("NSIS: cannot read %u bytes"__AT__"\n", n->asz);
+        close(n->ofd);
+        return CL_EIO;
+      }
+      n->nsis.next_in = n->freeme;
+      n->nsis.avail_in = n->asz;
+    }
+
+    if (n->nsis.avail_in<=4) {
+      cli_dbgmsg("NSIS: extraction complete\n");
+      close(n->ofd);
+      return CL_BREAK;
+    }
+    /* decode exactly four bytes: the length of the next item */
+    n->nsis.next_out = obuf;
+    n->nsis.avail_out = 4;
+    loops = 0;
+
+    while ((ret=nsis_decomp(n))==CL_SUCCESS) {
+      if (n->nsis.next_out - obuf == 4) break;
+      if (++loops > 20) {
+        cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+        ret = CL_BREAK;
+        break;
+      }
+    }
+
+    if (ret != CL_SUCCESS) {
+      cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+      close(n->ofd);
+      return CL_EFORMAT;
+    }
+
+    size=cli_readint32(obuf);
+    if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) {
+      cli_dbgmsg("NSIS: Breaking out due to filesize limit (%u, max: %lu) in solid archive\n", size, ctx->limits->maxfilesize);
+      close(n->ofd);
+      return CL_EFORMAT;
+    }
+
+    n->nsis.next_out = obuf;
+    n->nsis.avail_out = MIN(BUFSIZ,size);
+    loops = 0;
+
+    /* decode the item itself, at most BUFSIZ bytes per round */
+    while (size && (ret=nsis_decomp(n))==CL_SUCCESS) {
+      unsigned int wsz;
+      if ((wsz = n->nsis.next_out - obuf)) {
+        if (cli_writen(n->ofd, obuf, wsz) != (ssize_t) wsz) {
+          close(n->ofd);
+          return CL_EIO;
+        }
+        size-=wsz;
+        n->nsis.next_out = obuf;
+        n->nsis.avail_out = MIN(size,BUFSIZ);
+      } else if ( ++loops > 20 ) {
+        cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+        ret = CL_BREAK;
+        break;
+      }
+    }
+
+    if (ret == CL_BREAK) {
+      /* stream ended (or stalled): flush the tail and remember we are done */
+      if (cli_writen(n->ofd, obuf, n->nsis.next_out - obuf) != n->nsis.next_out - obuf) {
+        close(n->ofd);
+        return CL_EIO;
+      }
+      n->eof=1;
+    } else if (ret != CL_SUCCESS) {
+      cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+      close(n->ofd);
+      return CL_EFORMAT;
+    }
+
+    return CL_SUCCESS;
+  }
+
+}
+
+/*
+ * Guess the compression method from the first bytes of a compressed block:
+ * a leading '1' means bzip2, a first 32-bit word equal to 0x5d (top bit
+ * ignored) means LZMA, anything else is taken to be zlib.
+ */
+static uint8_t nsis_detcomp(const char *b) {
+  uint8_t guess = COMP_ZLIB;
+
+  if (*b == '1') {
+    guess = COMP_BZIP2;
+  } else if ((cli_readint32(b)&~0x80000000) == 0x5d) {
+    guess = COMP_LZMA;
+  }
+  return guess;
+}
+/* Parse the 28-byte NSIS header found at n->off, guess whether the archive is solid and which compressor it uses, then extract the first item. Returns CL_EIO on a stat/seek/read failure, otherwise the result of nsis_unpack_next(). */
+static int nsis_headers(struct nsis_st *n, cli_ctx *ctx) {
+  char buf[28];
+  struct stat st;
+  uint32_t pos;
+  int i;
+  uint8_t comps[] = {0, 0, 0, 0}, trunc = 0; /* comps[COMP_*]: votes per compression method; trunc: file is shorter than the header claims */
+  
+  if (fstat(n->ifd, &st)==-1 ||
+      lseek(n->ifd, n->off, SEEK_SET)==-1 ||
+      cli_readn(n->ifd, buf, 28) != 28)
+    return CL_EIO;
+
+  n->hsz = (uint32_t)cli_readint32(buf+0x14);
+  n->asz = (uint32_t)cli_readint32(buf+0x18);
+
+  cli_dbgmsg("NSIS: Header info - Flags=%x, Header size=%x, Archive size=%x\n", cli_readint32(buf), n->hsz, n->asz);
+
+  if (st.st_size - n->off < (off_t) n->asz) {
+    cli_dbgmsg("NSIS: Possibly truncated file\n");
+    n->asz = st.st_size - n->off; /* clamp to what is really there */
+    trunc++;
+  } else if (st.st_size - n->off != (off_t) n->asz) {
+    cli_dbgmsg("NSIS: Overlays found\n");
+  }
+
+  n->asz -= 0x1c; /* drop the 28 header bytes just read. NOTE(review): asz is unsigned and wraps if the header claims less than 0x1c -- confirm whether a guard is wanted */
+
+  /* Guess if solid: hop from one length-prefixed block to the next; if a block would end past the archive size the data is taken to be one solid stream */
+  for (i=0, pos=0;pos < n->asz-4;i++) {
+    int32_t nextsz;
+    if (cli_readn(n->ifd, buf+4, 4)!=4) return CL_EIO;
+    nextsz=cli_readint32(buf+4);
+    if (!i) n->comp = nsis_detcomp(buf+4); /* guess from the very first word; only kept if the archive turns out to be solid */
+    if (nextsz&0x80000000) { /* top bit set: block is compressed, peek at its first word to vote on the method */
+      nextsz&=~0x80000000;
+      if (cli_readn(n->ifd, buf+4, 4)!=4) return CL_EIO;
+      comps[nsis_detcomp(buf+4)]++;
+      nextsz-=4; /* the four peeked bytes are already consumed */
+      pos+=4;
+    }
+    if ((pos+=4+nextsz) > n->asz) {
+      n->solid = 1;
+      break;
+    }
+
+    if (lseek(n->ifd, nextsz, SEEK_CUR)==-1) return CL_EIO;
+  }
+  
+  if (trunc && i>=2) n->solid=0; /* a truncated file that still yielded two or more blocks is treated as non-solid */
+
+  cli_dbgmsg("NSIS: solid compression%s detected\n", (n->solid)?"":" not");
+
+  /* Guess the compression method: most votes wins, ties favour bzip2 over lzma over zlib */
+  if (!n->solid) {
+    cli_dbgmsg("NSIS: bzip2 %u - lzma %u - zlib %u\n", comps[1], comps[2], comps[3]);
+    n->comp = (comps[1]<comps[2]) ? (comps[2]<comps[3] ? COMP_ZLIB : COMP_LZMA) : (comps[1]<comps[3] ? COMP_ZLIB : COMP_BZIP2);
+  }
+
+  if (lseek(n->ifd, n->off+0x1c, SEEK_SET)==-1) return CL_EIO; /* back to the first block, right after the header */
+
+  return nsis_unpack_next(n, ctx);
+}
+
+
+
+/*
+ * Extract one item: the first call (no output file created yet) goes
+ * through nsis_headers(), every later call through nsis_unpack_next().
+ */
+static int cli_nsis_unpack(struct nsis_st *n, cli_ctx *ctx) {
+  if (n->fno == 0) {
+    return nsis_headers(n, ctx);
+  }
+  return nsis_unpack_next(n, ctx);
+}
+
+/*
+ * Release what an extraction run still holds: the decoder state and, for
+ * solid archives, the buffer the archive was loaded into.
+ */
+static void cli_nsis_free(struct nsis_st *n) {
+  nsis_shutdown(n);
+
+  if (!n->solid) {
+    return;
+  }
+  if (n->freeme) {
+    free(n->freeme);
+  }
+}
+
+/*
+ * Scan an NSIS installer embedded in descriptor desc at the given offset.
+ *
+ * Items are extracted one by one into a fresh temporary directory; the
+ * first one is scanned with cli_scandesc(), the others with
+ * cli_magic_scandesc().  The loop stops at the first result other than
+ * CL_SUCCESS.  An item skipped for exceeding the size limit is reported
+ * as "NSIS.ExceededFileSize" when BLOCKMAX is true; otherwise it ends the
+ * run for solid archives and is simply skipped for non-solid ones.
+ *
+ * Returns CL_EMAXREC when the archive recursion limit is already reached,
+ * CL_ETMPDIR when the temporary directory cannot be set up, CL_CLEAN when
+ * extraction ran to its end, or the result that stopped the loop.
+ */
+int cli_scannulsft(int desc, cli_ctx *ctx, off_t offset) {
+    struct nsis_st nsist;
+    int ret;
+
+    cli_dbgmsg("in scannulsft()\n");
+    if (ctx->limits && ctx->limits->maxreclevel && ctx->arec >= ctx->limits->maxreclevel) {
+        cli_dbgmsg("Archive recursion limit exceeded (arec == %u).\n", ctx->arec+1);
+        return CL_EMAXREC;
+    }
+
+    memset(&nsist, 0, sizeof(nsist));
+    nsist.ifd = desc;
+    nsist.off = offset;
+
+    nsist.dir = cli_gentemp(NULL);
+    if (!nsist.dir) {
+        return CL_ETMPDIR;
+    }
+    if (mkdir(nsist.dir, 0700) != 0) {
+        cli_dbgmsg("NSIS: Can't create temporary directory %s\n", nsist.dir);
+        free(nsist.dir);
+        return CL_ETMPDIR;
+    }
+
+    if (cli_leavetemps_flag) {
+        cli_dbgmsg("NSIS: Extracting files to %s\n", nsist.dir);
+    }
+
+    ctx->arec++;
+
+    for (;;) {
+        ret = cli_nsis_unpack(&nsist, ctx);
+
+        if (ret == CL_SUCCESS) {
+            cli_dbgmsg("NSIS: Successully extracted file #%u\n", nsist.fno);
+            /* rewind the freshly written file before handing it to the scanner */
+            lseek(nsist.ofd, 0, SEEK_SET);
+            if (nsist.fno == 1) {
+                ret = cli_scandesc(nsist.ofd, ctx, 0, 0, 0, NULL);
+            } else {
+                ret = cli_magic_scandesc(nsist.ofd, ctx);
+            }
+            close(nsist.ofd);
+            if (!cli_leavetemps_flag) {
+                unlink(nsist.ofn);
+            }
+        } else if (ret == CL_EMAXSIZE) {
+            if (BLOCKMAX) {
+                *ctx->virname = "NSIS.ExceededFileSize";
+                ret = CL_VIRUS;
+            } else if (nsist.solid) {
+                ret = CL_BREAK;
+            } else {
+                ret = CL_SUCCESS;
+            }
+        }
+
+        if (ret != CL_SUCCESS) {
+            break;
+        }
+    }
+
+    if (ret == CL_BREAK) {
+        ret = CL_CLEAN;
+    }
+
+    cli_nsis_free(&nsist);
+
+    if (!cli_leavetemps_flag) {
+        cli_rmdirs(nsist.dir);
+    }
+    free(nsist.dir);
+
+    ctx->arec--;
+    return ret;
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,891 @@
+/*
+ *  Extract component parts of OLE2 files (e.g. MS Office Documents)
+ *
+ *  Copyright (C) 2004-2007 trog at uncon.org
+ *
+ *  This code is based on the OpenOffice and libgsf sources.
+ *                  
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <ctype.h>
+#include <stdlib.h>
+#include "clamav.h"
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include "cltypes.h"
+#include "others.h"
+#include "ole2_extract.h"
+
+#include "mbox.h"
+#include "blob.h" /* sanitiseName() */
+
+#define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v)) /* applied to 16-bit fields taken from the file */
+#define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v)) /* applied to 32-bit fields taken from the file */
+
+#ifndef HAVE_ATTRIB_PACKED
+#define __attribute__(x) /* expands to nothing, so the __attribute__ ((packed)) annotations in this file vanish */
+#endif
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack(1) /* packing request for the structure declarations that follow */
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack 1 /* same request, alternative pragma spelling */
+#endif
+
+#ifndef	O_BINARY
+#define	O_BINARY	0 /* not provided by this platform: becomes a no-op flag for open() */
+#endif
+
+typedef struct ole2_header_tag /* OLE2 file header as stored on disk, followed by decoder-only bookkeeping */
+{
+	unsigned char magic[8];		/* should be: 0xd0cf11e0a1b11ae1 */
+	unsigned char clsid[16];
+	uint16_t minor_version __attribute__ ((packed));
+	uint16_t dll_version __attribute__ ((packed));
+	int16_t byte_order __attribute__ ((packed));			/* -2=intel */
+
+	uint16_t log2_big_block_size __attribute__ ((packed));		/* usually 9 (2^9 = 512) */
+	uint32_t log2_small_block_size __attribute__ ((packed));	/* usually 6 (2^6 = 128) */
+
+	int32_t reserved[2] __attribute__ ((packed));
+	int32_t bat_count __attribute__ ((packed)); /* upper bound checked against the bat_array index in ole2_get_next_bat_block() */
+	int32_t prop_start __attribute__ ((packed)); /* block where the property table walk starts */
+
+	uint32_t signature __attribute__ ((packed));
+	uint32_t sbat_cutoff __attribute__ ((packed));			/* cutoff for files held in small blocks (4096) */
+
+	int32_t sbat_start __attribute__ ((packed)); /* first block of the small-block allocation table chain */
+	int32_t sbat_block_count __attribute__ ((packed));
+	int32_t xbat_start __attribute__ ((packed)); /* first XBAT block, read by ole2_get_next_xbat_block() */
+	int32_t xbat_count __attribute__ ((packed));
+	int32_t bat_array[109] __attribute__ ((packed)); /* block numbers of the BAT blocks reachable straight from the header */
+
+	/* not part of the ole2 header, but stuff we need in order to decode */
+	/* must take account of the size of variables below here when
+	   reading the header */
+	int32_t sbat_root_start __attribute__ ((packed)); /* start block of the root entry, filled in by the property tree walk */
+	unsigned char *m_area; /* when non-NULL, blocks are copied from this memory image instead of being read from the descriptor */
+	off_t m_length; /* size in bytes of the m_area image */
+	bitset_t *bitset; /* property indices already visited, used to detect loops */
+	uint32_t max_block_no; /* largest property index the tree walk accepts */
+} ole2_header_t;
+
+typedef struct property_tag /* one entry of the OLE2 property table as stored on disk */
+{
+	char name[64];		/* in unicode */
+	uint16_t name_size __attribute__ ((packed)); /* length of name in bytes; values above 64 are rejected by the users of this field */
+	unsigned char type;		/* 1=dir 2=file 5=root */
+	unsigned char color;		/* black or red */
+	uint32_t prev __attribute__ ((packed)); /* property index followed by ole2_walk_property_tree() */
+	uint32_t next __attribute__ ((packed)); /* property index followed by ole2_walk_property_tree() */
+	uint32_t child __attribute__ ((packed)); /* property index followed by ole2_walk_property_tree() */
+
+	unsigned char clsid[16];
+	uint32_t user_flags __attribute__ ((packed));
+
+	uint32_t create_lowdate __attribute__ ((packed));
+	uint32_t create_highdate __attribute__ ((packed));
+	uint32_t mod_lowdate __attribute__ ((packed));
+	uint32_t mod_highdate __attribute__ ((packed));
+	uint32_t start_block __attribute__ ((packed)); /* for the root entry this becomes hdr->sbat_root_start */
+	uint32_t size __attribute__ ((packed));
+	unsigned char reserved[4];
+} property_t;
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack()
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack
+#endif
+
+static unsigned char magic_id[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1}; /* the eight signature bytes expected in ole2_header_t.magic */
+
+/*
+ * Build a printable, heap-allocated version of a property name.
+ *
+ * Only every second byte of name is inspected (the name is stored as
+ * 16-bit characters) and the last character is skipped as the terminator.
+ * Printable 7-bit characters are copied; byte values 0..9 become "_<digit>_";
+ * anything else becomes '_' followed by five letters derived from the
+ * character and a closing '_'.
+ *
+ * Returns NULL when the name is empty, size is outside 1..64, memory
+ * cannot be allocated, or the result would be empty.  The caller frees
+ * the returned string.
+ */
+static char *get_property_name(char *name, int size)
+{
+	/* shift amounts applied, in this order, to the 16-bit character code */
+	static const int shifts[] = { 0, 4, 8, 16, 24 };
+	char *out;
+	int src, dst = 0;
+	unsigned int k;
+
+	if (*name == 0 || size <= 0 || size > 64)
+		return NULL;
+
+	/* at most seven output bytes are produced per input character */
+	out = (char *) cli_malloc(size*7);
+	if (!out)
+		return NULL;
+
+	/* size-2 to ignore trailing NULL */
+	for (src = 0; src < size-2; src += 2) {
+		const char c = name[src];
+
+		if ((!(c&0x80)) && isprint(c)) {
+			out[dst++] = c;
+			continue;
+		}
+
+		if (c < 10 && c >= 0) {
+			out[dst++] = '_';
+			out[dst++] = c + '0';
+		} else {
+			const uint16_t x = (((uint16_t)c) << 8) | name[src+1];
+
+			out[dst++] = '_';
+			for (k = 0; k < sizeof(shifts)/sizeof(shifts[0]); k++)
+				out[dst++] = 'a'+((x>>shifts[k])&0xF);
+		}
+		out[dst++] = '_';
+	}
+	out[dst] = '\0';
+
+	if (out[0] == '\0') {
+		free(out);
+		return NULL;
+	}
+	return out;
+}
+
+/*
+ * Write the printable form of a property name to the debug log,
+ * right-aligned in a 34-character column.  Nothing is logged when
+ * get_property_name() yields no name.
+ */
+static void print_property_name(char *pname, int size)
+{
+	char *printable = get_property_name(pname, size);
+
+	if (printable) {
+		cli_dbgmsg("%34s ", printable);
+		free(printable);
+	}
+}
+
+/*
+ * Write a one-line summary of a property entry to the debug log:
+ * name, entry type, colour flag, size and user flags.  Entries whose
+ * name length exceeds 64 only produce an error note.
+ */
+static void print_ole2_property(property_t *property)
+{
+	if (property->name_size > 64) {
+		cli_dbgmsg("[err name len: %d]\n", property->name_size);
+		return;
+	}
+	print_property_name(property->name, property->name_size);
+
+	/* entry type */
+	if (property->type == 2) {
+		cli_dbgmsg(" [file] ");
+	} else if (property->type == 1) {
+		cli_dbgmsg(" [dir ] ");
+	} else if (property->type == 5) {
+		cli_dbgmsg(" [root] ");
+	} else {
+		cli_dbgmsg(" [%d]", property->type);
+	}
+
+	/* colour flag: 0 is logged as r, 1 as b, anything else as u */
+	if (property->color == 0) {
+		cli_dbgmsg(" r ");
+	} else if (property->color == 1) {
+		cli_dbgmsg(" b ");
+	} else {
+		cli_dbgmsg(" u ");
+	}
+
+	cli_dbgmsg(" 0x%.8x 0x%.8x\n", property->size, property->user_flags);
+}
+
+/*
+ * Dump the fields of an OLE2 header to the debug log, one per line.
+ * A NULL header is ignored.
+ */
+static void print_ole2_header(ole2_header_t *hdr)
+{
+	int pos;
+
+	if (hdr == NULL)
+		return;
+
+	cli_dbgmsg("\nMagic:\t\t\t0x");
+	pos = 0;
+	while (pos < 8) {
+		cli_dbgmsg("%x", hdr->magic[pos]);
+		pos++;
+	}
+	cli_dbgmsg("\n");
+
+	cli_dbgmsg("CLSID:\t\t\t{");
+	pos = 0;
+	while (pos < 16) {
+		cli_dbgmsg("%x ", hdr->clsid[pos]);
+		pos++;
+	}
+	cli_dbgmsg("}\n");
+
+	/* versions and geometry */
+	cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
+	cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
+	cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
+	cli_dbgmsg("Big Block Size:\t\t%i\n", hdr->log2_big_block_size);
+	cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
+
+	/* allocation tables */
+	cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
+	cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
+	cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
+	cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
+	cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
+	cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
+	cli_dbgmsg("XBat block count:\t%d\n\n", hdr->xbat_count);
+}
+
+/*
+ * Read big block number blockno into buff.
+ *
+ * The block lives at byte offset (blockno << log2_big_block_size) + 512,
+ * 512 being the size of the file header.  When hdr->m_area is NULL the
+ * block is read from fd, otherwise it is copied out of the memory image
+ * hdr->m_area of hdr->m_length bytes.
+ *
+ * buff must have room for 1 << log2_big_block_size bytes.
+ * NOTE(review): the callers in this file pass 512-byte buffers, so the
+ * header is presumably validated to use 512-byte blocks before this is
+ * reached -- confirm.
+ *
+ * Returns TRUE on success.  Returns FALSE for a negative block number, a
+ * block size or offset that cannot be represented in a non-negative
+ * 32-bit value, a failed seek/read, or a block that does not lie entirely
+ * inside the memory image.
+ */
+static int ole2_read_block(int fd, ole2_header_t *hdr, void *buff, int32_t blockno)
+{
+	off_t offset;
+	int32_t blocksize;
+
+	if (blockno < 0) {
+		return FALSE;
+	}
+
+	/* shifting a 32-bit int by 31 or more is undefined */
+	if (hdr->log2_big_block_size > 30) {
+		return FALSE;
+	}
+	blocksize = (int32_t) 1 << hdr->log2_big_block_size;
+
+	/* refuse block numbers whose byte offset would overflow 31 bits;
+	   the shift below can then neither wrap nor turn negative */
+	if (blockno > ((0x7fffffff - 512) >> hdr->log2_big_block_size)) {
+		return FALSE;
+	}
+
+	/* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */
+	offset = ((off_t) blockno << hdr->log2_big_block_size) + 512;	/* 512 is header size */
+
+	if (hdr->m_area == NULL) {
+		if (lseek(fd, offset, SEEK_SET) != offset) {
+			return FALSE;
+		}
+		if (cli_readn(fd, buff, blocksize) != blocksize) {
+			return FALSE;
+		}
+	} else {
+		/* the whole block must lie inside the image; written so that
+		   no intermediate value can overflow */
+		if ((offset > hdr->m_length) || (blocksize > hdr->m_length - offset)) {
+			return FALSE;
+		}
+		memcpy(buff, hdr->m_area+offset, blocksize);
+	}
+	return TRUE;
+}
+
+/*
+ * Look up the successor of current_block in the block allocation table,
+ * using the BAT blocks listed in the header's bat_array.
+ *
+ * Returns the table entry, -1 for a negative block number or a failed
+ * read, and -10 when the BAT index lies beyond hdr->bat_count.
+ */
+static int32_t ole2_get_next_bat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+	uint32_t bat[128];
+	int32_t slot, entry;
+
+	if (current_block < 0)
+		return -1;
+
+	/* each BAT block holds 128 entries */
+	slot = current_block / 128;
+	if (slot > hdr->bat_count) {
+		cli_dbgmsg("bat_array index error\n");
+		return -10;
+	}
+	entry = current_block - (slot * 128);
+
+	if (!ole2_read_block(fd, hdr, &bat, ole2_endian_convert_32(hdr->bat_array[slot])))
+		return -1;
+
+	return ole2_endian_convert_32(bat[entry]);
+}
+
+/*
+ * Look up the successor of current_block for blocks whose BAT block is
+ * not listed in the header but in the chain of XBAT blocks.
+ *
+ * Returns the table entry, or -1 for a negative block number or when any
+ * of the block reads fails.
+ */
+static int32_t ole2_get_next_xbat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+	uint32_t xbat[128], bat[128];
+	int32_t bat_no, hops, xbat_slot, bat_slot;
+
+	if (current_block < 0)
+		return -1;
+
+	bat_no = current_block / 128;
+	bat_slot = current_block % 128;
+
+	/* NB:	The last entry in each XBAT points to the next XBAT block.
+		This reduces the number of entries in each block by 1.
+		The first 109 BAT blocks are the ones held in the header.
+	*/
+	hops = (bat_no - 109) / 127;
+	xbat_slot = (bat_no - 109) % 127;
+
+	if (!ole2_read_block(fd, hdr, &xbat, hdr->xbat_start))
+		return -1;
+
+	/* Follow the chain of XBAT blocks */
+	for (; hops > 0; hops--) {
+		if (!ole2_read_block(fd, hdr, &xbat, ole2_endian_convert_32(xbat[127])))
+			return -1;
+	}
+
+	if (!ole2_read_block(fd, hdr, &bat, ole2_endian_convert_32(xbat[xbat_slot])))
+		return -1;
+
+	return ole2_endian_convert_32(bat[bat_slot]);
+}
+
+/*
+ * Return the block that follows current_block in its chain.  Blocks
+ * covered by the 109 BAT blocks of the header are resolved through the
+ * BAT, all others through the XBAT chain.  Returns -1 for a negative
+ * block number.
+ */
+static int32_t ole2_get_next_block_number(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+	if (current_block < 0)
+		return -1;
+
+	return ((current_block / 128) > 108)
+		? ole2_get_next_xbat_block(fd, hdr, current_block)
+		: ole2_get_next_bat_block(fd, hdr, current_block);
+}
+
+/*
+ * Look up the successor of small block current_block in the small-block
+ * allocation table, whose chain of big blocks starts at hdr->sbat_start.
+ *
+ * Returns the table entry, or -1 for a negative block number or a failed
+ * read.
+ */
+static int32_t ole2_get_next_sbat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+	uint32_t sbat[128];
+	int32_t hops, table_block;
+
+	if (current_block < 0)
+		return -1;
+
+	/* walk to the table block that holds the entry (128 entries each) */
+	table_block = hdr->sbat_start;
+	for (hops = current_block / 128; hops > 0; hops--)
+		table_block = ole2_get_next_block_number(fd, hdr, table_block);
+
+	if (!ole2_read_block(fd, hdr, &sbat, table_block))
+		return -1;
+
+	return ole2_endian_convert_32(sbat[current_block % 128]);
+}
+
+/*
+ * Read into buff the big block that holds the data of small block
+ * sbat_index.  The small-block data chain starts at hdr->sbat_root_start.
+ *
+ * Returns the result of ole2_read_block(), or FALSE for a negative index
+ * or when no root start block is known.
+ */
+static int32_t ole2_get_sbat_data_block(int fd, ole2_header_t *hdr, void *buff, int32_t sbat_index)
+{
+	int32_t hops, data_block;
+
+	if (sbat_index < 0)
+		return FALSE;
+
+	if (hdr->sbat_root_start < 0) {
+		cli_errmsg("No root start block\n");
+		return FALSE;
+	}
+
+	/* 8 small blocks per big block: skip that many big blocks of the chain */
+	data_block = hdr->sbat_root_start;
+	for (hops = sbat_index / 8; hops > 0; hops--)
+		data_block = ole2_get_next_block_number(fd, hdr, data_block);
+
+	/* data_block is now the big block containing the required small block */
+	return(ole2_read_block(fd, hdr, buff, data_block));
+}
+
+/* Read the property tree.
+   It is read as just an array rather than a tree */
+/*
+static void ole2_read_property_tree(int fd, ole2_header_t *hdr, const char *dir,
+				int (*handler)(int fd, ole2_header_t *hdr, property_t *prop, const char *dir))
+{
+	property_t prop_block[4];
+	int32_t index, current_block, count=0;
+	
+	current_block = hdr->prop_start;
+
+	while(current_block >= 0) {
+		if (!ole2_read_block(fd, hdr, prop_block,
+					current_block)) {
+			return;
+		}
+		for (index=0 ; index < 4 ; index++) {
+			if (prop_block[index].type > 0) {
+				prop_block[index].name_size = ole2_endian_convert_16(prop_block[index].name_size);
+				prop_block[index].prev = ole2_endian_convert_32(prop_block[index].prev);
+				prop_block[index].next = ole2_endian_convert_32(prop_block[index].next);
+				prop_block[index].child = ole2_endian_convert_32(prop_block[index].child);
+				prop_block[index].user_flags = ole2_endian_convert_32(prop_block[index].user_flags);
+				prop_block[index].create_lowdate = ole2_endian_convert_32(prop_block[index].create_lowdate);
+				prop_block[index].create_highdate = ole2_endian_convert_32(prop_block[index].create_highdate);
+				prop_block[index].mod_lowdate = ole2_endian_convert_32(prop_block[index].mod_lowdate);
+				prop_block[index].mod_highdate = ole2_endian_convert_32(prop_block[index].mod_highdate);
+				prop_block[index].start_block = ole2_endian_convert_32(prop_block[index].start_block);
+				prop_block[index].size = ole2_endian_convert_32(prop_block[index].size);
+				if (prop_block[index].type > 5) {
+					cli_dbgmsg("ERROR: invalid property type: %d\n", prop_block[index].type);
+					return;
+				}
+				if (prop_block[index].type == 5) {
+					hdr->sbat_root_start = prop_block[index].start_block;
+				}
+				print_ole2_property(&prop_block[index]);
+				if (!handler(fd, hdr, &prop_block[index], dir)) {
+					cli_dbgmsg("ERROR: handler failed\n");
+					return;
+				}
+			}
+		}
+		current_block = ole2_get_next_block_number(fd, hdr, current_block);
+		if (++count > 100000) {
+			cli_dbgmsg("ERROR: loop detected\n");
+			return;
+		}
+	}
+	return;
+}
+*/
+/* Visit property prop_index of the OLE2 property table and recurse into its prev/next/child links, calling handler on every file entry; rec_level, *file_count and limits bound the walk, hdr->bitset stops loops. */
+static void ole2_walk_property_tree(int fd, ole2_header_t *hdr, const char *dir, int32_t prop_index,
+				int (*handler)(int fd, ole2_header_t *hdr, property_t *prop, const char *dir),
+				unsigned int rec_level, unsigned int *file_count, const struct cl_limits *limits)
+{
+	property_t prop_block[4]; /* a block is read as four property entries */
+	int32_t index, current_block, i;
+	char *dirname;
+
+	current_block = hdr->prop_start;
+
+	if ((prop_index < 0) || (prop_index > hdr->max_block_no) || (rec_level > 100) || (*file_count > 100000)) { /* hard caps, applied even without limits */
+		return;
+	}
+
+	if (limits && limits->maxfiles && (*file_count > limits->maxfiles)) {
+		cli_dbgmsg("OLE2: File limit reached (max: %d)\n", limits->maxfiles);
+		return;
+	}
+	
+	if (limits && limits->maxreclevel && (rec_level > limits->maxreclevel)) {
+		cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", limits->maxreclevel);
+		return;
+	}
+
+	index = prop_index / 4; /* number of blocks to skip from prop_start */
+	for (i=0 ; i < index ; i++) {
+		current_block = ole2_get_next_block_number(fd, hdr, current_block);
+		if (current_block < 0) {
+			return;
+		}
+	}
+	index = prop_index % 4; /* from here on: position of the entry inside its block */
+	if (!ole2_read_block(fd, hdr, prop_block,
+			current_block)) {
+		return;
+	}	
+	if (prop_block[index].type <= 0) { /* type is unsigned, so this only rejects type 0 */
+		return;
+	}
+	prop_block[index].name_size = ole2_endian_convert_16(prop_block[index].name_size); /* convert the multi-byte fields of this entry in place */
+	prop_block[index].prev = ole2_endian_convert_32(prop_block[index].prev);
+	prop_block[index].next = ole2_endian_convert_32(prop_block[index].next);
+	prop_block[index].child = ole2_endian_convert_32(prop_block[index].child);
+	prop_block[index].user_flags = ole2_endian_convert_32(prop_block[index].user_flags);
+	prop_block[index].create_lowdate = ole2_endian_convert_32(prop_block[index].create_lowdate);
+	prop_block[index].create_highdate = ole2_endian_convert_32(prop_block[index].create_highdate);
+	prop_block[index].mod_lowdate = ole2_endian_convert_32(prop_block[index].mod_lowdate);
+	prop_block[index].mod_highdate = ole2_endian_convert_32(prop_block[index].mod_highdate);
+	prop_block[index].start_block = ole2_endian_convert_32(prop_block[index].start_block);
+	prop_block[index].size = ole2_endian_convert_32(prop_block[index].size);
+	
+	print_ole2_property(&prop_block[index]);
+
+	/* Check we aren't in a loop */
+	if (cli_bitset_test(hdr->bitset, (unsigned long) prop_index)) {
+		/* Loop in property tree detected */
+		cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", prop_index);
+		return;
+	}
+	if (!cli_bitset_set(hdr->bitset, (unsigned long) prop_index)) { /* mark this index as visited; give up if that fails */
+		return;
+	}
+
+	switch (prop_block[index].type) {
+		case 5: /* Root Entry */
+			if ((prop_index != 0) || (rec_level !=0) ||
+					(*file_count != 0)) {
+				/* Can only have RootEntry as the top */
+				cli_dbgmsg("ERROR: illegal Root Entry\n");
+				return;
+			}
+			hdr->sbat_root_start = prop_block[index].start_block; /* remembered for ole2_get_sbat_data_block() */
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].prev, handler, rec_level+1, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].next, handler, rec_level+1, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].child, handler, rec_level+1, file_count, limits);
+			break;
+		case 2: /* File */
+			(*file_count)++;
+			if (!handler(fd, hdr, &prop_block[index], dir)) {
+				cli_dbgmsg("ERROR: handler failed\n");
+				/* If we don't return on this error then
+					we can sometimes pull VBA code
+					from corrupted files.
+				*/
+			
+			}
+			ole2_walk_property_tree(fd, hdr, dir, /* links of a file entry are walked at the same rec_level */
+				prop_block[index].prev, handler, rec_level, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].next, handler, rec_level, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].child, handler, rec_level, file_count, limits);
+			break;
+		case 1: /* Directory */
+			dirname = (char *) cli_malloc(strlen(dir)+8); /* room for '/', six digits and the terminating NUL */
+			if (!dirname)  {
+				return;
+			}
+			snprintf(dirname, strlen(dir)+8, "%s/%.6d", dir, prop_index);
+			if (mkdir(dirname, 0700) != 0) {
+				free(dirname);
+				return;
+			}
+			cli_dbgmsg("OLE2 dir entry: %s\n",dirname);
+			ole2_walk_property_tree(fd, hdr, dir, /* prev and next stay in dir, only the child goes into the new directory */
+				prop_block[index].prev, handler, rec_level+1, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dir,
+				prop_block[index].next, handler, rec_level+1, file_count, limits);
+			ole2_walk_property_tree(fd, hdr, dirname,
+				prop_block[index].child, handler, rec_level+1, file_count, limits);
+			free(dirname);
+			break;
+		default:
+			cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[index].type);
+			break;
+	}
+	return;
+}
+/*
+ * Write file handler - write the contents of a stream entry to a file.
+ *
+ *  fd   - descriptor of the OLE2 container the blocks are read from
+ *  hdr  - OLE2 header; supplies the big block size, the small-block cutoff
+ *         and the maximum acceptable block number
+ *  prop - property describing the stream (name, type, start block, size)
+ *  dir  - directory in which the output file is created
+ *
+ * Returns TRUE when the stream was written (or when prop is not a file
+ * entry, in which case nothing is done) and FALSE on any failure.  Once the
+ * output file is open, every exit goes through a single cleanup label so
+ * the descriptor, the block buffer and the loop-detection bitset are always
+ * released.
+ */
+static int handler_writefile(int fd, ole2_header_t *hdr, property_t *prop, const char *dir)
+{
+	unsigned char *buff = NULL;
+	int32_t current_block, ofd, len, offset, chunk, big_block_size;
+	char *name, *newname;
+	bitset_t *blk_bitset = NULL;
+	int ret = FALSE;
+
+	if (prop->type != 2) {
+		/* Not a file */
+		return TRUE;
+	}
+
+	if (prop->name_size > 64) {
+		cli_dbgmsg("\nERROR: property name too long: %d\n", prop->name_size);
+		return FALSE;
+	}
+
+	if (! (name = get_property_name(prop->name, prop->name_size))) {
+		/* File without a name - create a name for it */
+		off_t i;
+
+		i = lseek(fd, 0, SEEK_CUR);
+		name = (char *) cli_malloc(11);
+		if (!name) {
+			return FALSE;
+		}
+		/* off_t may be wider than long; cast so the argument matches %ld */
+		snprintf(name, 11, "%.10ld", (long int) (i + (long int) prop));
+	} else {
+		/* Sanitize the file name */
+		sanitiseName(name);
+	}
+
+	/* "<dir>/<name>" plus the terminating NUL */
+	newname = (char *) cli_malloc(strlen(name) + strlen(dir) + 2);
+	if (!newname) {
+		free(name);
+		return FALSE;
+	}
+
+	sprintf(newname, "%s/%s", dir, name);
+	free(name);
+
+	ofd = open(newname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+	if (ofd < 0) {
+		cli_errmsg("ERROR: failed to create file: %s\n", newname);
+		free(newname);
+		return FALSE;
+	}
+	free(newname);
+	current_block = prop->start_block;
+	len = prop->size;
+	big_block_size = 1 << hdr->log2_big_block_size;
+
+	buff = (unsigned char *) cli_malloc(big_block_size);
+	if (!buff) {
+		goto done;
+	}
+
+	blk_bitset = cli_bitset_init();
+	if (!blk_bitset) {
+		/* buff used to be leaked on this path */
+		cli_errmsg("ERROR [handler_writefile]: init bitset failed\n");
+		goto done;
+	}
+
+	while ((current_block >= 0) && (len > 0)) {
+		if (current_block > hdr->max_block_no) {
+			cli_dbgmsg("OLE2: Max block number for file size exceeded: %d\n", current_block);
+			goto done;
+		}
+		/* Check we aren't in a loop */
+		if (cli_bitset_test(blk_bitset, (unsigned long) current_block)) {
+			/* Loop in block list */
+			cli_dbgmsg("OLE2: Block list loop detected\n");
+			goto done;
+		}
+		if (!cli_bitset_set(blk_bitset, (unsigned long) current_block)) {
+			goto done;
+		}
+		if (prop->size < (int64_t)hdr->sbat_cutoff) {
+			/* Small block file */
+			if (!ole2_get_sbat_data_block(fd, hdr, buff, current_block)) {
+				cli_dbgmsg("ole2_get_sbat_data_block failed\n");
+				goto done;
+			}
+			/* buff now contains the block with 8 small blocks in it */
+			offset = 64 * (current_block % 8);
+			chunk = MIN(len,64);
+			if (cli_writen(ofd, &buff[offset], chunk) != chunk) {
+				goto done;
+			}
+
+			len -= chunk;
+			current_block = ole2_get_next_sbat_block(fd, hdr, current_block);
+		} else {
+			/* Big block file */
+			if (!ole2_read_block(fd, hdr, buff, current_block)) {
+				goto done;
+			}
+			chunk = MIN(len,big_block_size);
+			if (cli_writen(ofd, buff, chunk) != chunk) {
+				goto done;
+			}
+
+			current_block = ole2_get_next_block_number(fd, hdr, current_block);
+			len -= chunk;
+		}
+	}
+	ret = TRUE;
+
+done:
+	/* free(NULL) and cli_bitset_free(NULL) are both no-ops */
+	close(ofd);
+	free(buff);
+	cli_bitset_free(blk_bitset);
+	return ret;
+}
+
+#if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA)
+/*
+ * Read the OLE2 header from fd into *hdr one member at a time, in on-disk
+ * order.  Only compiled when no structure-packing facility is available
+ * (see the surrounding #if), so the header cannot be read in one go.
+ *
+ * Returns TRUE when every member was read completely, FALSE as soon as one
+ * read comes up short.
+ */
+static int ole2_read_header(int fd, ole2_header_t *hdr)
+{
+	int slot;
+
+	/* || short-circuits, so no further read is issued after a failure */
+	if (cli_readn(fd, &hdr->magic, 8) != 8 ||
+	    cli_readn(fd, &hdr->clsid, 16) != 16 ||
+	    cli_readn(fd, &hdr->minor_version, 2) != 2 ||
+	    cli_readn(fd, &hdr->dll_version, 2) != 2 ||
+	    cli_readn(fd, &hdr->byte_order, 2) != 2 ||
+	    cli_readn(fd, &hdr->log2_big_block_size, 2) != 2 ||
+	    cli_readn(fd, &hdr->log2_small_block_size, 4) != 4 ||
+	    cli_readn(fd, &hdr->reserved, 8) != 8 ||
+	    cli_readn(fd, &hdr->bat_count, 4) != 4 ||
+	    cli_readn(fd, &hdr->prop_start, 4) != 4 ||
+	    cli_readn(fd, &hdr->signature, 4) != 4 ||
+	    cli_readn(fd, &hdr->sbat_cutoff, 4) != 4 ||
+	    cli_readn(fd, &hdr->sbat_start, 4) != 4 ||
+	    cli_readn(fd, &hdr->sbat_block_count, 4) != 4 ||
+	    cli_readn(fd, &hdr->xbat_start, 4) != 4 ||
+	    cli_readn(fd, &hdr->xbat_count, 4) != 4) {
+		return FALSE;
+	}
+
+	/* The header ends with 109 four-byte BAT entries */
+	for (slot = 0; slot < 109; ++slot) {
+		if (cli_readn(fd, &hdr->bat_array[slot], 4) != 4) {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+#endif
+
+/*
+ * Extract the streams of the OLE2 container open on fd into dirname.
+ *
+ *  fd      - descriptor of the OLE2 file
+ *  dirname - existing directory that receives the extracted files
+ *  limits  - scan limits, passed through to the property tree walker
+ *
+ * Returns 0 when the file was processed, when it is too small to hold a
+ * header, or when it uses an untested block geometry (in which case it is
+ * not scanned).  Returns CL_EOLE2 when the file cannot be stat'ed, when the
+ * loop-detection bitset cannot be allocated, or when the magic does not
+ * match.
+ */
+int cli_ole2_extract(int fd, const char *dirname, const struct cl_limits *limits)
+{
+	ole2_header_t hdr;
+	int hdr_size;
+	int ret = 0;
+	struct stat statbuf;
+	unsigned int file_count=0;
+
+	cli_dbgmsg("in cli_ole2_extract()\n");
+
+	/* size of header - size of other values in struct */
+	hdr_size = sizeof(struct ole2_header_tag) - sizeof(int32_t) -
+			sizeof(unsigned char *) - sizeof(off_t) - sizeof(bitset_t *) -
+			sizeof(uint32_t);
+
+	hdr.m_area = NULL;
+	hdr.bitset = NULL;
+
+	/* The file size is needed below to bound block numbers, so an fstat
+	 * failure is fatal; st_size used to be read uninitialised here. */
+	if (fstat(fd, &statbuf) != 0) {
+		cli_dbgmsg("OLE2: fstat failed\n");
+		return CL_EOLE2;
+	}
+	if (statbuf.st_size < hdr_size) {
+		return 0;
+	}
+
+#ifdef HAVE_MMAP
+	hdr.m_length = statbuf.st_size;
+	hdr.m_area = (unsigned char *) mmap(NULL, hdr.m_length, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (hdr.m_area == MAP_FAILED) {
+		hdr.m_area = NULL;
+	} else {
+		cli_dbgmsg("mmap'ed file\n");
+		memcpy(&hdr, hdr.m_area, hdr_size);
+	}
+#endif
+
+	/* No mapping available: fall back to reading the header from fd */
+	if (hdr.m_area == NULL) {
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+		if (cli_readn(fd, &hdr, hdr_size) != hdr_size) {
+			return 0;
+		}
+#else
+		if (!ole2_read_header(fd, &hdr)) {
+			return 0;
+		}
+#endif
+	}
+
+	hdr.minor_version = ole2_endian_convert_16(hdr.minor_version);
+	hdr.dll_version = ole2_endian_convert_16(hdr.dll_version);
+	hdr.byte_order = ole2_endian_convert_16(hdr.byte_order);
+	hdr.log2_big_block_size = ole2_endian_convert_16(hdr.log2_big_block_size);
+	hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size);
+	hdr.bat_count = ole2_endian_convert_32(hdr.bat_count);
+	hdr.prop_start = ole2_endian_convert_32(hdr.prop_start);
+	hdr.sbat_cutoff = ole2_endian_convert_32(hdr.sbat_cutoff);
+	hdr.sbat_start = ole2_endian_convert_32(hdr.sbat_start);
+	hdr.sbat_block_count = ole2_endian_convert_32(hdr.sbat_block_count);
+	hdr.xbat_start = ole2_endian_convert_32(hdr.xbat_start);
+	hdr.xbat_count = ole2_endian_convert_32(hdr.xbat_count);
+
+	hdr.sbat_root_start = -1;
+
+	hdr.bitset = cli_bitset_init();
+	if (!hdr.bitset) {
+		/* go through abort so the mapping is released as well */
+		ret = CL_EOLE2;
+		goto abort;
+	}
+
+	if (memcmp(hdr.magic, magic_id, 8) != 0) {
+		cli_dbgmsg("OLE2 magic failed!\n");
+		ret = CL_EOLE2;
+		goto abort;
+	}
+
+	if (hdr.log2_big_block_size != 9) {
+		cli_errmsg("WARNING: not scanned; untested big block size - please report\n");
+		goto abort;
+	}
+	if (hdr.log2_small_block_size != 6) {
+		cli_errmsg("WARNING: not scanned; untested small block size - please report\n");
+		goto abort;
+	}
+	if (hdr.sbat_cutoff != 4096) {
+		cli_errmsg("WARNING: not scanned; untested sbat cutoff - please report\n");
+		goto abort;
+	}
+
+	/* 8 SBAT blocks per file block */
+	/* NOTE(review): this divides by the log2 value (9), not by the block
+	 * size (1 << 9), so the bound is far looser than the comment above
+	 * suggests - confirm whether that is intended before tightening it. */
+	hdr.max_block_no = ((statbuf.st_size / hdr.log2_big_block_size) + 1) * 8;
+
+	print_ole2_header(&hdr);
+	cli_dbgmsg("Max block number: %lu\n", hdr.max_block_no);
+
+	/* NOTE: Select only ONE of the following two methods */
+
+	/* ole2_read_property_tree(fd, &hdr, dirname, handler_writefile); */
+
+	/* OR */
+
+	ole2_walk_property_tree(fd, &hdr, dirname, 0, handler_writefile, 0, &file_count, limits);
+
+abort:
+#ifdef HAVE_MMAP
+	if (hdr.m_area != NULL) {
+		munmap(hdr.m_area, hdr.m_length);
+	}
+#endif
+	/* cli_bitset_free() accepts NULL */
+	cli_bitset_free(hdr.bitset);
+	return ret;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,846 @@
+/*
+ *  Copyright (C) 1999 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef	C_WINDOWS
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <dirent.h>
+#endif
+#include <time.h>
+#include <fcntl.h>
+#ifndef	C_WINDOWS
+#include <pwd.h>
+#endif
+#include <errno.h>
+#include "target.h"
+#ifndef	C_WINDOWS
+#include <sys/time.h>
+#endif
+#ifdef	HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef	HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#if	defined(_MSC_VER) && defined(_DEBUG)
+#include <crtdbg.h>
+#endif
+
+#ifdef CL_THREAD_SAFE
+#  include <pthread.h>
+static pthread_mutex_t cli_gentemp_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <limits.h>
+#include <stddef.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "md5.h"
+#include "cltypes.h"
+
+#ifndef	O_BINARY
+#define	O_BINARY	0
+#endif
+
+#ifdef        C_WINDOWS
+#undef        P_tmpdir
+#define       P_tmpdir        "C:\\WINDOWS\\TEMP"
+#endif
+
+#define CL_FLEVEL 25 /* don't touch it */
+
+uint8_t cli_debug_flag = 0, cli_leavetemps_flag = 0;
+
+static unsigned char name_salt[16] = { 16, 38, 97, 12, 8, 4, 72, 196, 217, 144, 33, 124, 18, 11, 17, 253 };
+
+
+/*
+ * Shared body of cli_warnmsg() and cli_errmsg().
+ *
+ * x must be a string literal: its length is taken with sizeof(x) - 1.  The
+ * macro declares locals (args, len, buff), so it has to be expanded at the
+ * very top of a variadic function whose last named parameter is `str`.
+ * It copies the prefix x into a BUFSIZ stack buffer, formats str and the
+ * variable arguments right after it, forces NUL termination and writes the
+ * result to stderr.  The final statement has no semicolon; the caller
+ * supplies it.
+ */
+#define MSGCODE(x)					    \
+	va_list args;					    \
+	int len = sizeof(x) - 1;			    \
+	char buff[BUFSIZ];				    \
+    strncpy(buff, x, len);				    \
+    va_start(args, str);				    \
+    vsnprintf(buff + len, sizeof(buff) - len, str, args);   \
+    buff[sizeof(buff) - 1] = '\0';			    \
+    fputs(buff, stderr);				    \
+    va_end(args)
+
+
+/*
+ * Print a printf-style warning to stderr, prefixed with
+ * "LibClamAV Warning: ".  The whole body comes from MSGCODE.
+ */
+void cli_warnmsg(const char *str, ...)
+{
+    MSGCODE("LibClamAV Warning: ");
+}
+
+/*
+ * Print a printf-style error message to stderr, prefixed with
+ * "LibClamAV Error: ".  The whole body comes from MSGCODE.
+ */
+void cli_errmsg(const char *str, ...)
+{
+    MSGCODE("LibClamAV Error: ");
+}
+
+/*
+ * Debug output hook.  Does nothing unless cli_debug_flag is set.  When it
+ * is, the format string itself is written to stdout with puts(); the
+ * variable arguments are accepted but never expanded.
+ */
+void cli_dbgmsg(const char *str, ...)
+{
+    if(!cli_debug_flag)
+	return;
+
+    puts(str);
+}
+
+/* Turn on debug output by setting the global cli_debug_flag to 1. */
+void cl_debug(void)
+{
+    cli_debug_flag = 1;
+}
+
+/* Return the compile-time CL_FLEVEL constant defined earlier in this file. */
+unsigned int cl_retflevel(void)
+{
+    return CL_FLEVEL;
+}
+
+/* Return the VERSION string; the macro is defined outside this file. */
+const char *cl_retver(void)
+{
+    return VERSION;
+}
+
+/*
+ * Translate a libclamav return code into a human-readable message.
+ *
+ * Returns a pointer to a constant string; codes that are not listed map to
+ * "Unknown error code".  The caller must not modify or free the result.
+ */
+const char *cl_strerror(int clerror)
+{
+	static const struct {
+	    int code;
+	    const char *text;
+	} messages[] = {
+	    { CL_CLEAN,		"No viruses detected" },
+	    { CL_VIRUS,		"Virus(es) detected" },
+	    { CL_EMAXREC,	"Recursion limit exceeded" },
+	    { CL_EMAXSIZE,	"File size limit exceeded" },
+	    { CL_EMAXFILES,	"Files number limit exceeded" },
+	    { CL_ERAR,		"RAR module failure" },
+	    { CL_EZIP,		"Zip module failure" },
+	    { CL_EGZIP,		"GZip module failure" },
+	    { CL_EMSCOMP,	"MS Expand module failure" },
+	    { CL_EMSCAB,	"MS CAB module failure" },
+	    { CL_EOLE2,		"OLE2 module failure" },
+	    { CL_ETMPFILE,	"Unable to create temporary file" },
+	    { CL_ETMPDIR,	"Unable to create temporary directory" },
+	    { CL_EFSYNC,	"Unable to synchronize file <-> disk" },
+	    { CL_EMEM,		"Unable to allocate memory" },
+	    { CL_EOPEN,		"Unable to open file or directory" },
+	    { CL_EMALFDB,	"Malformed database" },
+	    { CL_EPATSHORT,	"Too short pattern detected" },
+	    { CL_ECVD,		"Broken or not a CVD file" },
+	    { CL_ECVDEXTR,	"CVD extraction failure" },
+	    { CL_EMD5,		"MD5 verification error" },
+	    { CL_EDSIG,		"Digital signature verification error" },
+	    { CL_ENULLARG,	"Null argument passed while initialized is required" },
+	    { CL_EIO,		"Input/Output error" },
+	    { CL_EFORMAT,	"Bad format or broken data" },
+	    { CL_ESUPPORT,	"Not supported data format" },
+	    { CL_ELOCKDB,	"Unable to lock database directory" },
+	    { CL_EARJ,		"ARJ module failure" }
+	};
+	unsigned int i;
+
+
+    for(i = 0; i < sizeof(messages) / sizeof(messages[0]); i++)
+	if(messages[i].code == clerror)
+	    return messages[i].text;
+
+    return "Unknown error code";
+}
+
+/*
+ * Compute the MD5 digest of everything that can be read from descriptor
+ * desc, starting at its current position.
+ *
+ * Returns a newly allocated 16-byte digest that the caller must free(), or
+ * NULL when the digest buffer cannot be allocated or reading fails.
+ */
+unsigned char *cli_md5digest(int desc)
+{
+	unsigned char *digest;
+	char buff[FILEBUFF];
+	cli_md5_ctx ctx;
+	int bytes;
+
+
+    if(!(digest = cli_malloc(16)))
+	return NULL;
+
+    cli_md5_init(&ctx);
+
+    /* cli_readn() returns -1 on error; that must never reach
+     * cli_md5_update() as a length, so only positive counts are hashed. */
+    while((bytes = cli_readn(desc, buff, FILEBUFF)) > 0)
+	cli_md5_update(&ctx, buff, bytes);
+
+    if(bytes < 0) {
+	/* a digest of partially read data would be misleading */
+	free(digest);
+	return NULL;
+    }
+
+    cli_md5_final(digest, &ctx);
+
+    return digest;
+}
+
+/*
+ * Hash the remainder of stream fs with MD5.
+ *
+ * Returns a newly allocated, NUL-terminated string of 32 lowercase hex
+ * digits that the caller must free(), or NULL when that string cannot be
+ * allocated.  When digcpy is non-NULL and the allocation succeeded, the raw
+ * 16-byte digest is copied there as well.
+ */
+char *cli_md5stream(FILE *fs, unsigned char *digcpy)
+{
+	static const char hexdigits[] = "0123456789abcdef";
+	unsigned char raw[16];
+	char chunk[FILEBUFF];
+	cli_md5_ctx ctx;
+	char *hex;
+	int n, got;
+
+
+    cli_md5_init(&ctx);
+
+    /* fread() yields 0 at end of file or on error, which ends the loop */
+    for(got = fread(chunk, 1, FILEBUFF, fs); got; got = fread(chunk, 1, FILEBUFF, fs))
+	cli_md5_update(&ctx, chunk, got);
+
+    cli_md5_final(raw, &ctx);
+
+    hex = (char *) cli_calloc(32 + 1, sizeof(char));
+    if(hex == NULL)
+	return NULL;
+
+    /* two hex digits per digest byte; cli_calloc() supplied the final NUL */
+    for(n = 0; n < 16; n++) {
+	hex[2 * n] = hexdigits[raw[n] >> 4];
+	hex[2 * n + 1] = hexdigits[raw[n] & 0x0f];
+    }
+
+    if(digcpy != NULL)
+	memcpy(digcpy, raw, 16);
+
+    return hex;
+}
+
+/*
+ * Return the MD5 of the named file as a newly allocated hex string (see
+ * cli_md5stream()), or NULL when the file cannot be opened for reading or
+ * the hash string cannot be produced.
+ */
+char *cli_md5file(const char *filename)
+{
+	FILE *stream = fopen(filename, "rb");
+	char *hex;
+
+
+    if(!stream) {
+	cli_errmsg("cli_md5file(): Can't read file %s\n", filename);
+	return NULL;
+    }
+
+    hex = cli_md5stream(stream, NULL);
+    fclose(stream);
+
+    return hex;
+}
+
+/*
+ * Hash len bytes at buffer with MD5.
+ *
+ * When dig is non-NULL the raw 16-byte digest is stored there first, even
+ * if the string allocation below fails.  Returns a newly allocated string
+ * of 32 lowercase hex digits that the caller must free(), or NULL when the
+ * allocation fails.
+ */
+static char *cli_md5buff(const unsigned char *buffer, unsigned int len, unsigned char *dig)
+{
+	unsigned char raw[16];
+	cli_md5_ctx ctx;
+	char *hex;
+	int n;
+
+
+    cli_md5_init(&ctx);
+    cli_md5_update(&ctx, buffer, len);
+    cli_md5_final(raw, &ctx);
+
+    if(dig != NULL)
+	memcpy(dig, raw, 16);
+
+    hex = (char *) cli_calloc(32 + 1, sizeof(char));
+    if(hex == NULL)
+	return NULL;
+
+    /* each digest byte becomes two characters at offset 2 * n */
+    for(n = 0; n < 16; n++)
+	sprintf(hex + 2 * n, "%02x", raw[n]);
+
+    return hex;
+}
+
+/*
+ * malloc() wrapper with sanity limits and error reporting.
+ *
+ * Refuses zero-sized requests and requests above CLI_MAX_ALLOCATION.
+ * Returns the new block, or NULL (after logging through cli_errmsg) when
+ * the request is refused or the allocation fails.
+ */
+void *cli_malloc(size_t size)
+{
+	void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+	/* size_t does not match %u on LP64 targets; print it as unsigned long */
+	cli_errmsg("cli_malloc(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+	return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = malloc(size);
+#endif
+
+    if(!alloc) {
+	cli_errmsg("cli_malloc(): Can't allocate memory (%lu bytes).\n", (unsigned long) size);
+	perror("malloc_problem");
+	return NULL;
+    } else return alloc;
+}
+
+/*
+ * calloc() wrapper with sanity limits and error reporting.
+ *
+ * Refuses requests whose element size is zero or above CLI_MAX_ALLOCATION.
+ * Returns the zero-filled block, or NULL (after logging through cli_errmsg)
+ * when the request is refused or the allocation fails.
+ */
+void *cli_calloc(size_t nmemb, size_t size)
+{
+	void *alloc;
+
+
+    /* NOTE(review): only the element size is limited here, not nmemb or
+     * the nmemb * size total - confirm whether that is intended. */
+    if(!size || size > CLI_MAX_ALLOCATION) {
+	/* size_t does not match %u on LP64 targets; print it as unsigned long */
+	cli_errmsg("cli_calloc(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+	return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _calloc_dbg(nmemb, size, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = calloc(nmemb, size);
+#endif
+
+    if(!alloc) {
+	cli_errmsg("cli_calloc(): Can't allocate memory (%lu bytes).\n", (unsigned long) (nmemb * size));
+	perror("calloc_problem");
+	return NULL;
+    } else return alloc;
+}
+
+/*
+ * realloc() wrapper with sanity limits and error reporting.
+ *
+ * Refuses zero-sized requests and requests above CLI_MAX_ALLOCATION.
+ * Returns the resized block, or NULL (after logging through cli_errmsg)
+ * when the request is refused or fails.  On NULL the original block ptr is
+ * left untouched and still belongs to the caller.
+ */
+void *cli_realloc(void *ptr, size_t size)
+{
+	void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+	/* size_t does not match %u on LP64 targets; print it as unsigned long */
+	cli_errmsg("cli_realloc(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+	return NULL;
+    }
+
+    alloc = realloc(ptr, size);
+
+    if(!alloc) {
+	cli_errmsg("cli_realloc(): Can't re-allocate memory to %lu bytes.\n", (unsigned long) size);
+	perror("realloc_problem");
+	return NULL;
+    } else return alloc;
+}
+
+/*
+ * Like cli_realloc(), except that the original block is freed when
+ * realloc() itself fails.
+ *
+ * Returns the resized block or NULL.  When the size is rejected up front
+ * (zero or above CLI_MAX_ALLOCATION) ptr is NOT freed; it is only released
+ * on an actual realloc() failure.
+ */
+void *cli_realloc2(void *ptr, size_t size)
+{
+	void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+	/* size_t does not match %u on LP64 targets; print it as unsigned long */
+	cli_errmsg("cli_realloc2(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+	return NULL;
+    }
+
+    alloc = realloc(ptr, size);
+
+    if(!alloc) {
+	cli_errmsg("cli_realloc2(): Can't re-allocate memory to %lu bytes.\n", (unsigned long) size);
+	perror("realloc_problem");
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(ptr);
+	return NULL;
+    } else return alloc;
+}
+
+/*
+ * strdup() wrapper with error reporting.
+ *
+ * Returns a newly allocated copy of s that the caller must free(), or NULL
+ * (after logging through cli_errmsg) when s is NULL or the copy cannot be
+ * allocated.
+ */
+char *cli_strdup(const char *s)
+{
+        char *alloc;
+
+
+    if(s == NULL) {
+        cli_errmsg("cli_strdup(): s == NULL. Please report to http://bugs.clamav.net\n");
+        return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _strdup_dbg(s, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = strdup(s);
+#endif
+
+    if(!alloc) {
+        /* strlen() returns size_t, which does not match %u on LP64 targets */
+        cli_errmsg("cli_strdup(): Can't allocate memory (%lu bytes).\n", (unsigned long) strlen(s));
+        perror("strdup_problem");
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * Return a pseudo-random number in the range [0, max).
+ *
+ * rand() is re-seeded on every call from the current microsecond count
+ * plus clock().  A max of 0 describes an empty range; 0 is returned for it
+ * instead of evaluating rand() % 0, which is undefined.
+ */
+unsigned int cli_rndnum(unsigned int max)
+{
+    struct timeval tv;
+
+  gettimeofday(&tv, (struct timezone *) 0);
+  srand(tv.tv_usec+clock());
+
+  if(max == 0)
+    return 0;
+
+  return rand() % max;
+}
+
+/*
+ * Configure temporary-file handling.
+ *
+ *  dir        - when non-NULL, exported to the environment as TMPDIR
+ *  leavetemps - stored in the global cli_leavetemps_flag
+ *
+ * The flag is stored even when TMPDIR cannot be set.
+ */
+void cl_settempdir(const char *dir, short leavetemps)
+{
+	char *var;
+
+    if(dir) {
+	/* "TMPDIR=" is 7 characters, plus the terminating NUL */
+	var = (char *) cli_malloc(8 + strlen(dir));
+	if(!var) {
+	    /* previously a failed allocation was passed straight to sprintf() */
+	    cli_warnmsg("Can't set TMPDIR variable - out of memory.\n");
+	} else {
+	    sprintf(var, "TMPDIR=%s", dir);
+	    if(!putenv(var)) {
+		cli_dbgmsg("Setting %s as global temporary directory\n", dir);
+		/* WARNING: var must not be released - see putenv(3) */
+	    } else {
+		cli_warnmsg("Can't set TMPDIR variable - insufficient space in the environment.\n");
+		/* putenv() failed, so the environment does not reference var */
+		free(var);
+	    }
+	}
+    }
+
+    cli_leavetemps_flag = leavetemps;
+}
+
+/*
+ * Build a unique-looking temporary path of the form
+ * "<dir>/clamav-<32 hex digits>" (backslash separator under C_WINDOWS).
+ *
+ * When dir is NULL the directory comes from $TMPDIR, then P_tmpdir if that
+ * macro exists, then "/tmp".  The hex part is the MD5 of the global
+ * name_salt followed by 32 random bytes; the raw digest is written back
+ * into name_salt so that it feeds the next call.  Nothing is created on
+ * disk.
+ *
+ * Returns a newly allocated string that the caller must free(), or NULL
+ * when memory runs out.
+ */
+char *cli_gentemp(const char *dir)
+{
+	const char *mdir = dir;
+	unsigned char salt[16 + 32];
+	char *name, *hash;
+	int pos;
+
+    if(mdir == NULL) {
+	mdir = getenv("TMPDIR");
+	if(mdir == NULL) {
+#ifdef P_tmpdir
+	    mdir = P_tmpdir;
+#else
+	    mdir = "/tmp";
+#endif
+	}
+    }
+
+    /* directory + separator + "clamav-" + 32 hex digits + NUL */
+    name = (char *) cli_calloc(strlen(mdir) + 1 + 32 + 1 + 7, sizeof(char));
+    if(name == NULL) {
+	cli_dbgmsg("cli_gentemp('%s'): out of memory\n", mdir);
+	return NULL;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_gentemp_mutex);
+#endif
+
+    /* first 16 bytes: current salt; remaining 32 bytes: random */
+    memcpy(salt, name_salt, 16);
+
+    pos = 16;
+    while(pos < 48) {
+	salt[pos] = cli_rndnum(256);
+	pos++;
+    }
+
+    hash = cli_md5buff(salt, 48, name_salt);
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&cli_gentemp_mutex);
+#endif
+
+    if(hash == NULL) {
+	free(name);
+	cli_dbgmsg("cli_gentemp('%s'): out of memory\n", mdir);
+	return NULL;
+    }
+
+#ifdef	C_WINDOWS
+	sprintf(name, "%s\\clamav-", mdir);
+#else
+	sprintf(name, "%s/clamav-", mdir);
+#endif
+    strncat(name, hash, 32);
+    free(hash);
+
+    return name;
+}
+
+/*
+ * Generate a temporary file name under dir and create the file.
+ *
+ *  dir  - base directory, or NULL for the default (see cli_gentemp())
+ *  name - receives the newly allocated path; the caller frees it on success
+ *  fd   - receives the open read/write descriptor
+ *
+ * Returns CL_SUCCESS, CL_EMEM when the name cannot be generated, or CL_EIO
+ * when the file cannot be created.  On CL_EIO the name is freed and *name
+ * is reset to NULL.
+ */
+int cli_gentempfd(const char *dir, char **name, int *fd)
+{
+
+    *name = cli_gentemp(dir);
+    if(!*name)
+	return CL_EMEM;
+
+    /* O_EXCL: fail rather than reuse or follow an existing file or symlink
+     * that someone else planted at the generated path. */
+    *fd = open(*name, O_RDWR|O_CREAT|O_TRUNC|O_BINARY|O_EXCL, S_IRWXU);
+    if(*fd == -1) {
+	cli_errmsg("cli_gentempfd: Can't create temporary file %s: %s\n", *name, strerror(errno));
+	free(*name);
+	*name = NULL;	/* do not leave a dangling pointer with the caller */
+	return CL_EIO;
+    }
+
+    return CL_SUCCESS;
+}
+
+#ifdef	C_WINDOWS
+/*
+ * Windows doesn't allow you to delete a directory while it is still open
+ *
+ * Recursively remove name.  A non-directory is simply unlinked.  For a
+ * directory every entry except "." and ".." is removed through a recursive
+ * call, the directory handle is closed, and only then is the directory
+ * itself removed.
+ *
+ * Returns 0 on success and -1 when name cannot be stat'ed, opened or
+ * removed, or when memory runs out; otherwise the result of the last
+ * recursive call is returned.
+ */
+int
+cli_rmdirs(const char *name)
+{
+	int rc;
+	struct stat statb;	
+	DIR *dd;
+	struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+	/* storage handed to readdir_r(), sized for the longest entry name */
+	union {
+	    struct dirent d;
+	    char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+	} result;
+#endif
+
+
+    if(stat(name, &statb) < 0) {
+	cli_warnmsg("cli_rmdirs: Can't locate %s: %s\n", name, strerror(errno));
+	return -1;
+    }
+
+    /* plain file (or anything that is not a directory): just unlink it */
+    if(!S_ISDIR(statb.st_mode)) {
+	if(unlink(name) < 0) {
+	    cli_warnmsg("cli_rmdirs: Can't remove %s: %s\n", name, strerror(errno));
+	    return -1;
+	}
+	return 0;
+    }
+
+    if((dd = opendir(name)) == NULL)
+	return -1;
+
+    rc = 0;
+
+    /* the loop head depends on which readdir flavour was detected */
+#ifdef HAVE_READDIR_R_3
+    while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+    while((dent = (struct dirent *)readdir_r(dd, &result.d)) != NULL) {
+#else
+    while((dent = readdir(dd)) != NULL) {
+#endif
+	    char *path;
+
+	if(strcmp(dent->d_name, ".") == 0)
+	    continue;
+	if(strcmp(dent->d_name, "..") == 0)
+	    continue;
+
+	/* name + '\\' + entry name + NUL */
+	path = cli_malloc(strlen(name) + strlen(dent->d_name) + 2);
+
+	if(path == NULL) {
+	    closedir(dd);
+	    return -1;
+	}
+
+	sprintf(path, "%s\\%s", name, dent->d_name);
+	rc = cli_rmdirs(path);
+	free(path);
+	/* stop at the first entry that could not be removed */
+	if(rc != 0)
+	    break;
+    }
+
+    /* close the handle before rmdir(), see the note at the top */
+    closedir(dd);
+
+    if(rmdir(name) < 0) {
+	cli_errmsg("cli_rmdirs: Can't remove temporary directory %s: %s\n", name, strerror(errno));
+	return -1;
+    }
+
+    return rc;	
+}
+#else
+/*
+ * Recursively remove the directory dirname and everything below it.
+ *
+ * The directory is first made accessible with chmod 0700.  The function
+ * then keeps trying rmdir() on it; while that fails because the directory
+ * is not empty, it walks the entries, unlinking non-directories and
+ * removing sub-directories (recursing when a plain rmdir() is not enough),
+ * rewinds the directory stream and tries again.
+ *
+ * Returns 0 on success and -1 when the directory cannot be opened, an entry
+ * cannot be removed, or memory runs out.
+ */
+int cli_rmdirs(const char *dirname)
+{
+	DIR *dd;
+	struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+	/* storage handed to readdir_r(), sized for the longest entry name */
+	union {
+	    struct dirent d;
+	    char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+	} result;
+#endif
+	struct stat maind, statbuf;
+	char *path;
+
+
+    chmod(dirname, 0700);
+    if((dd = opendir(dirname)) != NULL) {
+	/* loop for as long as the directory itself still exists */
+	while(stat(dirname, &maind) != -1) {
+	    if(!rmdir(dirname)) break;
+	    /* anything other than "not empty" (or EBADF) is a hard failure */
+	    if(errno != ENOTEMPTY && errno != EEXIST && errno != EBADF) {
+		cli_errmsg("cli_rmdirs: Can't remove temporary directory %s: %s\n", dirname, strerror(errno));
+		closedir(dd);
+		return -1;
+	    }
+
+	    /* the loop head depends on which readdir flavour was detected */
+#ifdef HAVE_READDIR_R_3
+	    while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+	    while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+	    while((dent = readdir(dd))) {
+#endif
+#if	(!defined(C_CYGWIN)) && (!defined(C_INTERIX)) && (!defined(C_WINDOWS))
+		/* entries with a zero inode number are skipped on these platforms */
+		if(dent->d_ino)
+#endif
+		{
+		    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
+			/* dirname + separator + entry name + NUL */
+			path = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
+			if(!path) {
+			    closedir(dd);
+			    return -1;
+			}
+
+#ifdef	C_WINDOWS
+			sprintf(path, "%s\\%s", dirname, dent->d_name);
+#else
+			sprintf(path, "%s/%s", dirname, dent->d_name);
+#endif
+
+			/* stat the file */
+			/* lstat() does not follow symbolic links; entries it
+			 * cannot stat are silently left alone */
+			if(lstat(path, &statbuf) != -1) {
+			    if(S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
+				if(rmdir(path) == -1) { /* can't be deleted */
+				    if(errno == EACCES) {
+					cli_errmsg("cli_rmdirs: Can't remove some temporary directories due to access problem.\n");
+					closedir(dd);
+					free(path);
+					return -1;
+				    }
+				    /* any other failure: recurse into it */
+				    if(cli_rmdirs(path)) {
+					cli_warnmsg("cli_rmdirs: Can't remove nested directory %s\n", path);
+					free(path);
+					closedir(dd);
+					return -1;
+				    }
+				}
+			    } else
+				if(unlink(path) < 0) {
+				    cli_warnmsg("cli_rmdirs: Couldn't remove %s: %s\n", path, strerror(errno));
+				    free(path);
+				    closedir(dd);
+				    return -1;
+				}
+			}
+			free(path);
+		    }
+		}
+	    }
+	    /* start over from the first entry for the next rmdir() attempt */
+	    rewinddir(dd);
+	}
+
+    } else { 
+	return -1;
+    }
+
+    closedir(dd);
+    return 0;
+}
+#endif
+
+/* Function: readn
+        Try hard to read the requested number of bytes
+
+        Keeps calling read() until count bytes have arrived, end of file
+        is reached or a real error occurs; EINTR is retried.
+        Returns count on a full read, the number of bytes read so far when
+        end of file comes first, and -1 (after logging) on error.
+*/
+int cli_readn(int fd, void *buff, unsigned int count)
+{
+        unsigned char *cursor = (unsigned char *) buff;
+        unsigned int remaining = count;
+        int got;
+
+
+        do {
+                got = read(fd, cursor, remaining);
+                if (got > 0) {
+                        remaining -= got;
+                        cursor += got;
+                } else if (got == 0) {
+                        /* end of file: report the short count */
+                        return (count - remaining);
+                } else if (errno != EINTR) {
+			cli_errmsg("cli_readn: read error: %s\n", strerror(errno));
+                        return -1;
+                }
+                /* got < 0 with EINTR falls through and retries */
+        } while (remaining > 0);
+
+
+        return count;
+}
+
+/* Function: writen
+        Try hard to write the specified number of bytes
+
+        Keeps calling write() until count bytes have been written or a
+        real error occurs; EINTR is retried.
+        Returns count on success and -1 (after logging) on error.
+*/
+int cli_writen(int fd, const void *buff, unsigned int count)
+{
+        const unsigned char *cursor = (const unsigned char *) buff;
+        unsigned int remaining = count;
+        int put;
+
+
+        do {
+                put = write(fd, cursor, remaining);
+                if (put >= 0) {
+                        remaining -= put;
+                        cursor += put;
+                } else if (errno != EINTR) {
+			cli_errmsg("cli_writen: write error: %s\n", strerror(errno));
+                        return -1;
+                }
+                /* put < 0 with EINTR falls through and retries */
+        } while (remaining > 0);
+
+
+        return count;
+}
+
+/*
+ * Copy the file src to dest, creating or truncating dest (mode S_IRWXU).
+ *
+ * Returns 0 on success and -1 when either file cannot be opened, the copy
+ * buffer cannot be allocated, a read or write fails, or closing dest
+ * fails.  Both descriptors are closed on every path.
+ */
+int cli_filecopy(const char *src, const char *dest)
+{
+	char *buffer;
+	int s, d, bytes, ret = 0;
+
+
+    if((s = open(src, O_RDONLY|O_BINARY)) == -1)
+	return -1;
+
+    if((d = open(dest, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY, S_IRWXU)) == -1) {
+	close(s);
+	return -1;
+    }
+
+    if(!(buffer = cli_malloc(FILEBUFF))) {
+	/* both descriptors used to be leaked on this path */
+	close(s);
+	close(d);
+	return -1;
+    }
+
+    while((bytes = cli_readn(s, buffer, FILEBUFF)) > 0) {
+	/* a failed write must not be reported as a successful copy */
+	if(cli_writen(d, buffer, bytes) != bytes) {
+	    ret = -1;
+	    break;
+	}
+    }
+
+    /* cli_readn() returns -1 on a read error */
+    if(bytes < 0)
+	ret = -1;
+
+    free(buffer);
+    close(s);
+
+    if(close(d) < 0)
+	ret = -1;
+
+    return ret;
+}
+
+/* Implement a generic bitset, trog at clamav.net */
+
+#define BITS_PER_CHAR (8)
+#define BITSET_DEFAULT_SIZE (1024)
+#define FALSE (0)
+#define TRUE (1)
+
+/*
+ * Return the smallest value of the form BITSET_DEFAULT_SIZE * 2^k that is
+ * not below num.  If doubling wraps around to zero first, num itself is
+ * returned.
+ */
+static unsigned long nearest_power(unsigned long num)
+{
+	unsigned long power;
+
+	for (power = BITSET_DEFAULT_SIZE; power < num; ) {
+		power <<= 1;
+		if (!power) {
+			/* shifted out of range: fall back to the request */
+			return num;
+		}
+	}
+	return power;
+}
+
+/*
+ * Create an empty bitset with BITSET_DEFAULT_SIZE bytes of zeroed storage.
+ *
+ * Returns the new bitset, to be released with cli_bitset_free(), or NULL
+ * when either allocation fails.
+ */
+bitset_t *cli_bitset_init(void)
+{
+	bitset_t *bs;
+	
+	bs = cli_malloc(sizeof(bitset_t));
+	if (!bs) {
+		return NULL;
+	}
+	bs->length = BITSET_DEFAULT_SIZE;
+	bs->bitset = cli_calloc(BITSET_DEFAULT_SIZE, 1);
+	if (!bs->bitset) {
+		/* Never hand out a bitset without storage: cli_bitset_set()
+		 * and cli_bitset_test() index bs->bitset unconditionally. */
+		free(bs);
+		return NULL;
+	}
+	return bs;
+}
+
+/* Release a bitset and its storage.  A NULL bs is accepted and ignored. */
+void cli_bitset_free(bitset_t *bs)
+{
+	if (bs) {
+		/* free() ignores a NULL storage pointer */
+		free(bs->bitset);
+		free(bs);
+	}
+}
+
+/*
+ * Grow the storage of bs to nearest_power(min_size) bytes and zero the
+ * newly added tail.
+ *
+ * Returns bs on success.  Returns NULL when the reallocation fails, in
+ * which case bs is left unchanged.
+ */
+static bitset_t *bitset_realloc(bitset_t *bs, unsigned long min_size)
+{
+	const unsigned long grown = nearest_power(min_size);
+	unsigned char *storage = (unsigned char *) cli_realloc(bs->bitset, grown);
+
+	if (storage == NULL) {
+		return NULL;
+	}
+
+	/* only the bytes beyond the old length are cleared */
+	memset(storage + bs->length, 0, grown - bs->length);
+	bs->bitset = storage;
+	bs->length = grown;
+	return bs;
+}
+
+/*
+ * Set bit number bit_offset in bs, growing the storage when the bit lies
+ * beyond the current length.
+ *
+ * Returns TRUE on success and FALSE when the storage cannot be grown.
+ */
+int cli_bitset_set(bitset_t *bs, unsigned long bit_offset)
+{
+	const unsigned long byte_index = bit_offset / BITS_PER_CHAR;
+	const unsigned char mask = (unsigned char) ((unsigned char)1 << (bit_offset % BITS_PER_CHAR));
+
+	/* bitset_realloc() updates bs in place and returns NULL on failure */
+	if (byte_index >= bs->length && !bitset_realloc(bs, byte_index + 1)) {
+		return FALSE;
+	}
+
+	bs->bitset[byte_index] |= mask;
+	return TRUE;
+}
+
+/*
+ * Test bit number bit_offset in bs.
+ *
+ * Returns a non-zero value (the masked byte, not necessarily 1) when the
+ * bit is set, and FALSE when it is clear or lies beyond the current
+ * storage.
+ */
+int cli_bitset_test(bitset_t *bs, unsigned long bit_offset)
+{
+	const unsigned long byte_index = bit_offset / BITS_PER_CHAR;
+	const unsigned char mask = (unsigned char) ((unsigned char)1 << (bit_offset % BITS_PER_CHAR));
+
+	if (byte_index < bs->length) {
+		return (bs->bitset[byte_index] & mask);
+	}
+	return FALSE;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,309 @@
+/*
+ *  Copyright (C) 2006 aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "others.h"
+#include "execs.h"
+#include "pe.h"
+#include "packlibs.h"
+
+/*
+ * Fetch the next control bit of the packed stream.
+ *
+ * *mydlptr is a one byte shift register.  While any of its low seven bits is
+ * set, the top bit is shifted out and returned.  Otherwise the register is
+ * reloaded from **scur (which must lie inside buffer, excluding its last
+ * byte), *scur is advanced, and the top bit of the freshly read byte is
+ * returned while a marker bit is shifted in at the bottom.
+ *
+ * Returns 0 or 1, or -1 when a reload is needed but *scur is out of range;
+ * in that case neither *scur nor *mydlptr is modified.
+ */
+static int doubledl(char **scur, uint8_t *mydlptr, char *buffer, uint32_t buffersize)
+{
+  unsigned char bits = *mydlptr;
+
+  if (bits & 0x7f) {
+    /* payload bits remain in the register */
+    *mydlptr = (uint8_t)(bits << 1);
+    return bits >> 7;
+  }
+
+  if ( *scur < buffer || *scur >= buffer+buffersize-1 )
+    return -1;
+
+  bits = (unsigned char)**scur;
+  *scur += 1;
+  *mydlptr = (uint8_t)((bits << 1) | 1);
+  return bits >> 7;
+}
+
+
+/*
+ * Unpack a bit-packed stream from source[0..ssize) into dest[0..dsize).
+ *
+ * The first byte is copied verbatim.  After that doubledl() supplies control
+ * bits which select between copying one literal byte, emitting a zero byte,
+ * or copying backsize bytes from backbytes bytes back in the output.  The
+ * loop ends when a back-reference byte holding a zero distance is read.
+ *
+ * On success returns 0 and, for each of endsrc/enddst that is non-NULL,
+ * stores the final read position / final write position.  Returns -1 when
+ * ssize or dsize is not positive, or when a read or a write would fall
+ * outside its buffer.
+ *
+ * NOTE(review): the numeric comments (164, 16a, ...) look like offsets into
+ * the original unpacking stub - confirm.
+ */
+int cli_unfsg(char *source, char *dest, int ssize, int dsize, char **endsrc, char **enddst) {
+  uint8_t mydl=0x80;
+  uint32_t backbytes, backsize, oldback = 0;
+  char *csrc = source, *cdst = dest;
+  int oob, lostbit = 1;
+
+  if (ssize<=0 || dsize<=0) return -1;
+  *cdst++=*csrc++;
+
+  while ( 1 ) {
+    if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+      if (oob == -1)
+	return -1;
+      /* 164 */
+      backsize = 0;
+      if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+	if (oob == -1)
+	  return -1;
+	/* 16a */
+	backbytes = 0;
+	if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+	  if (oob == -1)
+	    return -1;
+	  /* 170 */
+	  lostbit = 1;
+	  backsize++;
+	  /* shift in four more bits: the marker bit reaches 0x100 after four doublings */
+	  backbytes = 0x10;
+	  while ( backbytes < 0x100 ) {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    backbytes = backbytes*2+oob;
+	  }
+	  backbytes &= 0xff;
+	  if ( ! backbytes ) {
+	    /* a zero distance in this form means "emit a zero byte" */
+	    if (cdst >= dest+dsize)
+	      return -1;
+	    *cdst++=0x00;
+	    continue;
+	  }
+	} else {
+	  /* 18f */
+	  if (csrc >= source+ssize)
+	    return -1;
+	  /* one input byte: low bit extends the length, the upper seven are the distance */
+	  backbytes = *(unsigned char*)csrc;
+	  backsize = backsize * 2 + (backbytes & 1);
+	  backbytes = (backbytes & 0xff)>>1;
+	  csrc++;
+	  if (! backbytes)
+	    break;	/* zero distance: end of the packed stream */
+	  backsize+=2;
+	  oldback = backbytes;
+	  lostbit = 0;
+	}
+      } else {
+	/* 180 */
+	/* read a number as (data bit, continue bit) pairs */
+	backsize = 1;
+	do {
+	  if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	    return -1;
+	  backsize = backsize*2+oob;
+	  if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	    return -1;
+	} while (oob);
+
+	backsize = backsize - 1 - lostbit;
+	if (! backsize) {
+	  /* 18a */
+	  /* reuse the previous distance, only a new length is read */
+	  backsize = 1;
+	  do {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    backsize = backsize*2+oob;
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	  } while (oob);
+
+	  backbytes = oldback;
+	} else {
+	  /* 198 */
+	  if (csrc >= source+ssize)
+	    return -1;
+	  /* distance = next input byte plus (number just read - 1) * 256 */
+	  backbytes = *(unsigned char*)csrc;
+	  backbytes += (backsize-1)<<8;
+	  backsize = 1;
+	  csrc++;
+	  do {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    backsize = backsize*2+oob;
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	  } while (oob);
+
+          /* the length is adjusted according to the size of the distance */
+          if (backbytes >= 0x7d00)
+            backsize++;
+          if (backbytes >= 0x500)
+            backsize++;
+          if (backbytes <= 0x7f)
+            backsize += 2;
+
+	  oldback = backbytes;
+	}
+	lostbit = 0;
+      }
+      /* both the bytes to be written and the bytes to be copied must lie inside dest */
+      if (!CLI_ISCONTAINED(dest, dsize, cdst, backsize) || !CLI_ISCONTAINED(dest, dsize, cdst-backbytes, backsize))
+	return -1;
+      /* copied forwards one byte at a time, so the two ranges may overlap */
+      while(backsize--) {
+	*cdst=*(cdst-backbytes);
+	cdst++;
+      }
+
+    } else {
+      /* 15d */
+      /* control bit 0: copy one literal byte from the input */
+      if (cdst < dest || cdst >= dest+dsize || csrc < source || csrc >= source+ssize)
+	return -1;
+      *cdst++=*csrc++;
+      lostbit=1;
+    }
+  }
+
+  if (endsrc) *endsrc = csrc;
+  if (enddst) *enddst = cdst;
+  return 0;
+}
+
+/*
+ * Unpack a bit-packed stream from source[0..ssize) into dest[0..dsize).
+ *
+ * The decoding loop is the same as in cli_unfsg(): the first byte is copied
+ * verbatim, then doubledl() supplies control bits which select between
+ * copying one literal byte, emitting a zero byte, or copying myecx_backsize
+ * bytes from myeax_backbytes bytes back in the output.  The loop ends when a
+ * back-reference byte holding a zero distance is read.
+ *
+ * On success returns 0 and, for each of endsrc/enddst that is non-NULL,
+ * stores the final read position / final write position.  Returns -1 when
+ * ssize or dsize is not positive, or when a read or a write would fall
+ * outside its buffer.
+ */
+int unmew(char *source, char *dest, int ssize, int dsize, char **endsrc, char **enddst) {
+  uint8_t mydl=0x80;
+  uint32_t myeax_backbytes, myecx_backsize, oldback = 0;
+  char *csrc = source, *cdst = dest;
+  int oob, lostbit = 1;
+
+  /* the unconditional first copy below needs one byte on each side */
+  if (ssize<=0 || dsize<=0) return -1;
+  *cdst++=*csrc++;
+
+  while ( 1 ) {
+    if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+      if (oob == -1)
+	return -1;
+      /* 164 */
+      myecx_backsize = 0;
+      if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+	if (oob == -1)
+	  return -1;
+	/* 16a */
+	myeax_backbytes = 0;
+	if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+	  if (oob == -1)
+	    return -1;
+	  /* 170 */
+	  lostbit = 1;
+	  myecx_backsize++;
+	  myeax_backbytes = 0x10;
+	  while ( myeax_backbytes < 0x100 ) {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    myeax_backbytes = myeax_backbytes*2+oob;
+	  }
+	  myeax_backbytes &= 0xff;
+	  if ( ! myeax_backbytes ) {
+	    if (cdst >= dest+dsize)
+	      return -1;
+	    *cdst++=0x00;
+	    /*cli_dbgmsg("X%02x  ", *(cdst-1)&0xff);*/
+	    continue;
+	  }
+	} else {
+	  /* 18f */
+	  if (csrc >= source+ssize)
+	    return -1;
+	  myeax_backbytes = *(unsigned char*)csrc;
+	  myecx_backsize = myecx_backsize * 2 + (myeax_backbytes & 1);
+	  myeax_backbytes = (myeax_backbytes & 0xff)>>1;
+	  csrc++;
+	  if (! myeax_backbytes)
+	  {
+	    /* cli_dbgmsg("\nBREAK \n"); */
+	    break;
+	  }
+	  myecx_backsize+=2;
+	  oldback = myeax_backbytes;
+	  lostbit = 0;
+	}
+      } else {
+	/* 180 */
+	myecx_backsize = 1;
+	do {
+	  if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	    return -1;
+	  myecx_backsize = myecx_backsize*2+oob;
+	  if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	    return -1;
+	} while (oob);
+
+	myecx_backsize = myecx_backsize - 1 - lostbit;
+	if (! myecx_backsize) {
+	  /* 18a */
+	  myecx_backsize = 1;
+	  do {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    myecx_backsize = myecx_backsize*2+oob;
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	  } while (oob);
+
+	  myeax_backbytes = oldback;
+	} else {
+	  /* 198 */
+	  if (csrc >= source+ssize)
+	    return -1;
+	  myeax_backbytes = *(unsigned char*)csrc;
+	  myeax_backbytes += (myecx_backsize-1)<<8;
+	  myecx_backsize = 1;
+	  csrc++;
+	  do {
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	    myecx_backsize = myecx_backsize*2+oob;
+	    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+	      return -1;
+	  } while (oob);
+
+          if (myeax_backbytes >= 0x7d00)
+            myecx_backsize++;
+          if (myeax_backbytes >= 0x500)
+            myecx_backsize++;
+          if (myeax_backbytes <= 0x7f)
+            myecx_backsize += 2;
+
+	  oldback = myeax_backbytes;
+	}
+	lostbit = 0;
+      }
+      if (!CLI_ISCONTAINED(dest, dsize, cdst, myecx_backsize) || !CLI_ISCONTAINED(dest, dsize, cdst-myeax_backbytes, myecx_backsize))
+      {
+	/* pointers go through %p: handing them to %d is undefined behaviour */
+	cli_dbgmsg("MEW: rete: %p %d %p %u %d || %p %d %p %u %d\n", (void *)dest, dsize, (void *)cdst, (unsigned int)myecx_backsize,
+			CLI_ISCONTAINED(dest, dsize, cdst, myecx_backsize),
+			(void *)dest, dsize, (void *)(cdst-myeax_backbytes), (unsigned int)myecx_backsize,
+      			CLI_ISCONTAINED(dest, dsize, cdst-myeax_backbytes, myecx_backsize) );
+	return -1;
+      }
+      /* copied forwards one byte at a time, so the two ranges may overlap */
+      while(myecx_backsize--) {
+	*cdst=*(cdst-myeax_backbytes);
+	cdst++;
+      }
+
+    } else {
+      /* 15d */
+      if (cdst < dest || cdst >= dest+dsize || csrc < source || csrc >= source+ssize)
+      {
+	cli_dbgmsg("MEW: retf %p %p+%08x=%p, %p %p+%08x=%p\n",
+			(void *)cdst, (void *)dest, (unsigned int)dsize, (void *)(dest+dsize),
+			(void *)csrc, (void *)source, (unsigned int)ssize, (void *)(source+ssize));
+	return -1;
+      }
+      *cdst++=*csrc++;
+      /* cli_dbgmsg("Z%02x  ", *(cdst-1)&0xff); */
+      lostbit=1;
+    }
+  }
+
+  /* as in cli_unfsg(), either result pointer may be omitted */
+  if (endsrc) *endsrc = csrc;
+  if (enddst) *enddst = cdst;
+  return 0;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,891 @@
+/*
+ *  Copyright (C) 2005-2007 Nigel Horne <njh at bandsman.co.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO: Embedded fonts
+ * TODO: Predictor image handling
+ */
+static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef	HAVE_MMAP
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#ifdef	HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+#ifdef	C_WINDOWS
+#include <io.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "mbox.h"
+#include "pdf.h"
+
+#ifdef	CL_DEBUG
+/*#define	SAVE_TMP	/* Save the file being worked on in tmp */
+#endif
+
+static	int	try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx);
+static	int	flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx);
+static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
+static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
+static	const	char	*pdf_nextobject(const char *ptr, size_t len);
+static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
+
+/*
+ * Extract the streams embedded in the PDF file open on descriptor desc.
+ *
+ * The file is mapped (and copied to the heap when memory allows), then:
+ *	- the "%PDF-1." header is checked;
+ *	- the last "%%EOF" and the "trailer" before it are located, and files
+ *	  whose trailer mentions "Encrypt" are rejected;
+ *	- the "xref" line before the trailer marks the end of the body;
+ *	- each indirect object of the body that holds a stream has that
+ *	  stream written to a temporary file created in dir, after
+ *	  ASCII85Decode and/or FlateDecode have been undone.
+ * Objects with a Length2 key (embedded fonts) or a Predictor above 1 are
+ * skipped.
+ *
+ * Returns CL_CLEAN when nothing went wrong, otherwise a CL_E* code
+ * (CL_EOPEN, CL_EMEM, CL_EFORMAT, CL_ETMPFILE, CL_EMAXFILES) or whatever
+ * try_flatedecode() reported.
+ *
+ * TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
+ */
+int
+cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
+{
+	off_t size;	/* total number of bytes in the file */
+	off_t bytesleft, trailerlength;
+	char *buf, *alloced;	/* start of memory mapped area */
+	const char *p, *q, *trailerstart;
+	const char *xrefstart;	/* cross reference table */
+	const struct cl_limits *limits;
+	/*size_t xreflength;*/
+	table_t *md5table;
+	int printed_predictor_message, printed_embedded_font_message, rc;
+	unsigned int files;
+	struct stat statb;
+
+	cli_dbgmsg("in cli_pdf(%s)\n", dir);
+
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+	if(size <= 7)	/* doesn't even include the file header */
+		return CL_EFORMAT;
+
+	p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(buf == MAP_FAILED)
+		return CL_EMEM;
+
+	alloced = cli_malloc(size);
+	if(alloced) {
+		/*
+		 * FIXME: now I have this, there's no need for the lack of
+		 *	support on systems without mmap, e.g. cygwin
+		 */
+		memcpy(alloced, buf, size);
+		munmap(buf, size);
+		p = alloced;
+	}
+
+	cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);
+
+	/* Lines are terminated by \r, \n or both */
+
+	/* File Header */
+	if(memcmp(p, "%PDF-1.", 7) != 0) {
+		if(alloced)
+			free(alloced);
+		else
+			munmap(buf, size);
+		return CL_EFORMAT;
+	}
+
+#if	0
+	q = pdf_nextlinestart(&p[6], size - 6);
+	if(q == NULL) {
+		if(alloced)
+			free(alloced);
+		else
+			munmap(buf, size);
+		return CL_EFORMAT;
+	}
+	bytesleft = size - (long)(q - p);
+	p = q;
+#else
+	p = &p[6];
+	bytesleft = size - 6;
+#endif
+
+	/* Find the file trailer */
+	for(q = &p[bytesleft - 6]; q > p; --q)
+		if(memcmp(q, "%%EOF", 5) == 0)
+			break;
+
+	if(q <= p) {
+		if(alloced)
+			free(alloced);
+		else
+			munmap(buf, size);
+		return CL_EFORMAT;
+	}
+
+	for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
+		if(memcmp(trailerstart, "trailer", 7) == 0)
+			break;
+
+	/*
+	 * q points to the end of the trailer section
+	 */
+	trailerlength = (long)(q - trailerstart);
+	if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
+		/*
+		 * This tends to mean that the file is, in effect, read-only
+		 */
+		if(alloced)
+			free(alloced);
+		else
+			munmap(buf, size);
+		cli_warnmsg("Encrypted PDF files not yet supported\n");
+		return CL_EFORMAT;
+	}
+
+	/*
+	 * not true, since edits may put data after the trailer
+	bytesleft -= trailerlength;
+	 */
+
+	/*
+	 * FIXME: Handle more than one xref section in the xref table
+	 */
+	for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
+		if(memcmp(xrefstart, "xref", 4) == 0)
+			/*
+			 * Make sure it's the start of the line, not a startxref
+			 * token
+			 */
+			if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
+				break;
+
+	if(xrefstart == p) {
+		if(alloced)
+			free(alloced);
+		else
+			munmap(buf, size);
+		return CL_EFORMAT;
+	}
+
+	printed_predictor_message = printed_embedded_font_message = 0;
+
+	md5table = tableCreate();
+	/*
+	 * not true, since edits may put data after the trailer
+	xreflength = (size_t)(trailerstart - xrefstart);
+	bytesleft -= xreflength;
+	 */
+
+	rc = CL_CLEAN;
+	files = 0;
+	limits = ctx->limits;
+
+	/*
+	 * The body section consists of a sequence of indirect objects
+	 */
+	while((p < xrefstart) && (rc == CL_CLEAN) &&
+	      ((q = pdf_nextobject(p, bytesleft)) != NULL)) {
+		int is_ascii85decode, is_flatedecode, fout, len, has_cr;
+		/*int object_number, generation_number;*/
+		const char *objstart, *objend, *streamstart, *streamend;
+		char *md5digest;
+		unsigned long length, objlen, real_streamlen, calculated_streamlen;
+		int is_embedded_font, predictor;
+		char fullname[NAME_MAX + 1];
+
+		if(q == xrefstart)
+			break;
+		if(memcmp(q, "xref", 4) == 0)
+			break;
+
+		/*object_number = atoi(q);*/
+		bytesleft -= (off_t)(q - p);
+		p = q;
+
+		if(memcmp(q, "endobj", 6) == 0)
+			continue;
+		/* <ctype.h> needs an unsigned char value, file bytes may be >= 0x80 */
+		if(!isdigit((unsigned char)*q)) {
+			cli_warnmsg("cli_pdf: Object number missing\n");
+			rc = CL_EFORMAT;
+			break;
+		}
+		q = pdf_nextobject(p, bytesleft);
+		if((q == NULL) || !isdigit((unsigned char)*q)) {
+			cli_warnmsg("cli_pdf: Generation number missing\n");
+			rc = CL_EFORMAT;
+			break;
+		}
+		/*generation_number = atoi(q);*/
+		bytesleft -= (off_t)(q - p);
+		p = q;
+
+		q = pdf_nextobject(p, bytesleft);
+		if((q == NULL) || (memcmp(q, "obj", 3) != 0)) {
+			cli_warnmsg("Indirect object missing \"obj\"\n");
+			rc = CL_EFORMAT;
+			break;
+		}
+
+		bytesleft -= (off_t)((q - p) + 3);
+		objstart = p = &q[3];
+		objend = cli_pmemstr(p, bytesleft, "endobj", 6);
+		if(objend == NULL) {
+			cli_dbgmsg("No matching endobj\n");
+			break;
+		}
+		bytesleft -= (off_t)((objend - p) + 6);
+		p = &objend[6];
+		objlen = (unsigned long)(objend - objstart);
+
+		/* Is this object a stream? */
+		streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
+		if(streamstart == NULL)
+			continue;
+
+		is_embedded_font = length = is_ascii85decode =
+			is_flatedecode = 0;
+		predictor = 1;
+
+		/*
+		 * TODO: handle F and FFilter?
+		 */
+		q = objstart;
+		while(q < streamstart) {
+			if(*q == '/') {	/* name object */
+				/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/
+				if(strncmp(++q, "Length ", 7) == 0) {
+					q += 7;
+					length = atoi(q);
+					while(isdigit((unsigned char)*q))
+						q++;
+					/*
+					 * Note: incremental updates are not
+					 *	supported
+					 */
+					if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
+						const char *r;
+						char b[14];
+
+						q += 4;
+						cli_dbgmsg("Length is in indirect obj %ld\n",
+							length);
+						snprintf(b, sizeof(b),
+							"\n%ld 0 obj", length);
+						length = (unsigned long)strlen(b);
+						r = cli_pmemstr(alloced ? alloced : buf,
+							size, b, length);
+						if(r == NULL) {
+							b[0] = '\r';
+							r = cli_pmemstr(alloced ? alloced : buf,
+								size, b, length);
+						}
+						if(r) {
+							const char *base = alloced ? alloced : buf;
+
+							r += length - 1;
+							/*
+							 * r was found by searching
+							 * the whole file, so what
+							 * is left is measured from
+							 * the end of the file, not
+							 * from the current object
+							 */
+							r = pdf_nextobject(r, (size_t)((base + size) - r));
+							if(r) {
+								length = atoi(r);
+								while(isdigit((unsigned char)*r))
+									r++;
+								cli_dbgmsg("length in '%s' %ld\n",
+									&b[1],
+									length);
+							}
+						} else
+							cli_warnmsg("Couldn't find '%s'\n",
+								&b[1]);
+					}
+					q--;
+				} else if(strncmp(q, "Length2 ", 8) == 0)
+					is_embedded_font = 1;
+				else if(strncmp(q, "Predictor ", 10) == 0) {
+					q += 10;
+					predictor = atoi(q);
+					while(isdigit((unsigned char)*q))
+						q++;
+					q--;
+				} else if(strncmp(q, "FlateDecode", 11) == 0) {
+					is_flatedecode = 1;
+					q += 11;
+				} else if(strncmp(q, "ASCII85Decode", 13) == 0) {
+					is_ascii85decode = 1;
+					q += 13;
+				}
+			}
+			q = pdf_nextobject(q, (size_t)(streamstart - q));
+			if(q == NULL)
+				break;
+		}
+
+		if(is_embedded_font) {
+			/*
+			 * Need some documentation, the only I can find a
+			 * reference to is not free, if some kind soul wishes
+			 * to donate a copy, please contact me!
+			 * (http://safari.adobepress.com/0321304748)
+			 */
+			if(!printed_embedded_font_message) {
+				cli_dbgmsg("Embedded fonts not yet supported\n");
+				printed_embedded_font_message = 1;
+			}
+			continue;
+		}
+		if(predictor > 1) {
+			/*
+			 * Needs some thought
+			 */
+			if(!printed_predictor_message) {
+				cli_dbgmsg("Predictor %d not honoured for embedded image\n",
+					predictor);
+				printed_predictor_message = 1;
+			}
+			continue;
+		}
+
+		/* objend points to the end of the object (start of "endobj") */
+		streamstart += 6;	/* go past the word "stream" */
+		len = (int)(objend - streamstart);
+		q = pdf_nextlinestart(streamstart, len);
+		if(q == NULL)
+			break;
+		len -= (int)(q - streamstart);
+		streamstart = q;
+		streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
+		if(streamend == NULL) {
+			streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
+			if(streamend == NULL) {
+				cli_dbgmsg("No endstream\n");
+				break;
+			}
+			has_cr = 1;
+		} else
+			has_cr = 0;
+		snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir);
+#if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
+		fout = mkstemp(fullname);
+#elif	defined(C_WINDOWS)
+		if(_mktemp(fullname) == NULL) {
+			/* mktemp only allows 26 files */
+			char *name = cli_gentemp(dir);
+			if(name == NULL)
+				fout = -1;
+			else {
+				strcpy(fullname, name);
+				free(name);
+				fout = open(fullname,
+					O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+			}
+		} else
+			fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#else
+		mktemp(fullname);
+		fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#endif
+
+		if(fout < 0) {
+			cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno));
+			rc = CL_ETMPFILE;
+			break;
+		}
+
+		/*
+		 * Calculate the length ourself, the Length parameter is often
+		 * wrong
+		 */
+		if((*--streamend != '\n') && (*streamend != '\r'))
+			streamend++;
+		else if(has_cr && (*--streamend != '\r'))
+			streamend++;
+
+		if(streamend <= streamstart) {
+			close(fout);
+			cli_dbgmsg("Empty stream\n");
+			unlink(fullname);
+			continue;
+		}
+		calculated_streamlen = (int)(streamend - streamstart);
+		real_streamlen = length;
+
+		if(calculated_streamlen != real_streamlen) {
+			cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
+			/*
+			 * Length comes from the file being scanned: never
+			 * read more than the bytes that were actually found
+			 * before "endstream"
+			 */
+			if(real_streamlen > calculated_streamlen)
+				real_streamlen = calculated_streamlen;
+		}
+
+		cli_dbgmsg("length %ld, calculated_streamlen %ld isFlate %d isASCII85 %d\n",
+			length, calculated_streamlen,
+			is_flatedecode, is_ascii85decode);
+
+#if	0
+		/* FIXME: this isn't right... */
+		if(length)
+			/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
+			streamlen = MIN(length, streamlen);
+#endif
+
+		if(is_ascii85decode) {
+			unsigned char *tmpbuf = cli_malloc(calculated_streamlen * 5);
+			int ret;
+
+			if(tmpbuf == NULL) {
+				close(fout);
+				unlink(fullname);
+				rc = CL_EMEM;
+				continue;
+			}
+
+			ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);
+
+			if(ret == -1) {
+				free(tmpbuf);
+				close(fout);
+				unlink(fullname);
+				rc = CL_EFORMAT;
+				continue;
+			}
+			if(ret) {
+				unsigned char *t;
+
+				real_streamlen = ret;
+				/*
+				 * free unused trailing bytes; the decoded
+				 * data can be longer than the encoded data
+				 * (each 'z' expands to four bytes), so shrink
+				 * to the decoded length
+				 */
+				t = (unsigned char *)cli_realloc(tmpbuf,
+					real_streamlen);
+				if(t == NULL) {
+					free(tmpbuf);
+					close(fout);
+					unlink(fullname);
+					rc = CL_EMEM;
+					continue;
+				}
+				tmpbuf = t;
+				/*
+				 * Note that it will probably be both
+				 * ascii85encoded and flateencoded
+				 */
+				if(is_flatedecode)
+					rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
+				else
+					/* save the decoded bytes, not the encoded text */
+					cli_writen(fout, (const char *)tmpbuf, real_streamlen);
+			}
+			free(tmpbuf);
+		} else if(is_flatedecode)
+			rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
+
+		else {
+			cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
+				(unsigned long)real_streamlen);
+			cli_writen(fout, (const char *)streamstart, real_streamlen);
+		}
+
+		close(fout);
+		md5digest = cli_md5file(fullname);
+		if(tableFind(md5table, md5digest) >= 0) {
+			cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
+			unlink(fullname);
+		} else
+			tableInsert(md5table, md5digest, 1);
+		free(md5digest);
+		cli_dbgmsg("cli_pdf: extracted file %d to %s\n", ++files,
+			fullname);
+		if(limits && limits->maxfiles && (files >= limits->maxfiles)) {
+			/* Bug 698 */
+			cli_dbgmsg("cli_pdf: number of files exceeded %u\n", limits->maxfiles);
+			rc = CL_EMAXFILES;
+		}
+	}
+
+	if(alloced)
+		free(alloced);
+	else
+		munmap(buf, size);
+
+	tableDestroy(md5table);
+
+	cli_dbgmsg("cli_pdf: returning %d\n", rc);
+	return rc;
+}
+
+/*
+ * flate inflation - returns clamAV status, e.g CL_SUCCESS, CL_EZIP
+ *
+ * The stream is first inflated using real_len.  If that fails and
+ * calculated_len is a different value, a second attempt is made using
+ * calculated_len.  The status of the last attempt is returned.
+ */
+static int
+try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx)
+{
+	int status = flatedecode(buf, real_len, fout, ctx);
+
+	if(status != CL_SUCCESS) {
+		if(real_len == calculated_len) {
+			/*
+			 * Nothing more we can do to inflate
+			 */
+			cli_warnmsg("Bad compression in flate stream\n");
+			return status;
+		}
+
+		status = flatedecode(buf, calculated_len, fout, ctx);
+		if(status != CL_SUCCESS)
+			/* i.e. the PDF file is broken :-( */
+			cli_warnmsg("cli_pdf: Bad compressed block length in flate stream\n");
+	}
+
+	return status;
+}
+
+/*
+ * Inflate the len bytes at buf with zlib and write the result to fout.
+ *
+ * Returns CL_CLEAN without doing anything when len is 0, CL_SUCCESS when
+ * the data was inflated and written, CL_EIO when writing to fout fails and
+ * CL_EZIP when zlib reports an error.  When ctx->limits sets maxfilesize or
+ * maxratio and the limit is exceeded, the result is CL_VIRUS (with
+ * *ctx->virname set) if BLOCKMAX is true, CL_EZIP otherwise.
+ *
+ * Once inflateInit() has succeeded, every return path calls inflateEnd()
+ * so that zlib's state is released.
+ */
+static int
+flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx)
+{
+	int zstat;
+	off_t nbytes;
+	z_stream stream;
+	unsigned char output[BUFSIZ];
+#ifdef	SAVE_TMP
+	char tmpfilename[16];
+	int tmpfd;
+#endif
+
+	cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);
+
+	if(len == 0) {
+		cli_warnmsg("cli_pdf: flatedecode len == 0\n");
+		return CL_CLEAN;
+	}
+
+#ifdef	SAVE_TMP
+	/*
+	 * Copy the embedded area for debugging, so that if it falls over
+	 * we have a copy of the offending data. This is debugging code
+	 * that you shouldn't of course install in a live environment. I am
+	 * not interested in hearing about security issues with this section
+	 * of the parser.
+	 */
+	strcpy(tmpfilename, "/tmp/pdfXXXXXX");
+	tmpfd = mkstemp(tmpfilename);
+	if(tmpfd < 0) {
+		perror(tmpfilename);
+		cli_errmsg("Can't make debugging file\n");
+	} else {
+		FILE *tmpfp = fdopen(tmpfd, "w");
+
+		if(tmpfp) {
+			fwrite(buf, sizeof(char), len, tmpfp);
+			fclose(tmpfp);
+			cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
+				tmpfilename);
+		} else
+			cli_errmsg("cli_pdf: can't fdopen debugging file\n");
+	}
+#endif
+	stream.zalloc = (alloc_func)Z_NULL;
+	stream.zfree = (free_func)Z_NULL;
+	stream.opaque = (void *)NULL;
+	stream.next_in = (Bytef *)buf;
+	stream.avail_in = len;
+	stream.next_out = output;
+	stream.avail_out = sizeof(output);
+
+	zstat = inflateInit(&stream);
+	if(zstat != Z_OK) {
+		cli_warnmsg("cli_pdf: inflateInit failed");
+		return CL_EZIP;
+	}
+
+	nbytes = 0;
+
+	while(stream.avail_in) {
+		zstat = inflate(&stream, Z_NO_FLUSH);	/* zlib */
+		switch(zstat) {
+			case Z_OK:
+				if(stream.avail_out == 0) {
+					/* the output buffer is full, flush it */
+					int written = cli_writen(fout, output, sizeof(output));
+
+					if(written < 0) {
+						/* don't count a failure as data written */
+						inflateEnd(&stream);
+						return CL_EIO;
+					}
+					nbytes += written;
+
+					if(ctx->limits &&
+					   ctx->limits->maxfilesize &&
+					   (nbytes > (off_t) ctx->limits->maxfilesize)) {
+						cli_dbgmsg("cli_pdf: flatedecode size exceeded (%lu)\n",
+							(unsigned long)nbytes);
+						inflateEnd(&stream);
+						if(BLOCKMAX) {
+							*ctx->virname = "PDF.ExceededFileSize";
+							return CL_VIRUS;
+						}
+						return CL_EZIP;
+					}
+					stream.next_out = output;
+					stream.avail_out = sizeof(output);
+				}
+				continue;
+			case Z_STREAM_END:
+				break;
+			default:
+				if(stream.msg)
+					cli_dbgmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
+						(unsigned long)nbytes,
+						stream.msg);
+				else
+					cli_dbgmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
+						(unsigned long)nbytes, zstat);
+				inflateEnd(&stream);
+				/* Z_OK has its own case, so this is always an error */
+				return CL_EZIP;
+		}
+		break;
+	}
+
+	/* write whatever is left in the partly filled output buffer */
+	if(stream.avail_out != sizeof(output))
+		if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
+			inflateEnd(&stream);
+			return CL_EIO;
+		}
+
+	/*
+	 * On BSD systems total_in and total_out are "long long", so these
+	 * numbers could (in theory) get truncated in the debug statement
+	 */
+	cli_dbgmsg("cli_pdf: flatedecode in=%lu out=%lu ratio %lu (max %u)\n",
+		(unsigned long)stream.total_in, (unsigned long)stream.total_out,
+		(unsigned long)(stream.total_out / stream.total_in),
+		ctx->limits ? ctx->limits->maxratio : 0);
+
+	if(ctx->limits &&
+	   ctx->limits->maxratio &&
+	   ((stream.total_out / stream.total_in) > ctx->limits->maxratio)) {
+		cli_dbgmsg("cli_pdf: flatedecode Max ratio reached\n");
+		inflateEnd(&stream);
+		if(BLOCKMAX) {
+			*ctx->virname = "Oversized.PDF";
+			return CL_VIRUS;
+		}
+		return CL_EZIP;
+	}
+
+#ifdef	SAVE_TMP
+	unlink(tmpfilename);
+#endif
+	return inflateEnd(&stream) == Z_OK ? CL_SUCCESS : CL_EZIP;
+}
+
+/*
+ * ascii85 inflation, returns number of bytes in output, -1 for error
+ *
+ * buf holds len bytes of encoded text; the decoded bytes are stored in
+ * output, which the caller must have made large enough (a 'z' expands to
+ * four bytes, every other group of five characters to four bytes).
+ * Decoding stops at the "~>" marker or after len bytes.  White space is
+ * skipped, any other character outside the alphabet is an error.
+ *
+ * See http://www.piclist.com/techref/method/encode.htm (look for base85)
+ */
+static int
+ascii85decode(const char *buf, off_t len, unsigned char *output)
+{
+	const char *ptr;
+	uint32_t sum = 0;
+	int quintet = 0;
+	int ret = 0;
+
+	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
+		cli_warnmsg("ascii85decode: no EOF marker found\n");
+
+	ptr = buf;
+
+	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);
+
+	while(len > 0) {
+		/*
+		 * Read through unsigned char so that a 0xFF byte cannot be
+		 * mistaken for EOF and so that isspace() gets a valid value
+		 */
+		int byte = (int)(unsigned char)*ptr++;
+
+		len--;
+
+		/* only look at the next byte if there is one */
+		if((byte == '~') && (len > 0) && (*ptr == '>'))
+			byte = EOF;
+
+		if(byte >= '!' && byte <= 'u') {
+			sum = (sum * 85) + ((uint32_t)byte - '!');
+			if(++quintet == 5) {
+				*output++ = (unsigned char)(sum >> 24);
+				*output++ = (unsigned char)((sum >> 16) & 0xFF);
+				*output++ = (unsigned char)((sum >> 8) & 0xFF);
+				*output++ = (unsigned char)(sum & 0xFF);
+				ret += 4;
+				quintet = 0;
+				sum = 0;
+			}
+		} else if(byte == 'z') {
+			if(quintet) {
+				cli_warnmsg("ascii85decode: unexpected 'z'\n");
+				return -1;
+			}
+			*output++ = '\0';
+			*output++ = '\0';
+			*output++ = '\0';
+			*output++ = '\0';
+			ret += 4;
+		} else if(byte == EOF) {
+			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
+			if(quintet) {
+				int i;
+
+				if(quintet == 1) {
+					cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n");
+					return -1;
+				}
+				for(i = quintet; i < 5; i++)
+					sum *= 85;
+
+				if(quintet > 1)
+					sum += (0xFFFFFF >> ((quintet - 2) * 8));
+				/*
+				 * a final group of n characters holds n - 1
+				 * bytes, which is what the loop below stores
+				 */
+				ret += quintet - 1;
+				for(i = 0; i < quintet - 1; i++)
+					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
+				quintet = 0;
+			}
+			len = 0;
+			break;
+		} else if(!isspace(byte)) {
+			cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
+				byte & 0xFF, (unsigned long)len);
+			return -1;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Find the start of the next line
+ *
+ * Skips to the first line terminator in the len bytes at ptr, then past the
+ * run of terminators, and returns the address of the character after them.
+ * Returns NULL when len is 0 or when the bytes run out first.
+ *
+ * Note that strchr() also matches the terminating NUL of "\r\n", so a NUL
+ * byte in the data counts as a line terminator.
+ */
+static const char *
+pdf_nextlinestart(const char *ptr, size_t len)
+{
+	/* with nothing to look at, --len below would wrap around */
+	if(len == 0)
+		return NULL;
+
+	while(strchr("\r\n", *ptr) == NULL) {
+		if(--len == 0L)
+			return NULL;
+		ptr++;
+	}
+	while(strchr("\r\n", *ptr) != NULL) {
+		if(--len == 0L)
+			return NULL;
+		ptr++;
+	}
+	return ptr;
+}
+
+/*
+ * Return the start of the next PDF object.
+ * This assumes that we're not in a stream.
+ *
+ * The scan starts inside the current object: characters are skipped until a
+ * separator (white space, '[' or '<'), a line end or a '%' comment has been
+ * passed, and the first other character after that is returned.  A '/'
+ * (start of a name object) is returned as soon as it is seen.  Returns NULL
+ * when the len bytes at ptr are used up first.
+ */
+static const char *
+pdf_nextobject(const char *ptr, size_t len)
+{
+	int in_current = 1;	/* still inside the object we started in */
+
+	while(len > 0) {
+		const char c = *ptr;
+
+		if((c == '\n') || (c == '\r') || (c == '%')) {
+			/* line end or comment: carry on at the next line */
+			const char *next = pdf_nextlinestart(ptr, len);
+
+			if(next == NULL)
+				return NULL;
+			len -= (size_t)(next - ptr);
+			ptr = next;
+			in_current = 0;
+		} else if((c == ' ') || (c == '\t') || (c == '\v') ||
+			  (c == '\f') || (c == '[') || (c == '<')) {
+			/* separator, or the start of an array/dictionary */
+			in_current = 0;
+			ptr++;
+			len--;
+		} else if(c == '/') {
+			/* Start of a name object */
+			return ptr;
+		} else {
+			if(!in_current)
+				/* TODO: parse and return object type */
+				return ptr;
+			ptr++;
+			len--;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * like cli_memstr - but returns the location of the match
+ *
+ * Returns the address of the first occurrence of the ns bytes at needle
+ * within the hs bytes at haystack, or NULL if there is none.
+ * FIXME: need a case insensitive version
+ */
+static const char *
+cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
+{
+	const char *cursor, *last;
+
+	if(haystack == needle)
+		return haystack;
+
+	if(hs < ns)
+		return NULL;
+
+	if(memcmp(haystack, needle, ns) == 0)
+		return haystack;
+
+	/* a match cannot start after this position */
+	last = haystack + (hs - ns);
+
+	for(cursor = haystack; cursor <= last; cursor++) {
+		/* jump to the next candidate first character */
+		cursor = memchr(cursor, needle[0], (size_t)(last - cursor) + 1);
+		if(cursor == NULL)
+			break;
+
+		if(memcmp(cursor, needle, ns) == 0)
+			return cursor;
+	}
+
+	return NULL;
+}
+#else	/*!HAVE_MMAP*/
+
+#include "clamav.h"
+#include "others.h"
+#include "pdf.h"
+
+/*
+ * Fallback used when HAVE_MMAP is not defined: nothing is decoded, a
+ * warning is issued and the file is reported as clean.
+ */
+int
+cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
+{
+	/* none of the arguments is needed by this fallback */
+	(void)dir;
+	(void)desc;
+	(void)ctx;
+
+	cli_warnmsg("File not decoded - PDF decoding needs mmap() (for now)\n");
+
+	return CL_CLEAN;
+}
+#endif

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,2232 @@
+/*
+ *  Copyright (C) 2004 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *			      aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <time.h>
+#include <stdarg.h>
+
+#include "cltypes.h"
+#include "clamav.h"
+#include "others.h"
+#include "pe.h"
+#include "petite.h"
+#include "fsg.h"
+#include "spin.h"
+#include "upx.h"
+#include "yc.h"
+#include "aspack.h"
+#include "wwunpack.h"
+#include "unsp.h"
+#include "scanners.h"
+#include "str.h"
+#include "execs.h"
+#include "md5.h"
+#include "mew.h"
+#include "upack.h"
+#include "matcher.h"
+#include "matcher-bm.h"
+
+#ifndef	O_BINARY
+#define	O_BINARY	0
+#endif
+
+/*
+ * PE-specific configuration word, tested against PE_CONF_* bits.
+ * Expands to a member access on `ctx`, so a `ctx` must be in scope.
+ */
+#define DCONF ctx->dconf->pe
+
+/* Magic values compared against header fields read from the file */
+#define IMAGE_DOS_SIGNATURE	    0x5a4d	    /* MZ */
+#define IMAGE_DOS_SIGNATURE_OLD	    0x4d5a          /* ZM */
+#define IMAGE_NT_SIGNATURE	    0x00004550
+/* Optional header magic: PE32P_SIGNATURE selects the PE32+ code path */
+#define PE32_SIGNATURE		    0x010b
+#define PE32P_SIGNATURE		    0x020b
+
+/* Shorthands for the two members of the `pe_opt` union in cli_scanpe() */
+#define optional_hdr64 pe_opt.opt64
+#define optional_hdr32 pe_opt.opt32
+
+/*
+ * 24-byte patterns named after the UPX NRV2B / NRV2D / NRV2E methods.
+ * NOTE(review): their use is not visible in this part of the file -
+ * presumably matched against code to choose a decompressor; confirm at
+ * the point of use.
+ */
+#define UPX_NRV2B "\x11\xdb\x11\xc9\x01\xdb\x75\x07\x8b\x1e\x83\xee\xfc\x11\xdb\x11\xc9\x11\xc9\x75\x20\x41\x01\xdb"
+#define UPX_NRV2D "\x83\xf0\xff\x74\x78\xd1\xf8\x89\xc5\xeb\x0b\x01\xdb\x75\x07\x8b\x1e\x83\xee\xfc\x11\xdb\x11\xc9"
+#define UPX_NRV2E "\xeb\x52\x31\xc9\x83\xe8\x03\x72\x11\xc1\xe0\x08\x8a\x06\x46\x83\xf0\xff\x74\x75\xd1\xf8\x89\xc5"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define EC16(x) le16_to_host(x)
+/*
+ * lower and upper boundary alignment (size vs offset):
+ * PEALIGN rounds o down to a multiple of a, PESALIGN rounds o up to one.
+ * Both yield o unchanged when a is 0. Each argument is evaluated more
+ * than once, so do not pass expressions with side effects.
+ */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o))
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o))
+
+/*
+ * Size guard for unpackers. NAME is a string literal used in the log
+ * message and in the reported name; CHK is the size to test.
+ * When ctx->limits->maxfilesize is set and CHK exceeds it, the macro logs,
+ * frees exe_sections and RETURNS from the enclosing function: CL_VIRUS
+ * (with *ctx->virname set) if BLOCKMAX is true, CL_CLEAN otherwise.
+ * Relies on `ctx` and `exe_sections` being in scope at the expansion site.
+ * CHK is evaluated more than once.
+ */
+#define CLI_UNPSIZELIMITS(NAME,CHK) \
+if(ctx->limits && ctx->limits->maxfilesize && (CHK) > ctx->limits->maxfilesize) { \
+    cli_dbgmsg(NAME": Sizes exceeded (%lu > %lu)\n", (CHK), ctx->limits->maxfilesize); \
+    free(exe_sections); \
+    if(BLOCKMAX) { \
+        *ctx->virname = "PE."NAME".ExceededFileSize"; \
+        return CL_VIRUS; \
+    } else { \
+        return CL_CLEAN; \
+    } \
+}
+
+/*
+ * Create the temporary output file for an unpacker.
+ * Stores the name from cli_gentemp(NULL) in `tempfile` and the opened
+ * descriptor in `ndesc`; both must be declared at the expansion site.
+ * FREEME is pasted directly after cli_multifree, so it has to be a
+ * parenthesised argument list. On failure the macro releases that list
+ * and RETURNS from the enclosing function: CL_EMEM when no name could be
+ * generated, CL_EIO when the file could not be opened.
+ */
+#define CLI_UNPTEMP(NAME,FREEME) \
+if(!(tempfile = cli_gentemp(NULL))) { \
+    cli_multifree FREEME; \
+    return CL_EMEM; \
+} \
+if((ndesc = open(tempfile, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU)) < 0) { \
+    cli_dbgmsg(NAME": Can't create file %s\n", tempfile); \
+    free(tempfile); \
+    cli_multifree FREEME; \
+    return CL_EIO; \
+}
+
+/* Remove the temporary file unless cli_leavetemps_flag is set */
+#define CLI_TMPUNLK() if(!cli_leavetemps_flag) unlink(tempfile)
+
+/*
+ * `case 0:` arm meant to be pasted into a switch (see CLI_UNPRESULTS_).
+ * Logs success, closes ndesc, unlinks and frees tempfile, executes
+ * FREESEC, then clears `found` and sets `upx_success` before breaking
+ * out of the switch. The temp file is unlinked unconditionally here,
+ * without consulting cli_leavetemps_flag.
+ */
+#define FSGCASE(NAME,FREESEC) \
+    case 0: /* Unpacked and NOT rebuilt */ \
+	cli_dbgmsg(NAME": Successfully decompressed\n"); \
+	close(ndesc); \
+	unlink(tempfile); \
+	free(tempfile); \
+	FREESEC; \
+	found = 0; \
+	upx_success = 1; \
+	break; /* FSG ONLY! - scan raw data after upx block */
+
+/*
+ * `case 2:` arm for a switch: frees `spinned`, closes ndesc and unlinks
+ * the temp file. If BLOCKMAX is true it frees tempfile and exe_sections,
+ * sets *ctx->virname and RETURNS CL_VIRUS from the enclosing function;
+ * otherwise it frees tempfile and breaks out of the switch.
+ * The final line ends in a backslash, so the line following the macro is
+ * part of the definition and must stay empty.
+ */
+#define SPINCASE() \
+    case 2: \
+	free(spinned); \
+	close(ndesc); \
+	unlink(tempfile); \
+	cli_dbgmsg("PESpin: Size exceeded\n"); \
+	if(BLOCKMAX) { \
+	    free(tempfile); \
+	    free(exe_sections); \
+	    *ctx->virname = "PE.Pespin.ExceededFileSize"; \
+	    return CL_VIRUS; \
+	} \
+	free(tempfile); \
+	break; \
+
+/*
+ * Expands to a switch over EXPR that handles the outcome of an unpacker.
+ *   GOOD     - frees the FREEME list and exe_sections, syncs and rewinds
+ *              ndesc, scans it with cli_magic_scandesc() and RETURNS
+ *              CL_VIRUS or CL_CLEAN from the enclosing function. The temp
+ *              file is removed via CLI_TMPUNLK().
+ *   FSGSTUFF - extra case arms (or a plain expression) pasted verbatim.
+ *   default  - logs the failure, closes ndesc, unlinks the temp file and
+ *              frees the FREEME list and tempfile; execution then
+ *              continues after the switch.
+ * FREEME is pasted directly after cli_multifree, so it has to be a
+ * parenthesised argument list. Relies on `tempfile`, `ndesc`,
+ * `exe_sections` and `ctx` being in scope at the expansion site.
+ */
+#define CLI_UNPRESULTS_(NAME,FSGSTUFF,EXPR,GOOD,FREEME) \
+    switch(EXPR) { \
+    case GOOD: /* Unpacked and rebuilt */ \
+	if(cli_leavetemps_flag) \
+	    cli_dbgmsg(NAME": Unpacked and rebuilt executable saved in %s\n", tempfile); \
+	else \
+	    cli_dbgmsg(NAME": Unpacked and rebuilt executable\n"); \
+	cli_multifree FREEME; \
+        free(exe_sections); \
+	fsync(ndesc); \
+	lseek(ndesc, 0, SEEK_SET); \
+	cli_dbgmsg("***** Scanning rebuilt PE file *****\n"); \
+	if(cli_magic_scandesc(ndesc, ctx) == CL_VIRUS) { \
+	    close(ndesc); \
+	    CLI_TMPUNLK(); \
+	    free(tempfile); \
+	    return CL_VIRUS; \
+	} \
+	close(ndesc); \
+	CLI_TMPUNLK(); \
+	free(tempfile); \
+	return CL_CLEAN; \
+\
+FSGSTUFF; \
+\
+    default: \
+	cli_dbgmsg(NAME": Unpacking failed\n"); \
+	close(ndesc); \
+	unlink(tempfile); \
+	cli_multifree FREEME; \
+        free(tempfile); \
+    }
+
+
+/*
+ * Front ends for CLI_UNPRESULTS_:
+ *   CLI_UNPRESULTS     - no extra arm (FSGSTUFF is NULL, an empty statement)
+ *   CLI_UNPRESULTSFSG1 - adds the FSGCASE arm, which also frees `sections`
+ *   CLI_UNPRESULTSFSG2 - adds the FSGCASE arm with nothing extra to free
+ */
+#define CLI_UNPRESULTS(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,NULL,EXPR,GOOD,FREEME)
+#define CLI_UNPRESULTSFSG1(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,FSGCASE(NAME,free(sections)),EXPR,GOOD,FREEME)
+#define CLI_UNPRESULTSFSG2(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,FSGCASE(NAME,NULL),EXPR,GOOD,FREEME)
+
+/*
+ * Node of a singly linked list of 32-bit offsets.
+ * NOTE(review): no user of this type is visible in this part of the file.
+ */
+struct offset_list {
+    uint32_t offset;
+    struct offset_list *next; /* following node */
+};
+
+/*
+ * free() every pointer in the argument list.
+ * The first argument is always released; the remaining ones are released
+ * in order until a null pointer is read, so the list must end with one.
+ */
+static void cli_multifree(void *f, ...) {
+    va_list args;
+    void *victim;
+
+    free(f);
+
+    va_start(args, f);
+    for(victim = va_arg(args, void *); victim != NULL; victim = va_arg(args, void *))
+	free(victim);
+    va_end(args);
+}
+
+/*
+ * Translate a relative virtual address into a raw file offset.
+ *
+ * rva      - address to translate
+ * shp, nos - section table and its number of entries
+ * err      - set to 0 on success, 1 when the address cannot be mapped
+ * fsize    - file size, used to validate addresses below hdr_size
+ * hdr_size - addresses below this value are not looked up in the table
+ *
+ * Returns the file offset, or 0 with *err == 1 on failure. Since 0 can
+ * also be a valid offset, callers have to look at *err.
+ */
+static uint32_t cli_rawaddr(uint32_t rva, struct cli_exe_section *shp, uint16_t nos, unsigned int *err,	size_t fsize, uint32_t hdr_size)
+{
+    int idx;
+
+    if(rva < hdr_size) {
+	/* Out of section EP - mapped to imagebase+rva */
+	if(rva < fsize) {
+	    *err = 0;
+	    return rva;
+	}
+	*err = 1;
+	return 0;
+    }
+
+    /* walk the table backwards: the highest-indexed matching section wins */
+    for(idx = nos - 1; idx >= 0; idx--) {
+	if(!shp[idx].rsz)
+	    continue;
+	if(shp[idx].rva <= rva && shp[idx].rsz > rva - shp[idx].rva) {
+	    uint32_t raw_off = rva - shp[idx].rva + shp[idx].raw;
+
+	    *err = 0;
+	    return raw_off;
+	}
+    }
+
+    *err = 1;
+    return 0;
+}
+
+
+/*
+static int cli_ddump(int desc, int offset, int size, const char *file) {
+	int pos, ndesc, bread, sum = 0;
+	char buff[FILEBUFF];
+
+
+    cli_dbgmsg("in ddump()\n");
+
+    if((pos = lseek(desc, 0, SEEK_CUR)) == -1) {
+	cli_dbgmsg("Invalid descriptor\n");
+	return -1;
+    }
+
+    if(lseek(desc, offset, SEEK_SET) == -1) {
+	cli_dbgmsg("lseek() failed\n");
+	lseek(desc, pos, SEEK_SET);
+	return -1;
+    }
+
+    if((ndesc = open(file, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU)) < 0) {
+	cli_dbgmsg("Can't create file %s\n", file);
+	lseek(desc, pos, SEEK_SET);
+	return -1;
+    }
+
+    while((bread = cli_readn(desc, buff, FILEBUFF)) > 0) {
+	if(sum + bread >= size) {
+	    if(write(ndesc, buff, size - sum) == -1) {
+		cli_dbgmsg("Can't write to file\n");
+		lseek(desc, pos, SEEK_SET);
+		close(ndesc);
+		unlink(file);
+		return -1;
+	    }
+	    break;
+	} else {
+	    if(write(ndesc, buff, bread) == -1) {
+		cli_dbgmsg("Can't write to file\n");
+		lseek(desc, pos, SEEK_SET);
+		close(ndesc);
+		unlink(file);
+		return -1;
+	    }
+	}
+	sum += bread;
+    }
+
+    close(ndesc);
+    lseek(desc, pos, SEEK_SET);
+    return 0;
+}
+*/
+
+/*
+ * Position fd at the start of the raw data of section s.
+ * Returns 0 when the section has no raw data (rsz == 0) or when lseek()
+ * fails (-1 + 1); otherwise the resulting offset plus one, which is
+ * non-zero.
+ */
+static off_t cli_seeksect(int fd, struct cli_exe_section *s) {
+    off_t pos;
+
+    if(s->rsz == 0)
+	return 0;
+
+    pos = lseek(fd, s->raw, SEEK_SET);
+    if(pos == -1)
+	cli_dbgmsg("cli_seeksect: lseek() failed\n");
+
+    return pos + 1;
+}
+
+/*
+ * Compute the MD5 digest of the raw data of section s.
+ *
+ * fd     - descriptor of the file being scanned; its position is changed
+ * s      - section whose raw data (s->rsz bytes at s->raw) is hashed
+ * digest - receives the output of cli_md5_final(); the caller in this file
+ *          passes a 16-byte buffer
+ *
+ * Returns 1 when digest has been filled in. Returns 0 when the section is
+ * larger than CLI_MAX_ALLOCATION, when cli_seeksect() reports nothing to
+ * seek to, when the buffer cannot be allocated, or when the section data
+ * cannot be read in full.
+ */
+static unsigned int cli_md5sect(int fd, struct cli_exe_section *s, unsigned char *digest) {
+    void *hashme;
+    cli_md5_ctx md5;
+
+    if (s->rsz > CLI_MAX_ALLOCATION) {
+	cli_dbgmsg("cli_md5sect: skipping md5 calculation for too big section\n");
+	return 0;
+    }
+
+    if(!cli_seeksect(fd, s)) return 0;
+
+    if(!(hashme=cli_malloc(s->rsz))) {
+	cli_dbgmsg("cli_md5sect: out of memory\n");
+	return 0;
+    }
+
+    if(cli_readn(fd, hashme, s->rsz)!=s->rsz) {
+	cli_dbgmsg("cli_md5sect: unable to read section data\n");
+	/* release the buffer on a short read instead of leaking it */
+	free(hashme);
+	return 0;
+    }
+
+    cli_md5_init(&md5);
+    cli_md5_update(&md5, hashme, s->rsz);
+    free(hashme);
+    cli_md5_final(digest, &md5);
+    return 1;
+}
+
+int cli_scanpe(int desc, cli_ctx *ctx)
+{
+	uint16_t e_magic; /* DOS signature ("MZ") */
+	uint16_t nsections;
+	uint32_t e_lfanew; /* address of new exe header */
+	uint32_t ep, vep; /* entry point (raw, virtual) */
+	uint8_t polipos = 0;
+	time_t timestamp;
+	struct pe_image_file_hdr file_hdr;
+	union {
+	    struct pe_image_optional_hdr64 opt64;
+	    struct pe_image_optional_hdr32 opt32;
+	} pe_opt;
+	struct pe_image_section_hdr *section_hdr;
+	struct stat sb;
+	char sname[9], buff[4096], epbuff[4096], *tempfile;
+	uint32_t epsize;
+	ssize_t bytes;
+	unsigned int i, found, upx_success = 0, min = 0, max = 0, err;
+	unsigned int ssize = 0, dsize = 0, dll = 0, pe_plus = 0;
+	int (*upxfn)(char *, uint32_t, char *, uint32_t *, uint32_t, uint32_t, uint32_t) = NULL;
+	char *src = NULL, *dest = NULL;
+	int ndesc, ret = CL_CLEAN, upack = 0, native=0;
+	size_t fsize;
+	uint32_t valign, falign, hdr_size, j;
+	struct cli_exe_section *exe_sections;
+	struct cli_matcher *md5_sect;
+
+
+    if(!ctx) {
+	cli_errmsg("cli_scanpe: ctx == NULL\n");
+	return CL_ENULLARG;
+    }
+
+    if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) {
+	cli_dbgmsg("Can't read DOS signature\n");
+	return CL_CLEAN;
+    }
+
+    if(EC16(e_magic) != IMAGE_DOS_SIGNATURE && EC16(e_magic) != IMAGE_DOS_SIGNATURE_OLD) {
+	cli_dbgmsg("Invalid DOS signature\n");
+	return CL_CLEAN;
+    }
+
+    lseek(desc, 58, SEEK_CUR); /* skip to the end of the DOS header */
+
+    if(cli_readn(desc, &e_lfanew, sizeof(e_lfanew)) != sizeof(e_lfanew)) {
+	cli_dbgmsg("Can't read new header address\n");
+	/* truncated header? */
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+		*ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	return CL_CLEAN;
+    }
+
+    e_lfanew = EC32(e_lfanew);
+    cli_dbgmsg("e_lfanew == %d\n", e_lfanew);
+    if(!e_lfanew) {
+	cli_dbgmsg("Not a PE file\n");
+	return CL_CLEAN;
+    }
+
+    if(lseek(desc, e_lfanew, SEEK_SET) < 0) {
+	/* probably not a PE file */
+	cli_dbgmsg("Can't lseek to e_lfanew\n");
+	return CL_CLEAN;
+    }
+
+    if(cli_readn(desc, &file_hdr, sizeof(struct pe_image_file_hdr)) != sizeof(struct pe_image_file_hdr)) {
+	/* bad information in e_lfanew - probably not a PE file */
+	cli_dbgmsg("Can't read file header\n");
+	return CL_CLEAN;
+    }
+
+    if(EC32(file_hdr.Magic) != IMAGE_NT_SIGNATURE) {
+	cli_dbgmsg("Invalid PE signature (probably NE file)\n");
+	return CL_CLEAN;
+    }
+
+    if(EC16(file_hdr.Characteristics) & 0x2000) {
+	cli_dbgmsg("File type: DLL\n");
+	dll = 1;
+    } else if(EC16(file_hdr.Characteristics) & 0x01) {
+	cli_dbgmsg("File type: Executable\n");
+    }
+
+    switch(EC16(file_hdr.Machine)) {
+	case 0x0:
+	    cli_dbgmsg("Machine type: Unknown\n");
+	    break;
+	case 0x14c:
+	    cli_dbgmsg("Machine type: 80386\n");
+	    break;
+	case 0x14d:
+	    cli_dbgmsg("Machine type: 80486\n");
+	    break;
+	case 0x14e:
+	    cli_dbgmsg("Machine type: 80586\n");
+	    break;
+	case 0x160:
+	    cli_dbgmsg("Machine type: R30000 (big-endian)\n");
+	    break;
+	case 0x162:
+	    cli_dbgmsg("Machine type: R3000\n");
+	    break;
+	case 0x166:
+	    cli_dbgmsg("Machine type: R4000\n");
+	    break;
+	case 0x168:
+	    cli_dbgmsg("Machine type: R10000\n");
+	    break;
+	case 0x184:
+	    cli_dbgmsg("Machine type: DEC Alpha AXP\n");
+	    break;
+	case 0x284:
+	    cli_dbgmsg("Machine type: DEC Alpha AXP 64bit\n");
+	    break;
+	case 0x1f0:
+	    cli_dbgmsg("Machine type: PowerPC\n");
+	    break;
+	case 0x200:
+	    cli_dbgmsg("Machine type: IA64\n");
+	    break;
+	case 0x268:
+	    cli_dbgmsg("Machine type: M68k\n");
+	    break;
+	case 0x266:
+	    cli_dbgmsg("Machine type: MIPS16\n");
+	    break;
+	case 0x366:
+	    cli_dbgmsg("Machine type: MIPS+FPU\n");
+	    break;
+	case 0x466:
+	    cli_dbgmsg("Machine type: MIPS16+FPU\n");
+	    break;
+	case 0x1a2:
+	    cli_dbgmsg("Machine type: Hitachi SH3\n");
+	    break;
+	case 0x1a3:
+	    cli_dbgmsg("Machine type: Hitachi SH3-DSP\n");
+	    break;
+	case 0x1a4:
+	    cli_dbgmsg("Machine type: Hitachi SH3-E\n");
+	    break;
+	case 0x1a6:
+	    cli_dbgmsg("Machine type: Hitachi SH4\n");
+	    break;
+	case 0x1a8:
+	    cli_dbgmsg("Machine type: Hitachi SH5\n");
+	    break;
+	case 0x1c0:
+	    cli_dbgmsg("Machine type: ARM\n");
+	    break;
+	case 0x1c2:
+	    cli_dbgmsg("Machine type: THUMB\n");
+	    break;
+	case 0x1d3:
+	    cli_dbgmsg("Machine type: AM33\n");
+	    break;
+	case 0x520:
+	    cli_dbgmsg("Machine type: Infineon TriCore\n");
+	    break;
+	case 0xcef:
+	    cli_dbgmsg("Machine type: CEF\n");
+	    break;
+	case 0xebc:
+	    cli_dbgmsg("Machine type: EFI Byte Code\n");
+	    break;
+	case 0x9041:
+	    cli_dbgmsg("Machine type: M32R\n");
+	    break;
+	case 0xc0ee:
+	    cli_dbgmsg("Machine type: CEE\n");
+	    break;
+	case 0x8664:
+	    cli_dbgmsg("Machine type: AMD64\n");
+	    break;
+	default:
+	    cli_warnmsg("Unknown machine type in PE header (0x%x)\n", EC16(file_hdr.Machine));
+    }
+
+    nsections = EC16(file_hdr.NumberOfSections);
+    if(nsections < 1 || nsections > 96) {
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+		*ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	if(nsections)
+	    cli_warnmsg("PE file contains %d sections\n", nsections);
+	else
+	    cli_warnmsg("PE file contains no sections\n");
+	return CL_CLEAN;
+    }
+    cli_dbgmsg("NumberOfSections: %d\n", nsections);
+
+    timestamp = (time_t) EC32(file_hdr.TimeDateStamp);
+    cli_dbgmsg("TimeDateStamp: %s", ctime(&timestamp));
+
+    cli_dbgmsg("SizeOfOptionalHeader: %x\n", EC16(file_hdr.SizeOfOptionalHeader));
+
+    if (EC16(file_hdr.SizeOfOptionalHeader) < sizeof(struct pe_image_optional_hdr32)) {
+        cli_dbgmsg("SizeOfOptionalHeader too small\n");
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+	        *ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	return CL_CLEAN;
+    }
+
+    if(cli_readn(desc, &optional_hdr32, sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr32)) {
+        cli_dbgmsg("Can't read optional file header\n");
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+	        *ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	return CL_CLEAN;
+    }
+
+    /* This will be a chicken and egg problem until we drop 9x */
+    if(EC32(optional_hdr64.Magic)==PE32P_SIGNATURE) {
+        if(EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr64)) {
+	    /* FIXME: need to play around a bit more with xp64 */
+	    cli_dbgmsg("Incorrect SizeOfOptionalHeader for PE32+\n");
+	    if(DETECT_BROKEN) {
+	        if(ctx->virname)
+		    *ctx->virname = "Broken.Executable";
+		return CL_VIRUS;
+	    }
+	    return CL_CLEAN;
+	}
+	pe_plus = 1;
+    } else {
+        /*
+	    either it's got a PE32_SIGNATURE or
+	    we enable win9x compatibility in that we don't honor magic (see bb#119)
+	    either way it's a 32bit thingy
+	*/
+        if(EC16(optional_hdr32.Magic) != PE32_SIGNATURE) {
+	    cli_warnmsg("Incorrect magic number in optional header\n");
+	    if(DETECT_BROKEN) {
+	        if(ctx->virname)
+		    *ctx->virname = "Broken.Executable";
+		return CL_VIRUS;
+	    }
+	    cli_dbgmsg("9x compatibility mode\n");
+	}
+    }
+
+    if(!pe_plus) { /* PE */
+	if (EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr32)) {
+	    /* Seek to the end of the long header */
+	    lseek(desc, (EC16(file_hdr.SizeOfOptionalHeader)-sizeof(struct pe_image_optional_hdr32)), SEEK_CUR);
+	}
+
+	if(DCONF & PE_CONF_UPACK)
+	    upack = (EC16(file_hdr.SizeOfOptionalHeader)==0x148);
+
+	vep = EC32(optional_hdr32.AddressOfEntryPoint);
+	hdr_size = EC32(optional_hdr32.SizeOfHeaders);
+	cli_dbgmsg("File format: PE\n");
+
+	cli_dbgmsg("MajorLinkerVersion: %d\n", optional_hdr32.MajorLinkerVersion);
+	cli_dbgmsg("MinorLinkerVersion: %d\n", optional_hdr32.MinorLinkerVersion);
+	cli_dbgmsg("SizeOfCode: 0x%x\n", EC32(optional_hdr32.SizeOfCode));
+	cli_dbgmsg("SizeOfInitializedData: 0x%x\n", EC32(optional_hdr32.SizeOfInitializedData));
+	cli_dbgmsg("SizeOfUninitializedData: 0x%x\n", EC32(optional_hdr32.SizeOfUninitializedData));
+	cli_dbgmsg("AddressOfEntryPoint: 0x%x\n", vep);
+	cli_dbgmsg("BaseOfCode: 0x%x\n", EC32(optional_hdr32.BaseOfCode));
+	cli_dbgmsg("SectionAlignment: 0x%x\n", EC32(optional_hdr32.SectionAlignment));
+	cli_dbgmsg("FileAlignment: 0x%x\n", EC32(optional_hdr32.FileAlignment));
+	cli_dbgmsg("MajorSubsystemVersion: %d\n", EC16(optional_hdr32.MajorSubsystemVersion));
+	cli_dbgmsg("MinorSubsystemVersion: %d\n", EC16(optional_hdr32.MinorSubsystemVersion));
+	cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr32.SizeOfImage));
+	cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
+	cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr32.NumberOfRvaAndSizes));
+
+    } else { /* PE+ */
+        /* read the remaining part of the header */
+        if(cli_readn(desc, &optional_hdr32 + 1, sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) {
+	    cli_dbgmsg("Can't read optional file header\n");
+	    if(DETECT_BROKEN) {
+	        if(ctx->virname)
+		    *ctx->virname = "Broken.Executable";
+		return CL_VIRUS;
+	    }
+	    return CL_CLEAN;
+	}
+
+	vep = EC32(optional_hdr64.AddressOfEntryPoint);
+	hdr_size = EC32(optional_hdr64.SizeOfHeaders);
+	cli_dbgmsg("File format: PE32+\n");
+
+	cli_dbgmsg("MajorLinkerVersion: %d\n", optional_hdr64.MajorLinkerVersion);
+	cli_dbgmsg("MinorLinkerVersion: %d\n", optional_hdr64.MinorLinkerVersion);
+	cli_dbgmsg("SizeOfCode: 0x%x\n", EC32(optional_hdr64.SizeOfCode));
+	cli_dbgmsg("SizeOfInitializedData: 0x%x\n", EC32(optional_hdr64.SizeOfInitializedData));
+	cli_dbgmsg("SizeOfUninitializedData: 0x%x\n", EC32(optional_hdr64.SizeOfUninitializedData));
+	cli_dbgmsg("AddressOfEntryPoint: 0x%x\n", vep);
+	cli_dbgmsg("BaseOfCode: 0x%x\n", EC32(optional_hdr64.BaseOfCode));
+	cli_dbgmsg("SectionAlignment: 0x%x\n", EC32(optional_hdr64.SectionAlignment));
+	cli_dbgmsg("FileAlignment: 0x%x\n", EC32(optional_hdr64.FileAlignment));
+	cli_dbgmsg("MajorSubsystemVersion: %d\n", EC16(optional_hdr64.MajorSubsystemVersion));
+	cli_dbgmsg("MinorSubsystemVersion: %d\n", EC16(optional_hdr64.MinorSubsystemVersion));
+	cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr64.SizeOfImage));
+	cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
+	cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr64.NumberOfRvaAndSizes));
+    }
+
+
+    switch(pe_plus ? EC16(optional_hdr64.Subsystem) : EC16(optional_hdr32.Subsystem)) {
+	case 0:
+	    cli_dbgmsg("Subsystem: Unknown\n");
+	    break;
+	case 1:
+	    cli_dbgmsg("Subsystem: Native (svc)\n");
+	    native = 1;
+	    break;
+	case 2:
+	    cli_dbgmsg("Subsystem: Win32 GUI\n");
+	    break;
+	case 3:
+	    cli_dbgmsg("Subsystem: Win32 console\n");
+	    break;
+	case 5:
+	    cli_dbgmsg("Subsystem: OS/2 console\n");
+	    break;
+	case 7:
+	    cli_dbgmsg("Subsystem: POSIX console\n");
+	    break;
+	case 8:
+	    cli_dbgmsg("Subsystem: Native Win9x driver\n");
+	    break;
+	case 9:
+	    cli_dbgmsg("Subsystem: WinCE GUI\n");
+	    break;
+	case 10:
+	    cli_dbgmsg("Subsystem: EFI application\n");
+	    break;
+	case 11:
+	    cli_dbgmsg("Subsystem: EFI driver\n");
+	    break;
+	case 12:
+	    cli_dbgmsg("Subsystem: EFI runtime driver\n");
+	    break;
+	default:
+	    cli_warnmsg("Unknown subsystem in PE header (0x%x)\n", pe_plus ? EC16(optional_hdr64.Subsystem) : EC16(optional_hdr32.Subsystem));
+    }
+
+    cli_dbgmsg("------------------------------------\n");
+
+    if (DETECT_BROKEN && !native && (!(pe_plus?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment)) || (pe_plus?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment))%0x1000)) {
+        cli_dbgmsg("Bad virtual alignemnt\n");
+        if(ctx->virname)
+	    *ctx->virname = "Broken.Executable";
+	return CL_VIRUS;
+    }
+
+    if (DETECT_BROKEN && !native && (!(pe_plus?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment)) || (pe_plus?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment))%0x200)) {
+        cli_dbgmsg("Bad file alignemnt\n");
+	if(ctx->virname)
+	    *ctx->virname = "Broken.Executable";
+	return CL_VIRUS;
+    }
+
+    if(fstat(desc, &sb) == -1) {
+	cli_dbgmsg("fstat failed\n");
+	return CL_EIO;
+    }
+
+    fsize = sb.st_size;
+
+    section_hdr = (struct pe_image_section_hdr *) cli_calloc(nsections, sizeof(struct pe_image_section_hdr));
+
+    if(!section_hdr) {
+	cli_dbgmsg("Can't allocate memory for section headers\n");
+	return CL_EMEM;
+    }
+
+    exe_sections = (struct cli_exe_section *) cli_calloc(nsections, sizeof(struct cli_exe_section));
+    
+    if(!exe_sections) {
+	cli_dbgmsg("Can't allocate memory for section headers\n");
+	free(section_hdr);
+	return CL_EMEM;
+    }
+
+    valign = (pe_plus)?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment);
+    falign = (pe_plus)?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment);
+
+    if(cli_readn(desc, section_hdr, sizeof(struct pe_image_section_hdr)*nsections) != (int)(nsections*sizeof(struct pe_image_section_hdr))) {
+        cli_dbgmsg("Can't read section header\n");
+	cli_dbgmsg("Possibly broken PE file\n");
+	free(section_hdr);
+	free(exe_sections);
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+		*ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	return CL_CLEAN;
+    }
+    
+    for(i = 0; falign!=0x200 && i<nsections; i++) {
+	/* file alignment fallback mode - blah */
+	if (falign && section_hdr[i].SizeOfRawData && EC32(section_hdr[i].PointerToRawData)%falign && !(EC32(section_hdr[i].PointerToRawData)%0x200)) {
+	    cli_dbgmsg("Found misaligned section, using 0x200\n");
+	    falign = 0x200;
+	}
+    }
+
+    hdr_size = PESALIGN(hdr_size, valign); /* Aligned headers virtual size */
+
+    for(i = 0; i < nsections; i++) {
+	strncpy(sname, (char *) section_hdr[i].Name, 8);
+	sname[8] = 0;
+	exe_sections[i].rva = PEALIGN(EC32(section_hdr[i].VirtualAddress), valign);
+	exe_sections[i].vsz = PESALIGN(EC32(section_hdr[i].VirtualSize), valign);
+	exe_sections[i].raw = PEALIGN(EC32(section_hdr[i].PointerToRawData), falign);
+	exe_sections[i].rsz = PESALIGN(EC32(section_hdr[i].SizeOfRawData), falign);
+	exe_sections[i].chr = EC32(section_hdr[i].Characteristics);
+	exe_sections[i].urva = EC32(section_hdr[i].VirtualAddress); /* Just in case */
+	exe_sections[i].uvsz = EC32(section_hdr[i].VirtualSize);
+	exe_sections[i].uraw = EC32(section_hdr[i].PointerToRawData);
+	exe_sections[i].ursz = EC32(section_hdr[i].SizeOfRawData);
+
+	if (!exe_sections[i].vsz && exe_sections[i].rsz)
+	    exe_sections[i].vsz=PESALIGN(exe_sections[i].ursz, valign);
+
+	if (exe_sections[i].rsz && fsize>exe_sections[i].raw && !CLI_ISCONTAINED(0, (uint32_t) fsize, exe_sections[i].raw, exe_sections[i].rsz))
+	    exe_sections[i].rsz = fsize - exe_sections[i].raw;
+	
+	cli_dbgmsg("Section %d\n", i);
+	cli_dbgmsg("Section name: %s\n", sname);
+	cli_dbgmsg("Section data (from headers - in memory)\n");
+	cli_dbgmsg("VirtualSize: 0x%x 0x%x\n", exe_sections[i].uvsz, exe_sections[i].vsz);
+	cli_dbgmsg("VirtualAddress: 0x%x 0x%x\n", exe_sections[i].urva, exe_sections[i].rva);
+	cli_dbgmsg("SizeOfRawData: 0x%x 0x%x\n", exe_sections[i].ursz, exe_sections[i].rsz);
+	cli_dbgmsg("PointerToRawData: 0x%x 0x%x\n", exe_sections[i].uraw, exe_sections[i].raw);
+
+	if(exe_sections[i].chr & 0x20) {
+	    cli_dbgmsg("Section contains executable code\n");
+
+	    if(exe_sections[i].vsz < exe_sections[i].rsz) {
+		cli_dbgmsg("Section contains free space\n");
+		/*
+		cli_dbgmsg("Dumping %d bytes\n", section_hdr.SizeOfRawData - section_hdr.VirtualSize);
+		ddump(desc, section_hdr.PointerToRawData + section_hdr.VirtualSize, section_hdr.SizeOfRawData - section_hdr.VirtualSize, cli_gentemp(NULL));
+		*/
+
+	    }
+	}
+
+	if(exe_sections[i].chr & 0x20000000)
+	    cli_dbgmsg("Section's memory is executable\n");
+
+	if(exe_sections[i].chr & 0x80000000)
+	    cli_dbgmsg("Section's memory is writeable\n");
+
+	cli_dbgmsg("------------------------------------\n");
+
+	if (DETECT_BROKEN && (exe_sections[i].urva % valign)) { /* Bad virtual alignment */
+	    cli_dbgmsg("VirtualAddress is misaligned\n");
+	    if(ctx->virname)
+	        *ctx->virname = "Broken.Executable";
+	    free(section_hdr);
+	    free(exe_sections);
+	    return CL_VIRUS;
+	}
+
+	if (exe_sections[i].rsz) { /* Don't bother with virtual only sections */
+	    if (exe_sections[i].raw >= fsize) { /* really broken */
+	        cli_dbgmsg("Broken PE file - Section %d starts beyond the end of file (Offset@ %d, Total filesize %d)\n", i, exe_sections[i].raw, fsize);
+		free(section_hdr);
+		free(exe_sections);
+		if(DETECT_BROKEN) {
+		    if(ctx->virname)
+		        *ctx->virname = "Broken.Executable";
+		    return CL_VIRUS;
+		}
+		return CL_CLEAN; /* no ninjas to see here! move along! */
+	    }
+
+	    if(SCAN_ALGO && (DCONF & PE_CONF_POLIPOS) && !*sname && exe_sections[i].vsz > 40000 && exe_sections[i].vsz < 70000 && exe_sections[i].chr == 0xe0000060) polipos = i;
+
+	    /* check MD5 section sigs */
+	    md5_sect = ctx->engine->md5_sect;
+	    if((DCONF & PE_CONF_MD5SECT) && md5_sect) {
+		found = 0;
+		for(j = 0; j < md5_sect->soff_len && md5_sect->soff[j] <= exe_sections[i].rsz; j++) {
+		    if(md5_sect->soff[j] == exe_sections[i].rsz) {
+			unsigned char md5_dig[16];
+			if(cli_md5sect(desc, &exe_sections[i], md5_dig) && cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_sect, 0, 0, -1) == CL_VIRUS) {
+				free(section_hdr);
+				free(exe_sections);
+				return CL_VIRUS;
+			}
+			break;
+		    }
+		}
+	    }
+	}
+
+	if(!i) {
+	    if (DETECT_BROKEN && exe_sections[i].urva!=hdr_size) { /* Bad first section RVA */
+	        cli_dbgmsg("First section is in the wrong place\n");
+	        if(ctx->virname)
+		    *ctx->virname = "Broken.Executable";
+		free(section_hdr);
+		free(exe_sections);
+		return CL_VIRUS;
+	    }
+	    min = exe_sections[i].rva;
+	    max = exe_sections[i].rva + exe_sections[i].rsz;
+	} else {
+	    if (DETECT_BROKEN && exe_sections[i].urva - exe_sections[i-1].urva != exe_sections[i-1].vsz) { /* No holes, no overlapping, no virtual disorder */
+	        cli_dbgmsg("Virtually misplaced section (wrong order, overlapping, non contiguous)\n");
+	        if(ctx->virname)
+		    *ctx->virname = "Broken.Executable";
+		free(section_hdr);
+		free(exe_sections);
+		return CL_VIRUS;
+	    }
+	    if(exe_sections[i].rva < min)
+	        min = exe_sections[i].rva;
+
+	    if(exe_sections[i].rva + exe_sections[i].rsz > max)
+	        max = exe_sections[i].rva + exe_sections[i].rsz;
+	}
+    }
+
+    free(section_hdr);
+
+    if(!(ep = cli_rawaddr(vep, exe_sections, nsections, &err, fsize, hdr_size)) && err) {
+	cli_dbgmsg("EntryPoint out of file\n");
+	free(exe_sections);
+	if(DETECT_BROKEN) {
+	    if(ctx->virname)
+		*ctx->virname = "Broken.Executable";
+	    return CL_VIRUS;
+	}
+	return CL_CLEAN;
+    }
+
+    cli_dbgmsg("EntryPoint offset: 0x%x (%d)\n", ep, ep);
+
+    if(pe_plus) { /* Do not continue for PE32+ files */
+	free(exe_sections);
+	return CL_CLEAN;
+    }
+
+    lseek(desc, ep, SEEK_SET);
+    epsize = cli_readn(desc, epbuff, 4096);
+
+    /* Attempt to detect some popular polymorphic viruses */
+
+    /* W32.Parite.B */
+    if(SCAN_ALGO && (DCONF & PE_CONF_PARITE) && !dll && epsize == 4096 && ep == exe_sections[nsections - 1].raw) {
+        const char *pt = cli_memstr(epbuff, 4040, "\x47\x65\x74\x50\x72\x6f\x63\x41\x64\x64\x72\x65\x73\x73\x00", 15);
+	if(pt) {
+	    pt += 15;
+	    if((((uint32_t)cli_readint32(pt) ^ (uint32_t)cli_readint32(pt + 4)) == 0x505a4f) && (((uint32_t)cli_readint32(pt + 8) ^ (uint32_t)cli_readint32(pt + 12)) == 0xffffb) && (((uint32_t)cli_readint32(pt + 16) ^ (uint32_t)cli_readint32(pt + 20)) == 0xb8)) {
+	        *ctx->virname = "W32.Parite.B";
+		free(exe_sections);
+		return CL_VIRUS;
+	    }
+	}
+    }
+
+    /* Kriz */
+    if(SCAN_ALGO && (DCONF & PE_CONF_KRIZ) && epsize >= 200 && CLI_ISCONTAINED(exe_sections[nsections - 1].raw, exe_sections[nsections - 1].rsz, ep, 0x0fd2) && epbuff[1]=='\x9c' && epbuff[2]=='\x60') {
+	enum {KZSTRASH,KZSCDELTA,KZSPDELTA,KZSGETSIZE,KZSXORPRFX,KZSXOR,KZSDDELTA,KZSLOOP,KZSTOP};
+	uint8_t kzs[] = {KZSTRASH,KZSCDELTA,KZSPDELTA,KZSGETSIZE,KZSTRASH,KZSXORPRFX,KZSXOR,KZSTRASH,KZSDDELTA,KZSTRASH,KZSLOOP,KZSTOP};
+	uint8_t *kzstate = kzs;
+	uint8_t *kzcode = (uint8_t *)epbuff + 3;
+	uint8_t kzdptr=0xff, kzdsize=0xff;
+	int kzlen = 197, kzinitlen=0xffff, kzxorlen=-1;
+	cli_dbgmsg("in kriz\n");
+
+	while(*kzstate!=KZSTOP) {
+	    uint8_t op;
+	    if(kzlen<=6) break;
+	    op = *kzcode++;
+	    kzlen--;
+	    switch (*kzstate) {
+	    case KZSTRASH: case KZSGETSIZE: {
+		int opsz=0;
+		switch(op) {
+		case 0x81:
+		    kzcode+=5;
+		    kzlen-=5;
+		    break;
+		case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbd: case 0xbe: case 0xbf:
+		    if(*kzstate==KZSGETSIZE && cli_readint32(kzcode)==0x0fd2) {
+			kzinitlen = kzlen-5;
+			kzdsize=op-0xb8;
+			kzstate++;
+			op=4; /* fake the register to avoid breaking out */
+			cli_dbgmsg("kriz: using #%d as size counter\n", kzdsize);
+		    }
+		    opsz=4;
+		case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4d: case 0x4e: case 0x4f:
+		    op&=7;
+		    if(op!=kzdptr && op!=kzdsize) {
+			kzcode+=opsz;
+			kzlen-=opsz;
+			break;
+		    }
+		default:
+		    kzcode--;
+		    kzlen++;
+		    kzstate++;
+		}
+		break;
+	    }
+	    case KZSCDELTA:
+		if(op==0xe8 && (uint32_t)cli_readint32(kzcode) < 0xff) {
+		    kzlen-=*kzcode+4;
+		    kzcode+=*kzcode+4;
+		    kzstate++;
+		} else *kzstate=KZSTOP;
+		break;
+	    case KZSPDELTA:
+		if((op&0xf8)==0x58 && (kzdptr=op-0x58)!=4) {
+		    kzstate++;
+		    cli_dbgmsg("kriz: using #%d as pointer\n", kzdptr);
+		} else *kzstate=KZSTOP;
+		break;
+	    case KZSXORPRFX:
+		kzstate++;
+		if(op==0x3e) break;
+	    case KZSXOR:
+		if (op==0x80 && *kzcode==kzdptr+0xb0) {
+		    kzxorlen=kzlen;
+		    kzcode+=+6;
+		    kzlen-=+6;
+		    kzstate++;
+		} else *kzstate=KZSTOP;
+		break;
+	    case KZSDDELTA:
+		if (op==kzdptr+0x48) kzstate++;
+		else *kzstate=KZSTOP;
+		break;
+	    case KZSLOOP:
+		if (op==kzdsize+0x48 && *kzcode==0x75 && kzlen-(int8_t)kzcode[1]-3<=kzinitlen && kzlen-(int8_t)kzcode[1]>=kzxorlen) {
+		    *ctx->virname = "W32.Kriz";
+		    free(exe_sections);
+		    return CL_VIRUS;
+		}
+		cli_dbgmsg("kriz: loop out of bounds, corrupted sample?\n");
+		kzstate++;
+	    }
+	}
+    }
+
+    /* W32.Magistr.A/B */
+    if(SCAN_ALGO && (DCONF & PE_CONF_MAGISTR) && !dll && (nsections>1) && (exe_sections[nsections - 1].chr & 0x80000000)) {
+        uint32_t rsize, vsize, dam = 0;
+
+	vsize = exe_sections[nsections - 1].uvsz;
+	rsize = exe_sections[nsections - 1].rsz;
+	if(rsize < exe_sections[nsections - 1].ursz) {
+	    rsize = exe_sections[nsections - 1].ursz;
+	    dam = 1;
+	}
+
+	if(vsize >= 0x612c && rsize >= 0x612c && ((vsize & 0xff) == 0xec)) {
+		int bw = rsize < 0x7000 ? rsize : 0x7000;
+
+	    lseek(desc, exe_sections[nsections - 1].raw + rsize - bw, SEEK_SET);
+	    if(cli_readn(desc, buff, 4096) == 4096) {
+		if(cli_memstr(buff, 4091, "\xe8\x2c\x61\x00\x00", 5)) {
+		    *ctx->virname = dam ? "W32.Magistr.A.dam" : "W32.Magistr.A";
+		    free(exe_sections);
+		    return CL_VIRUS;
+		} 
+	    }
+
+	} else if(rsize >= 0x7000 && vsize >= 0x7000 && ((vsize & 0xff) == 0xed)) {
+		int bw = rsize < 0x8000 ? rsize : 0x8000;
+
+	    lseek(desc, exe_sections[nsections - 1].raw + rsize - bw, SEEK_SET);
+	    if(cli_readn(desc, buff, 4096) == 4096) {
+		if(cli_memstr(buff, 4091, "\xe8\x04\x72\x00\x00", 5)) {
+		    *ctx->virname = dam ? "W32.Magistr.B.dam" : "W32.Magistr.B";
+		    free(exe_sections);
+		    return CL_VIRUS;
+		} 
+	    }
+	}
+    }
+
+    /* W32.Polipos.A */
+    while(polipos && !dll && nsections > 2 && nsections < 13 && e_lfanew <= 0x800 && (EC16(optional_hdr32.Subsystem) == 2 || EC16(optional_hdr32.Subsystem) == 3) && EC16(file_hdr.Machine) == 0x14c && optional_hdr32.SizeOfStackReserve >= 0x80000) {
+	uint32_t jump, jold, *jumps = NULL;
+	uint8_t *code;
+	unsigned int xsjs = 0;
+
+	if(exe_sections[0].rsz > CLI_MAX_ALLOCATION) break;
+	if(!cli_seeksect(desc, &exe_sections[0])) break;
+	if(!(code=cli_malloc(exe_sections[0].rsz))) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+	if(cli_readn(desc, code, exe_sections[0].rsz)!=exe_sections[0].rsz) {
+	    free(exe_sections);
+	    return CL_EIO;
+	}
+	for(i=0; i<exe_sections[0].rsz - 5; i++) {
+	    if((uint8_t)(code[i]-0xe8) > 1) continue;
+	    jump = cli_rawaddr(exe_sections[0].rva+i+5+cli_readint32(&code[i+1]), exe_sections, nsections, &err, fsize, hdr_size);
+	    if(err || !CLI_ISCONTAINED(exe_sections[polipos].raw, exe_sections[polipos].rsz, jump, 9)) continue;
+	    if(xsjs % 128 == 0) {
+		if(xsjs == 1280) break;
+		if(!(jumps=(uint32_t *)cli_realloc2(jumps, (xsjs+128)*sizeof(uint32_t)))) {
+		    free(code);
+		    free(exe_sections);
+		    return CL_EMEM;
+		}
+	    }
+	    j=0;
+	    for(; j<xsjs; j++) {
+		if(jumps[j]<jump) continue;
+		if(jumps[j]==jump) {
+		    xsjs--;
+		    break;
+		}
+		jold=jumps[j];
+		jumps[j]=jump;
+		jump=jold;
+	    }
+	    jumps[j]=jump;
+	    xsjs++;
+	}
+	free(code);
+	if(!xsjs) break;
+	cli_dbgmsg("Polipos: Checking %d xsect jump(s)\n", xsjs);
+	for(i=0;i<xsjs;i++) {
+	    lseek(desc, jumps[i], SEEK_SET);
+	    if(cli_readn(desc, buff, 9) != 9) continue;
+	    if((jump=cli_readint32(buff))==0x60ec8b55 || (buff[4]=='\xec' && ((jump==0x83ec8b55 && buff[6]=='\x60') || (jump==0x81ec8b55 && !buff[7] && !buff[8])))) {
+		*ctx->virname = "W32.Polipos.A";
+		free(jumps);
+		free(exe_sections);
+		return CL_VIRUS;
+	    }
+	}
+	free(jumps);
+	break;
+    }
+
+
+    /* UPX, FSG, MEW support */
+
+    /* try to find the first section with physical size == 0 */
+    found = 0;
+    if(DCONF & (PE_CONF_UPX | PE_CONF_FSG | PE_CONF_MEW)) {
+	for(i = 0; i < (unsigned int) nsections - 1; i++) {
+	    if(!exe_sections[i].rsz && exe_sections[i].vsz && exe_sections[i + 1].rsz && exe_sections[i + 1].vsz) {
+		found = 1;
+		cli_dbgmsg("UPX/FSG/MEW: empty section found - assuming compression\n");
+		break;
+	    }
+	}
+    }
+
+    /* MEW support */
+    if (found && (DCONF & PE_CONF_MEW) && epsize>=16 && epbuff[0]=='\xe9') {
+	uint32_t fileoffset;
+
+	fileoffset = (vep + cli_readint32(epbuff + 1) + 5);
+	while (fileoffset == 0x154 || fileoffset == 0x158) {
+	    uint32_t offdiff, uselzma;
+
+	    cli_dbgmsg ("MEW: found MEW characteristics %08X + %08X + 5 = %08X\n", 
+			cli_readint32(epbuff + 1), vep, cli_readint32(epbuff + 1) + vep + 5);
+
+	    if(lseek(desc, fileoffset, SEEK_SET) == -1) {
+	        cli_dbgmsg("MEW: lseek() failed\n");
+		free(exe_sections);
+		return CL_EIO;
+	    }
+
+	    if((bytes = read(desc, buff, 0xb0)) != 0xb0) {
+	        cli_dbgmsg("MEW: Can't read 0xb0 bytes at 0x%x (%d) %d\n", fileoffset, fileoffset, bytes);
+		break;
+	    }
+
+	    if (fileoffset == 0x154) cli_dbgmsg("MEW: Win9x compatibility was set!\n");
+	    else cli_dbgmsg("MEW: Win9x compatibility was NOT set!\n");
+
+	    if((offdiff = cli_readint32(buff+1) - EC32(optional_hdr32.ImageBase)) <= exe_sections[i + 1].rva || offdiff >= exe_sections[i + 1].rva + exe_sections[i + 1].raw - 4) {
+	        cli_dbgmsg("MEW: ESI is not in proper section\n");
+		break;
+	    }
+	    offdiff -= exe_sections[i + 1].rva;
+
+	    if(!cli_seeksect(desc, &exe_sections[i + 1])) {
+		free(exe_sections);
+		return CL_EIO;
+	    }
+	    ssize = exe_sections[i + 1].vsz;
+	    dsize = exe_sections[i].vsz;
+
+	    cli_dbgmsg("MEW: ssize %08x dsize %08x offdiff: %08x\n", ssize, dsize, offdiff);
+
+	    CLI_UNPSIZELIMITS("MEW", MAX(ssize, dsize));
+	    CLI_UNPSIZELIMITS("MEW", MAX(ssize + dsize, exe_sections[i + 1].rsz));
+
+	    /* allocate needed buffer */
+	    if (!(src = cli_calloc (ssize + dsize, sizeof(char)))) {
+	        free(exe_sections);
+		return CL_EMEM;
+	    }
+
+	    if (exe_sections[i + 1].rsz < offdiff + 12 || exe_sections[i + 1].rsz > ssize) {
+	        cli_dbgmsg("MEW: Size mismatch: %08x\n", exe_sections[i + 1].rsz);
+		free(src);
+		break;
+	    }
+
+	    if((bytes = read(desc, src + dsize, exe_sections[i + 1].rsz)) != exe_sections[i + 1].rsz) {
+	        cli_dbgmsg("MEW: Can't read %d bytes [read: %d]\n", exe_sections[i + 1].rsz, bytes);
+		free(exe_sections);
+		free(src);
+		return CL_EIO;
+	    }
+	    cli_dbgmsg("MEW: %d (%08x) bytes read\n", bytes, bytes);
+
+	    /* count offset to lzma proc, if lzma used, 0xe8 -> call */
+	    if (buff[0x7b] == '\xe8') {
+	        if (!CLI_ISCONTAINED(exe_sections[1].rva, exe_sections[1].vsz, cli_readint32(buff + 0x7c) + fileoffset + 0x80, 4)) {
+		    cli_dbgmsg("MEW: lzma proc out of bounds!\n");
+		    free(src);
+		    break; /* to next unpacker in chain */
+		}
+		uselzma = cli_readint32(buff + 0x7c) - (exe_sections[0].rva - fileoffset - 0x80);
+	    } else {
+	        uselzma = 0;
+	    }
+
+	    CLI_UNPTEMP("MEW",(src,exe_sections,0));
+	    CLI_UNPRESULTS("MEW",(unmew11(i, src, offdiff, ssize, dsize, EC32(optional_hdr32.ImageBase), exe_sections[0].rva, uselzma, NULL, NULL, ndesc)),1,(src,0));
+	    break;
+	}
+    }
+
+    if(epsize<168) {
+	free(exe_sections);
+	return CL_CLEAN;
+    }
+
+    if (found || upack) {
+	/* Check EP for UPX vs. FSG vs. Upack */
+
+	/* Upack 0.39 produces 2 types of executables
+	 * 3 sections:           | 2 sections (one empty, I don't check found if !upack, since it's in OR above):
+	 *   mov esi, value      |   pusha
+	 *   lodsd               |   call $+0x9
+	 *   push eax            |
+	 *
+	 * Upack 1.1/1.2 Beta produces [based on 2 samples (sUx) provided by aCaB]:
+	 * 2 sections
+	 *   mov esi, value
+	 *   lodsd
+	 *   mov edi, eax
+	 *
+	 * Upack unknown [sample 0297729]
+	 * 3 sections
+	 *   mov esi, value
+	 *   push [esi]
+	 *   jmp
+	 * 
+	 */
+	/* upack 0.39-3s + sample 0151477*/
+ 	while(((upack && nsections == 3) && /* 3 sections */
+	    ((
+	     epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > min && /* mov esi */
+	     epbuff[5] == '\xad' && epbuff[6] == '\x50' /* lodsd; push eax */
+	     )
+	    || 
+	    /* based on 0297729 sample from aCaB */
+	    (epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > min && /* mov esi */
+	     epbuff[5] == '\xff' && epbuff[6] == '\x36' /* push [esi] */
+	     )
+	   )) 
+	   ||
+	   ((!upack && nsections == 2) && /* 2 sections */
+	    (( /* upack 0.39-2s */
+	     epbuff[0] == '\x60' && epbuff[1] == '\xe8' && cli_readint32(epbuff+2) == 0x9 /* pusha; call+9 */
+	     )
+	    ||
+	    ( /* upack 1.1/1.2, based on 2 samples */
+	     epbuff[0] == '\xbe' && cli_readint32(epbuff+1) - EC32(optional_hdr32.ImageBase) < min &&  /* mov esi */
+	     cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > 0 &&
+	     epbuff[5] == '\xad' && epbuff[6] == '\x8b' && epbuff[7] == '\xf8' /* loads;  mov edi, eax */
+	     )
+	   ))
+	   ) { 
+	    uint32_t vma, off;
+	    int a,b,c;
+
+	    cli_dbgmsg("Upack characteristics found.\n");
+	    a = exe_sections[0].vsz;
+	    b = exe_sections[1].vsz;
+	    if (upack) {
+	        cli_dbgmsg("Upack: var set\n");
+		c = exe_sections[2].vsz;
+		ssize = exe_sections[0].ursz + exe_sections[0].uraw;
+		off = exe_sections[0].rva;
+		vma = EC32(optional_hdr32.ImageBase) + exe_sections[0].rva;
+	    } else {
+	        cli_dbgmsg("Upack: var NOT set\n");
+		c = exe_sections[1].rva;
+		ssize = exe_sections[1].uraw;
+		off = 0;
+		vma = exe_sections[1].rva - exe_sections[1].uraw;
+	    }
+
+	    dsize = a+b+c;
+
+	    CLI_UNPSIZELIMITS("Upack", MAX(MAX(dsize, ssize), exe_sections[1].ursz));
+
+	    if (exe_sections[1].rva - off > dsize || exe_sections[1].rva - off > dsize - exe_sections[1].ursz || (upack && (exe_sections[2].rva - exe_sections[0].rva > dsize || exe_sections[2].rva - exe_sections[0].rva > dsize - ssize)) || ssize > dsize) {
+	        cli_dbgmsg("Upack: probably malformed pe-header, skipping to next unpacker\n");
+		break;
+	    }
+			
+	    if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+	        free(exe_sections);
+		return CL_EMEM;
+	    }
+
+	    lseek(desc, 0, SEEK_SET);
+	    if(read(desc, dest, ssize) != ssize) {
+	        cli_dbgmsg("Upack: Can't read raw data of section 0\n");
+		free(exe_sections);
+		free(dest);
+		return CL_EIO;
+	    }
+
+	    if(upack) memmove(dest + exe_sections[2].rva - exe_sections[0].rva, dest, ssize);
+
+	    lseek(desc, exe_sections[1].uraw, SEEK_SET);
+
+	    if(read(desc, dest + exe_sections[1].rva - off, exe_sections[1].ursz) != exe_sections[1].ursz) {
+		cli_dbgmsg("Upack: Can't read raw data of section 1\n");
+		free(exe_sections);
+		free(dest);
+		return CL_EIO;
+	    }
+
+	    CLI_UNPTEMP("Upack",(dest,exe_sections,0));
+	    CLI_UNPRESULTS("Upack",(unupack(upack, dest, dsize, epbuff, vma, ep, EC32(optional_hdr32.ImageBase), exe_sections[0].rva, ndesc)),1,(dest,0));
+	    break;
+	}
+    }
+
+    
+    while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\x87' && epbuff[1] == '\x25') {
+
+	/* FSG v2.0 support - thanks to aCaB ! */
+
+	uint32_t newesi, newedi, newebx, newedx;
+	
+	ssize = exe_sections[i + 1].rsz;
+	dsize = exe_sections[i].vsz;
+
+	CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+	if(ssize <= 0x19 || dsize <= ssize) {
+	    cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+	    free(exe_sections);
+	    return CL_CLEAN;
+	}
+	
+	newedx = cli_readint32(epbuff + 2) - EC32(optional_hdr32.ImageBase);
+	if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newedx, 4)) {
+	    cli_dbgmsg("FSG: xchg out of bounds (%x), giving up\n", newedx);
+	    break;
+	}
+	
+	if((src = (char *) cli_malloc(ssize)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+	    cli_dbgmsg("Can't read raw data of section %d\n", i + 1);
+	    free(exe_sections);
+	    free(src);
+	    return CL_EIO;
+	}
+
+	dest = src + newedx - exe_sections[i + 1].rva;
+	if(newedx < exe_sections[i + 1].rva || !CLI_ISCONTAINED(src, ssize, dest, 4)) {
+	    cli_dbgmsg("FSG: New ESP out of bounds\n");
+	    free(src);
+	    break;
+	}
+
+	newedx = cli_readint32(dest) - EC32(optional_hdr32.ImageBase);
+	if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newedx, 4)) {
+	    cli_dbgmsg("FSG: New ESP (%x) is wrong\n", newedx);
+	    free(src);
+	    break;
+	}
+ 
+	dest = src + newedx - exe_sections[i + 1].rva;
+	if(!CLI_ISCONTAINED(src, ssize, dest, 32)) {
+	    cli_dbgmsg("FSG: New stack out of bounds\n");
+	    free(src);
+	    break;
+	}
+
+	newedi = cli_readint32(dest) - EC32(optional_hdr32.ImageBase);
+	newesi = cli_readint32(dest + 4) - EC32(optional_hdr32.ImageBase);
+	newebx = cli_readint32(dest + 16) - EC32(optional_hdr32.ImageBase);
+	newedx = cli_readint32(dest + 20);
+
+	if(newedi != exe_sections[i].rva) {
+	    cli_dbgmsg("FSG: Bad destination buffer (edi is %x should be %x)\n", newedi, exe_sections[i].rva);
+	    free(src);
+	    break;
+	}
+
+	if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].rsz) {
+	    cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+	    free(src);
+	    break;
+	}
+
+	if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newebx, 16)) {
+	    cli_dbgmsg("FSG: Array of functions out of bounds\n");
+	    free(src);
+	    break;
+	}
+
+	newedx=cli_readint32(newebx + 12 - exe_sections[i + 1].rva + src) - EC32(optional_hdr32.ImageBase);
+	cli_dbgmsg("FSG: found old EP @%x\n",newedx);
+
+	if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+	    free(exe_sections);
+	    free(src);
+	    return CL_EMEM;
+	}
+
+	CLI_UNPTEMP("FSG",(src,dest,exe_sections,0));
+	CLI_UNPRESULTSFSG2("FSG",(unfsg_200(newesi - exe_sections[i + 1].rva + src, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, newedi, EC32(optional_hdr32.ImageBase), newedx, ndesc)),1,(src,dest,0));
+	break;
+    }
+
+
+    while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) < min) {
+
+	/* FSG support - v. 1.33 (thx trog for the many samples) */
+
+	int sectcnt = 0;
+	char *support;
+	uint32_t newesi, newedi, oldep, gp, t;
+	struct cli_exe_section *sections;
+
+	ssize = exe_sections[i + 1].rsz;
+	dsize = exe_sections[i].vsz;
+
+	CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+	if(ssize <= 0x19 || dsize <= ssize) {
+	    cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+	    free(exe_sections);
+	    return CL_CLEAN;
+	}
+
+	if(!(gp = cli_rawaddr(cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase), NULL, 0 , &err, fsize, hdr_size)) && err ) {
+	    cli_dbgmsg("FSG: Support data out of padding area\n");
+	    break;
+	}
+
+	lseek(desc, gp, SEEK_SET);
+	gp = exe_sections[i + 1].raw - gp;
+
+	CLI_UNPSIZELIMITS("FSG", gp)
+
+	if((support = (char *) cli_malloc(gp)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	if((int)cli_readn(desc, support, gp) != (int)gp) {
+	    cli_dbgmsg("Can't read %d bytes from padding area\n", gp); 
+	    free(exe_sections);
+	    free(support);
+	    return CL_EIO;
+	}
+
+	/* newebx = cli_readint32(support) - EC32(optional_hdr32.ImageBase);  Unused */
+	newedi = cli_readint32(support + 4) - EC32(optional_hdr32.ImageBase); /* 1st dest */
+	newesi = cli_readint32(support + 8) - EC32(optional_hdr32.ImageBase); /* Source */
+
+	if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].rsz) {
+	    cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+	    free(support);
+	    break;
+	}
+
+	if(newedi != exe_sections[i].rva) {
+	    cli_dbgmsg("FSG: Bad destination (is %x should be %x)\n", newedi, exe_sections[i].rva);
+	    free(support);
+	    break;
+	}
+
+	/* Counting original sections */
+	for(t = 12; t < gp - 4; t += 4) {
+	    uint32_t rva = cli_readint32(support+t);
+
+	    if(!rva)
+		break;
+
+	    rva -= EC32(optional_hdr32.ImageBase)+1;
+	    sectcnt++;
+
+	    if(rva % 0x1000) cli_dbgmsg("FSG: Original section %d is misaligned\n", sectcnt);
+
+	    if(rva < exe_sections[i].rva || rva - exe_sections[i].rva >= exe_sections[i].vsz) {
+		cli_dbgmsg("FSG: Original section %d is out of bounds\n", sectcnt);
+		break;
+	    }
+	}
+
+	if(t >= gp - 4 || cli_readint32(support + t)) {
+	    free(support);
+	    break;
+	}
+
+	if((sections = (struct cli_exe_section *) cli_malloc((sectcnt + 1) * sizeof(struct cli_exe_section))) == NULL) {
+	    free(exe_sections);
+	    free(support);
+	    return CL_EMEM;
+	}
+
+	sections[0].rva = newedi;
+	for(t = 1; t <= (uint32_t)sectcnt; t++)
+	    sections[t].rva = cli_readint32(support + 8 + t * 4) - 1 - EC32(optional_hdr32.ImageBase);
+
+	free(support);
+
+	if((src = (char *) cli_malloc(ssize)) == NULL) {
+	    free(exe_sections);
+	    free(sections);
+	    return CL_EMEM;
+	}
+
+	if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+	    cli_dbgmsg("Can't read raw data of section %d\n", i);
+	    free(exe_sections);
+	    free(sections);
+	    free(src);
+	    return CL_EIO;
+	}
+
+	if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+	    free(exe_sections);
+	    free(src);
+	    free(sections);
+	    return CL_EMEM;
+	}
+
+	oldep = vep + 161 + 6 + cli_readint32(epbuff+163);
+	cli_dbgmsg("FSG: found old EP @%x\n", oldep);
+
+	CLI_UNPTEMP("FSG",(src,dest,sections,exe_sections,0));
+	CLI_UNPRESULTSFSG1("FSG",(unfsg_133(src + newesi - exe_sections[i + 1].rva, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, sections, sectcnt, EC32(optional_hdr32.ImageBase), oldep, ndesc)),1,(src,dest,sections,0));
+	break; /* were done with 1.33 */
+    }
+
+
+    while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\xbb' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) < min && epbuff[5] == '\xbf' && epbuff[10] == '\xbe' && vep >= exe_sections[i + 1].rva && vep - exe_sections[i + 1].rva > exe_sections[i + 1].rva - 0xe0 ) {
+
+	/* FSG support - v. 1.31 */
+
+	int sectcnt = 0;
+	uint32_t t;
+	uint32_t gp = cli_rawaddr(cli_readint32(epbuff+1) - EC32(optional_hdr32.ImageBase), NULL, 0 , &err, fsize, hdr_size);
+	char *support;
+	uint32_t newesi = cli_readint32(epbuff+11) - EC32(optional_hdr32.ImageBase);
+	uint32_t newedi = cli_readint32(epbuff+6) - EC32(optional_hdr32.ImageBase);
+	uint32_t oldep = vep - exe_sections[i + 1].rva;
+	struct cli_exe_section *sections;
+
+	ssize = exe_sections[i + 1].rsz;
+	dsize = exe_sections[i].vsz;
+
+
+	if(err) {
+	    cli_dbgmsg("FSG: Support data out of padding area\n");
+	    break;
+	}
+
+	if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].raw) {
+	    cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+	    break;
+	}
+
+	if(newedi != exe_sections[i].rva) {
+	    cli_dbgmsg("FSG: Bad destination (is %x should be %x)\n", newedi, exe_sections[i].rva);
+	    break;
+	}
+
+	CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+	if(ssize <= 0x19 || dsize <= ssize) {
+	    cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+	    free(exe_sections);
+	    return CL_CLEAN;
+	}
+
+	lseek(desc, gp, SEEK_SET);
+	gp = exe_sections[i + 1].raw - gp;
+
+	CLI_UNPSIZELIMITS("FSG", gp)
+
+	if((support = (char *) cli_malloc(gp)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	if(cli_readn(desc, support, gp) != (int)gp) {
+	    cli_dbgmsg("Can't read %d bytes from padding area\n", gp); 
+	    free(exe_sections);
+	    free(support);
+	    return CL_EIO;
+	}
+
+	/* Counting original sections */
+	for(t = 0; t < gp - 2; t += 2) {
+	    uint32_t rva = support[t]|(support[t+1]<<8);
+
+	    if (rva == 2 || rva == 1)
+		break;
+
+	    rva = ((rva-2)<<12) - EC32(optional_hdr32.ImageBase);
+	    sectcnt++;
+
+	    if(rva < exe_sections[i].rva || rva - exe_sections[i].rva >= exe_sections[i].vsz) {
+		cli_dbgmsg("FSG: Original section %d is out of bounds\n", sectcnt);
+		break;
+	    }
+	}
+
+	if(t >= gp-10 || cli_readint32(support + t + 6) != 2) {
+	    free(support);
+	    break;
+	}
+
+	if((sections = (struct cli_exe_section *) cli_malloc((sectcnt + 1) * sizeof(struct cli_exe_section))) == NULL) {
+	    free(exe_sections);
+	    free(support);
+	    return CL_EMEM;
+	}
+
+	sections[0].rva = newedi;
+	for(t = 0; t <= (uint32_t)sectcnt - 1; t++) {
+	    sections[t+1].rva = (((support[t*2]|(support[t*2+1]<<8))-2)<<12)-EC32(optional_hdr32.ImageBase);
+	}
+
+	free(support);
+
+	if((src = (char *) cli_malloc(ssize)) == NULL) {
+	    free(exe_sections);
+	    free(sections);
+	    return CL_EMEM;
+	}
+
+	if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+	    cli_dbgmsg("FSG: Can't read raw data of section %d\n", i);
+	    free(exe_sections);
+	    free(sections);
+	    free(src);
+	    return CL_EIO;
+	}
+
+	if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+	    free(exe_sections);
+	    free(src);
+	    free(sections);
+	    return CL_EMEM;
+	}
+
+	gp = 0xda + 6*(epbuff[16]=='\xe8');
+	oldep = vep + gp + 6 + cli_readint32(src+gp+2+oldep);
+	cli_dbgmsg("FSG: found old EP @%x\n", oldep);
+
+	CLI_UNPTEMP("FSG",(src,dest,sections,exe_sections,0));
+	CLI_UNPRESULTSFSG1("FSG",(unfsg_133(src + newesi - exe_sections[i + 1].rva, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, sections, sectcnt, EC32(optional_hdr32.ImageBase), oldep, ndesc)),1,(src,dest,sections,0));
+	break; /* were done with 1.31 */
+    }
+
+
+    if(found && (DCONF & PE_CONF_UPX)) {
+
+	/* UPX support */
+
+	/* we assume (i + 1) is UPX1 */
+	ssize = exe_sections[i + 1].rsz;
+	dsize = exe_sections[i].vsz + exe_sections[i + 1].vsz;
+
+	CLI_UNPSIZELIMITS("UPX", MAX(dsize, ssize));
+
+	if(ssize <= 0x19 || dsize <= ssize || dsize > CLI_MAX_ALLOCATION ) {
+	    cli_dbgmsg("UPX: Size mismatch or dsize too big (ssize: %d, dsize: %d)\n", ssize, dsize);
+	    free(exe_sections);
+	    return CL_CLEAN;
+	}
+
+	if((src = (char *) cli_malloc(ssize)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	if((dest = (char *) cli_calloc(dsize + 8192, sizeof(char))) == NULL) {
+	    free(exe_sections);
+	    free(src);
+	    return CL_EMEM;
+	}
+
+	if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+	    cli_dbgmsg("UPX: Can't read raw data of section %d\n", i+1);
+	    free(exe_sections);
+	    free(src);
+	    free(dest);
+	    return CL_EIO;
+	}
+
+	/* try to detect UPX code */
+	if(cli_memstr(UPX_NRV2B, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2B, 24, epbuff + 0x69 + 8, 13)) {
+	    cli_dbgmsg("UPX: Looks like a NRV2B decompression routine\n");
+	    upxfn = upx_inflate2b;
+	} else if(cli_memstr(UPX_NRV2D, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2D, 24, epbuff + 0x69 + 8, 13)) {
+	    cli_dbgmsg("UPX: Looks like a NRV2D decompression routine\n");
+	    upxfn = upx_inflate2d;
+	} else if(cli_memstr(UPX_NRV2E, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2E, 24, epbuff + 0x69 + 8, 13)) {
+	    cli_dbgmsg("UPX: Looks like a NRV2E decompression routine\n");
+	    upxfn = upx_inflate2e;
+	}
+
+	if(upxfn) {
+	    int skew = cli_readint32(epbuff + 2) - EC32(optional_hdr32.ImageBase) - exe_sections[i + 1].rva;
+
+	    if(epbuff[1] != '\xbe' || skew <= 0 || skew > 0xfff) { /* FIXME: legit skews?? */
+		skew = 0; 
+		if(upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
+		    upx_success = 1;
+
+	    } else {
+		cli_dbgmsg("UPX: UPX1 seems skewed by %d bytes\n", skew);
+		if(upxfn(src + skew, ssize - skew, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep-skew) >= 0 || upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
+		    upx_success = 1;
+	    }
+
+	    if(upx_success)
+		cli_dbgmsg("UPX: Successfully decompressed\n");
+	    else
+		cli_dbgmsg("UPX: Preferred decompressor failed\n");
+	}
+
+	if(!upx_success && upxfn != upx_inflate2b) {
+	    if(upx_inflate2b(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2b(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+
+		cli_dbgmsg("UPX: NRV2B decompressor failed\n");
+	    } else {
+		upx_success = 1;
+		cli_dbgmsg("UPX: Successfully decompressed with NRV2B\n");
+	    }
+	}
+
+	if(!upx_success && upxfn != upx_inflate2d) {
+	    if(upx_inflate2d(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2d(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+
+		cli_dbgmsg("UPX: NRV2D decompressor failed\n");
+	    } else {
+		upx_success = 1;
+		cli_dbgmsg("UPX: Successfully decompressed with NRV2D\n");
+	    }
+	}
+
+	if(!upx_success && upxfn != upx_inflate2e) {
+	    if(upx_inflate2e(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2e(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+		cli_dbgmsg("UPX: NRV2E decompressor failed\n");
+	    } else {
+		upx_success = 1;
+		cli_dbgmsg("UPX: Successfully decompressed with NRV2E\n");
+	    }
+	}
+
+	if(!upx_success) {
+	    cli_dbgmsg("UPX: All decompressors failed\n");
+	    free(src);
+	    free(dest);
+	}
+    }
+
+    if(upx_success) {
+	free(src);
+	free(exe_sections);
+
+	CLI_UNPTEMP("UPX/FSG",(dest,0));
+
+	if((unsigned int) write(ndesc, dest, dsize) != dsize) {
+	    cli_dbgmsg("UPX/FSG: Can't write %d bytes\n", dsize);
+	    free(tempfile);
+	    free(dest);
+	    close(ndesc);
+	    return CL_EIO;
+	}
+
+	free(dest);
+	fsync(ndesc);
+	lseek(ndesc, 0, SEEK_SET);
+
+	if(cli_leavetemps_flag)
+	    cli_dbgmsg("UPX/FSG: Decompressed data saved in %s\n", tempfile);
+
+	cli_dbgmsg("***** Scanning decompressed file *****\n");
+	if((ret = cli_magic_scandesc(ndesc, ctx)) == CL_VIRUS) {
+	    close(ndesc);
+	    CLI_TMPUNLK();
+	    free(tempfile);
+	    return CL_VIRUS;
+	}
+
+	close(ndesc);
+	CLI_TMPUNLK();
+	free(tempfile);
+	return ret;
+    }
+
+
+    /* Petite */
+
+    if(epsize<200) {
+	free(exe_sections);
+	return CL_CLEAN;
+    }
+
+    found = 2;
+
+    if(epbuff[0] != '\xb8' || (uint32_t) cli_readint32(epbuff + 1) != exe_sections[nsections - 1].rva + EC32(optional_hdr32.ImageBase)) {
+	if(nsections < 2 || epbuff[0] != '\xb8' || (uint32_t) cli_readint32(epbuff + 1) != exe_sections[nsections - 2].rva + EC32(optional_hdr32.ImageBase))
+	    found = 0;
+	else
+	    found = 1;
+    }
+
+    if(found && (DCONF & PE_CONF_PETITE)) {
+	cli_dbgmsg("Petite: v2.%d compression detected\n", found);
+
+	if(cli_readint32(epbuff + 0x80) == 0x163c988d) {
+	    cli_dbgmsg("Petite: level zero compression is not supported yet\n");
+	} else {
+	    dsize = max - min;
+
+	    CLI_UNPSIZELIMITS("Petite", dsize);
+
+	    if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+		cli_dbgmsg("Petite: Can't allocate %d bytes\n", dsize);
+		free(exe_sections);
+		return CL_EMEM;
+	    }
+
+	    for(i = 0 ; i < nsections; i++) {
+		if(exe_sections[i].raw) {
+		    if(!cli_seeksect(desc, &exe_sections[i]) || (unsigned int) cli_readn(desc, dest + exe_sections[i].rva - min, exe_sections[i].ursz) != exe_sections[i].ursz) {
+			free(exe_sections);
+			free(dest);
+			return CL_EIO;
+		    }
+		}
+	    }
+
+	    CLI_UNPTEMP("Petite",(dest,exe_sections,0));
+	    CLI_UNPRESULTS("Petite",(petite_inflate2x_1to9(dest, min, max - min, exe_sections, nsections - (found == 1 ? 1 : 0), EC32(optional_hdr32.ImageBase),vep, ndesc, found, EC32(optional_hdr32.DataDirectory[2].VirtualAddress),EC32(optional_hdr32.DataDirectory[2].Size))),0,(dest,0));
+	}
+    }
+
+    /* PESpin 1.1 */
+
+    if((DCONF & PE_CONF_PESPIN) && nsections > 1 &&
+       vep >= exe_sections[nsections - 1].rva &&
+       vep < exe_sections[nsections - 1].rva + exe_sections[nsections - 1].rsz - 0x3217 - 4 &&
+       memcmp(epbuff+4, "\xe8\x00\x00\x00\x00\x8b\x1c\x24\x83\xc3", 10) == 0)  {
+
+	char *spinned;
+
+	CLI_UNPSIZELIMITS("PEspin", fsize);
+
+	if((spinned = (char *) cli_malloc(fsize)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	lseek(desc, 0, SEEK_SET);
+	if((size_t) cli_readn(desc, spinned, fsize) != fsize) {
+	    cli_dbgmsg("PESpin: Can't read %d bytes\n", fsize);
+	    free(spinned);
+	    free(exe_sections);
+	    return CL_EIO;
+	}
+
+	CLI_UNPTEMP("PESpin",(spinned,exe_sections,0));
+	CLI_UNPRESULTS_("PEspin",SPINCASE(),(unspin(spinned, fsize, exe_sections, nsections - 1, vep, ndesc, ctx)),0,(spinned,0));
+    }
+
+
+    /* yC 1.3 */
+
+    if((DCONF & PE_CONF_YC) && nsections > 1 &&
+       EC32(optional_hdr32.AddressOfEntryPoint) == exe_sections[nsections - 1].rva + 0x60 &&
+       memcmp(epbuff, "\x55\x8B\xEC\x53\x56\x57\x60\xE8\x00\x00\x00\x00\x5D\x81\xED\x6C\x28\x40\x00\xB9\x5D\x34\x40\x00\x81\xE9\xC6\x28\x40\x00\x8B\xD5\x81\xC2\xC6\x28\x40\x00\x8D\x3A\x8B\xF7\x33\xC0\xEB\x04\x90\xEB\x01\xC2\xAC", 51) == 0 && fsize >= exe_sections[nsections - 1].raw + 0xC6 + 0xb97)  {
+
+	char *spinned;
+
+	if((spinned = (char *) cli_malloc(fsize)) == NULL) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	lseek(desc, 0, SEEK_SET);
+	if((size_t) cli_readn(desc, spinned, fsize) != fsize) {
+	    cli_dbgmsg("yC: Can't read %d bytes\n", fsize);
+	    free(spinned);
+	    free(exe_sections);
+	    return CL_EIO;
+	}
+
+	CLI_UNPTEMP("yC",(spinned,exe_sections,0));
+	CLI_UNPRESULTS("yC",(yc_decrypt(spinned, fsize, exe_sections, nsections-1, e_lfanew, ndesc)),0,(spinned,0));
+    }
+
+
+    /* WWPack */
+
+    if((DCONF & PE_CONF_WWPACK) && nsections > 1 &&
+       exe_sections[nsections-1].raw>0x2b1 &&
+       vep == exe_sections[nsections - 1].rva &&
+       exe_sections[nsections - 1].rva + exe_sections[nsections - 1].rsz == max &&
+       memcmp(epbuff, "\x53\x55\x8b\xe8\x33\xdb\xeb", 7) == 0 &&
+       memcmp(epbuff+0x68, "\xe8\x00\x00\x00\x00\x58\x2d\x6d\x00\x00\x00\x50\x60\x33\xc9\x50\x58\x50\x50", 19) == 0)  {
+	uint32_t headsize=exe_sections[nsections - 1].raw;
+	char *dest, *wwp;
+
+	for(i = 0 ; i < (unsigned int)nsections-1; i++)
+	    if (exe_sections[i].raw<headsize) headsize=exe_sections[i].raw;
+      
+	dsize = max-min+headsize-exe_sections[nsections - 1].rsz;
+
+	CLI_UNPSIZELIMITS("WWPack", dsize);
+
+	if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+	    cli_dbgmsg("WWPack: Can't allocate %d bytes\n", dsize);
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	lseek(desc, 0, SEEK_SET);
+	if((size_t) cli_readn(desc, dest, headsize) != headsize) {
+	    cli_dbgmsg("WWPack: Can't read %d bytes from headers\n", headsize);
+	    free(dest);
+	    free(exe_sections);
+	    return CL_EIO;
+	}
+
+	for(i = 0 ; i < (unsigned int)nsections-1; i++) {
+	    if(exe_sections[i].rsz) {
+		if(!cli_seeksect(desc, &exe_sections[i]) || (unsigned int) cli_readn(desc, dest + headsize + exe_sections[i].rva - min, exe_sections[i].rsz) != exe_sections[i].rsz) {
+		    free(dest);
+		    free(exe_sections);
+		    return CL_EIO;
+		}
+	    }
+	}
+
+	if((wwp = (char *) cli_calloc(exe_sections[nsections - 1].rsz, sizeof(char))) == NULL) {
+	    cli_dbgmsg("WWPack: Can't allocate %d bytes\n", exe_sections[nsections - 1].rsz);
+	    free(dest);
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+
+	if(!cli_seeksect(desc, &exe_sections[nsections - 1]) || (size_t) cli_readn(desc, wwp, exe_sections[nsections - 1].rsz) != exe_sections[nsections - 1].rsz) {
+	    cli_dbgmsg("WWPack: Can't read %d bytes from wwpack sect\n", exe_sections[nsections - 1].rsz);
+	    free(dest);
+	    free(wwp);
+	    free(exe_sections);
+	    return CL_EIO;
+	}
+
+	if (!wwunpack(dest, dsize, headsize, min, exe_sections[nsections-1].rva, e_lfanew, wwp, exe_sections[nsections - 1].rsz, nsections-1)) {
+	
+	    free(wwp);
+
+	    CLI_UNPTEMP("WWPack",(dest,exe_sections,0));
+
+	    if((unsigned int) write(ndesc, dest, dsize) != dsize) {
+		cli_dbgmsg("WWPack: Can't write %d bytes\n", dsize);
+		close(ndesc);
+		free(tempfile);
+		free(dest);
+		free(exe_sections);
+		return CL_EIO;
+	    }
+
+	    free(dest);
+	    if (cli_leavetemps_flag)
+		cli_dbgmsg("WWPack: Unpacked and rebuilt executable saved in %s\n", tempfile);
+	    else
+		cli_dbgmsg("WWPack: Unpacked and rebuilt executable\n");
+
+	    fsync(ndesc);
+	    lseek(ndesc, 0, SEEK_SET);
+
+	    if(cli_magic_scandesc(ndesc, ctx) == CL_VIRUS) {
+		free(exe_sections);
+		close(ndesc);
+		if(!cli_leavetemps_flag)
+		    unlink(tempfile);
+		free(tempfile);
+		return CL_VIRUS;
+	    }
+
+	    close(ndesc);
+	    if(!cli_leavetemps_flag)
+		unlink(tempfile);
+	    free(tempfile);
+	} else {
+	    free(wwp);
+	    free(dest);
+	    cli_dbgmsg("WWPpack: Decompression failed\n");
+	}
+    }
+
+
+    /* ASPACK support */
+    while((DCONF & PE_CONF_ASPACK) && ep+58+0x70e < fsize && !memcmp(epbuff,"\x60\xe8\x03\x00\x00\x00\xe9\xeb",8)) {
+
+        if(epsize<0x3bf || memcmp(epbuff+0x3b9, "\x68\x00\x00\x00\x00\xc3",6)) break;
+	ssize = 0;
+	for(i=0 ; i< nsections ; i++)
+	    if(ssize<exe_sections[i].rva+exe_sections[i].vsz)
+		ssize=exe_sections[i].rva+exe_sections[i].vsz;
+	if(!ssize) break;
+
+	CLI_UNPSIZELIMITS("Aspack", ssize);
+
+        if(!(src=(char *)cli_calloc(ssize, sizeof(char)))) {
+	    free(exe_sections);
+	    return CL_EMEM;
+	}
+        for(i = 0 ; i < (unsigned int)nsections; i++) {
+	    if(!exe_sections[i].rsz) continue;
+	    if(!cli_seeksect(desc, &exe_sections[i])) break;
+            if(!CLI_ISCONTAINED(src, ssize, src+exe_sections[i].rva, exe_sections[i].rsz)) break;
+            if(cli_readn(desc, src+exe_sections[i].rva, exe_sections[i].rsz)!=exe_sections[i].rsz) break;
+        }
+        if(i!=nsections) {
+            cli_dbgmsg("Aspack: Probably hacked/damaged Aspack file.\n");
+            free(src);
+            break;
+        }
+
+	CLI_UNPTEMP("Aspack",(src,exe_sections,0));
+	CLI_UNPRESULTS("Aspack",(unaspack212((uint8_t *)src, ssize, exe_sections, nsections, vep-1, EC32(optional_hdr32.ImageBase), ndesc)),1,(src,0));
+	break;
+    }
+
+    /* NsPack */
+
+    while (DCONF & PE_CONF_NSPACK) {
+	uint32_t eprva = vep;
+	uint32_t start_of_stuff, ssize, dsize, rep = ep;
+	unsigned int nowinldr;
+	char nbuff[24];
+	char *src=epbuff, *dest;
+
+	if (*epbuff=='\xe9') { /* bitched headers */
+	    eprva = cli_readint32(epbuff+1)+vep+5;
+	    if (!(rep = cli_rawaddr(eprva, exe_sections, nsections, &err, fsize, hdr_size)) && err) break;
+	    if (lseek(desc, rep, SEEK_SET)==-1) break;
+	    if (cli_readn(desc, nbuff, 24)!=24) break;
+	    src = nbuff;
+	}
+
+	if (memcmp(src, "\x9c\x60\xe8\x00\x00\x00\x00\x5d\xb8\x07\x00\x00\x00", 13)) break;
+
+	nowinldr = 0x54-cli_readint32(src+17);
+	cli_dbgmsg("NsPack: Found *start_of_stuff @delta-%x\n", nowinldr);
+
+	if (lseek(desc, rep-nowinldr, SEEK_SET)==-1) break;
+	if (cli_readn(desc, nbuff, 4)!=4) break;
+	start_of_stuff=rep+cli_readint32(nbuff);
+	if (lseek(desc, start_of_stuff, SEEK_SET)==-1) break;
+	if (cli_readn(desc, nbuff, 20)!=20) break;
+	src = nbuff;
+	if (!cli_readint32(nbuff)) {
+	    start_of_stuff+=4; /* FIXME: more to do */
+	    src+=4;
+	}
+
+	ssize = cli_readint32(src+5)|0xff;
+	dsize = cli_readint32(src+9);
+
+	CLI_UNPSIZELIMITS("NsPack", MAX(ssize,dsize));
+
+	if ( !ssize || !dsize || dsize != exe_sections[0].vsz) break;
+	if (lseek(desc, start_of_stuff, SEEK_SET)==-1) break;
+	if (!(dest=cli_malloc(dsize))) break;
+	/* memset(dest, 0xfc, dsize); */
+
+	if (!(src=cli_malloc(ssize))) {
+	    free(dest);
+	    break;
+	}
+	/* memset(src, 0x00, ssize); */
+	cli_readn(desc, src, ssize);
+
+	eprva+=0x27a;
+	if (!(rep = cli_rawaddr(eprva, exe_sections, nsections, &err, fsize, hdr_size)) && err) {
+	  free(dest);
+	  free(src);
+	  break;
+	}
+	if (lseek(desc, rep, SEEK_SET)==-1) {
+	  free(dest);
+	  free(src);
+	  break;
+	}
+	if (cli_readn(desc, nbuff, 5)!=5) {
+	  free(dest);
+	  free(src);
+	  break;
+	}
+	eprva=eprva+5+cli_readint32(nbuff+1);
+	cli_dbgmsg("NsPack: OEP = %08x\n", eprva);
+
+	CLI_UNPTEMP("NsPack",(src,dest,exe_sections,0));
+	CLI_UNPRESULTS("NsPack",(unspack(src, dest, ctx, exe_sections[0].rva, EC32(optional_hdr32.ImageBase), eprva, ndesc)),0,(src,dest,0));
+	break;
+    }
+
+    /* to be continued ... */
+
+    free(exe_sections);
+    return CL_CLEAN;
+}
+
+/*
+ * Read the DOS and PE headers of the executable open on desc and record
+ * its section layout in peinfo.
+ *
+ * Reads:   peinfo->offset (subtracted from the file size and added to
+ *          e_lfanew when seeking to the PE header).
+ * Writes:  peinfo->nsections, peinfo->section (heap array; it is kept
+ *          allocated when 0 is returned) and peinfo->ep.
+ * Returns: 0 on success, -1 on any failure.  Once the section array has
+ *          been allocated every failure path frees it and resets
+ *          peinfo->section to NULL.
+ *
+ * NOTE(review): optional_hdr32 / optional_hdr64 are not declared inside
+ * this function; presumably they are file-level aliases for the members
+ * of pe_opt - confirm before renaming that union or its fields.
+ */
+int cli_peheader(int desc, struct cli_exe_info *peinfo)
+{
+    const size_t opt32_len = sizeof(struct pe_image_optional_hdr32);
+    const size_t opt64_len = sizeof(struct pe_image_optional_hdr64);
+    uint16_t e_magic;  /* DOS signature ("MZ") */
+    uint32_t e_lfanew; /* address of new exe header */
+    struct pe_image_file_hdr file_hdr;
+    union {
+        struct pe_image_optional_hdr64 opt64;
+        struct pe_image_optional_hdr32 opt32;
+    } pe_opt;
+    struct pe_image_section_hdr *section_hdr = NULL;
+    struct stat sb;
+    int i;
+    unsigned int err, pe_plus = 0;
+    uint32_t valign, falign, hdr_size;
+    size_t fsize, table_len;
+
+    cli_dbgmsg("in cli_peheader\n");
+
+    if(fstat(desc, &sb) == -1) {
+        cli_dbgmsg("fstat failed\n");
+        return -1;
+    }
+    fsize = sb.st_size - peinfo->offset;
+
+    /* --- DOS header: signature first, e_lfanew 58 bytes further on --- */
+    if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) {
+        cli_dbgmsg("Can't read DOS signature\n");
+        return -1;
+    }
+    if(!(EC16(e_magic) == IMAGE_DOS_SIGNATURE || EC16(e_magic) == IMAGE_DOS_SIGNATURE_OLD)) {
+        cli_dbgmsg("Invalid DOS signature\n");
+        return -1;
+    }
+
+    lseek(desc, 58, SEEK_CUR); /* skip to the end of the DOS header */
+
+    if(cli_readn(desc, &e_lfanew, sizeof(e_lfanew)) != sizeof(e_lfanew)) {
+        /* truncated header? */
+        cli_dbgmsg("Can't read new header address\n");
+        return -1;
+    }
+    e_lfanew = EC32(e_lfanew);
+    if(e_lfanew == 0) {
+        cli_dbgmsg("Not a PE file\n");
+        return -1;
+    }
+
+    /* --- PE file header --- */
+    if(lseek(desc, peinfo->offset + e_lfanew, SEEK_SET) < 0) {
+        /* probably not a PE file */
+        cli_dbgmsg("Can't lseek to e_lfanew\n");
+        return -1;
+    }
+    if(cli_readn(desc, &file_hdr, sizeof(struct pe_image_file_hdr)) != sizeof(struct pe_image_file_hdr)) {
+        /* bad information in e_lfanew - probably not a PE file */
+        cli_dbgmsg("Can't read file header\n");
+        return -1;
+    }
+    if(EC32(file_hdr.Magic) != IMAGE_NT_SIGNATURE) {
+        cli_dbgmsg("Invalid PE signature (probably NE file)\n");
+        return -1;
+    }
+
+    /* Only 1..96 sections are accepted */
+    peinfo->nsections = EC16(file_hdr.NumberOfSections);
+    if(peinfo->nsections < 1 || peinfo->nsections > 96)
+        return -1;
+
+    /* --- Optional header: the PE32 part first, the PE32+ tail if needed --- */
+    if(EC16(file_hdr.SizeOfOptionalHeader) < opt32_len) {
+        cli_dbgmsg("SizeOfOptionalHeader too small\n");
+        return -1;
+    }
+    if(cli_readn(desc, &optional_hdr32, opt32_len) != opt32_len) {
+        cli_dbgmsg("Can't read optional file header\n");
+        return -1;
+    }
+
+    if(EC32(optional_hdr64.Magic)==PE32P_SIGNATURE) { /* PE+ */
+        if(EC16(file_hdr.SizeOfOptionalHeader) != opt64_len) {
+            cli_dbgmsg("Incorrect SizeOfOptionalHeader for PE32+\n");
+            return -1;
+        }
+        /* append the remaining bytes right behind the PE32-sized part */
+        if(cli_readn(desc, &optional_hdr32 + 1, opt64_len - opt32_len) != opt64_len - opt32_len) {
+            cli_dbgmsg("Can't read optional file header\n");
+            return -1;
+        }
+        pe_plus  = 1;
+        hdr_size = EC32(optional_hdr64.SizeOfHeaders);
+        valign   = EC32(optional_hdr64.SectionAlignment);
+        falign   = EC32(optional_hdr64.FileAlignment);
+    } else { /* PE */
+        if(EC16(file_hdr.SizeOfOptionalHeader) != opt32_len) {
+            /* Seek to the end of the long header */
+            lseek(desc, (EC16(file_hdr.SizeOfOptionalHeader) - opt32_len), SEEK_CUR);
+        }
+        hdr_size = EC32(optional_hdr32.SizeOfHeaders);
+        valign   = EC32(optional_hdr32.SectionAlignment);
+        falign   = EC32(optional_hdr32.FileAlignment);
+    }
+
+    hdr_size = PESALIGN(hdr_size, valign);
+
+    /* --- Section table --- */
+    peinfo->section = (struct cli_exe_section *) cli_calloc(peinfo->nsections, sizeof(struct cli_exe_section));
+    if(peinfo->section == NULL) {
+        cli_dbgmsg("Can't allocate memory for section headers\n");
+        return -1;
+    }
+
+    section_hdr = (struct pe_image_section_hdr *) cli_calloc(peinfo->nsections, sizeof(struct pe_image_section_hdr));
+    if(section_hdr == NULL) {
+        cli_dbgmsg("Can't allocate memory for section headers\n");
+        goto fail;
+    }
+
+    table_len = peinfo->nsections * sizeof(struct pe_image_section_hdr);
+    if(cli_readn(desc, section_hdr, table_len) != table_len) {
+        cli_dbgmsg("Can't read section header\n");
+        cli_dbgmsg("Possibly broken PE file\n");
+        goto fail;
+    }
+
+    /*
+     * File alignment fallback: as soon as one section with raw data sits
+     * at an offset that is not a multiple of falign but is a multiple of
+     * 0x200, use 0x200 as the file alignment instead.
+     */
+    i = 0;
+    while(falign != 0x200 && i < peinfo->nsections) {
+        if(falign && section_hdr[i].SizeOfRawData
+           && (EC32(section_hdr[i].PointerToRawData) % falign) != 0
+           && (EC32(section_hdr[i].PointerToRawData) % 0x200) == 0)
+            falign = 0x200;
+        i++;
+    }
+
+    for(i = 0; i < peinfo->nsections; i++) {
+        struct pe_image_section_hdr *in = &section_hdr[i];
+        struct cli_exe_section *out = &peinfo->section[i];
+
+        out->rva = PEALIGN(EC32(in->VirtualAddress), valign);
+        out->vsz = PESALIGN(EC32(in->VirtualSize), valign);
+        out->raw = PEALIGN(EC32(in->PointerToRawData), falign);
+        out->rsz = PESALIGN(EC32(in->SizeOfRawData), falign);
+
+        /* no virtual size given: derive it from the raw size */
+        if(out->rsz && !out->vsz)
+            out->vsz = PESALIGN(EC32(in->SizeOfRawData), valign);
+
+        /* raw data running past the end of the file is cut back (to 0
+         * when the section starts at or after the end of the file) */
+        if(out->rsz && !CLI_ISCONTAINED(0, (uint32_t) fsize, out->raw, out->rsz))
+            out->rsz = (fsize > out->raw) ? (fsize - out->raw) : 0;
+    }
+
+    /* --- Entry point: header value first, then mapped by cli_rawaddr --- */
+    peinfo->ep = pe_plus ? EC32(optional_hdr64.AddressOfEntryPoint)
+                         : EC32(optional_hdr32.AddressOfEntryPoint);
+
+    peinfo->ep = cli_rawaddr(peinfo->ep, peinfo->section, peinfo->nsections, &err, fsize, hdr_size);
+    if(!peinfo->ep && err) {
+        cli_dbgmsg("Broken PE file\n");
+        goto fail;
+    }
+
+    free(section_hdr);
+    return 0;
+
+fail:
+    /* section_hdr may still be NULL here; free(NULL) is a no-op */
+    free(section_hdr);
+    free(peinfo->section);
+    peinfo->section = NULL;
+    return -1;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,497 @@
+/*
+ *  Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+/*
+** petitep.c
+** 
+** 09/07/2k4 - Dumped and reversed
+** 10/07/2k4 - Very 1st approach
+** 10/07/2k4 - PE stuff and main loop
+** 11/07/2k4 - Porting finished, tracking my bugs...
+** 12/07/2k4 - ARRRRRGHHH :D
+** 14/07/2k4 - Code cleaned
+** 15/07/2k4 - Securing && ClamAV porting
+** 21/07/2k4 - Unmangled imports now supported
+** 22/07/2k4 - Unstripped .relocs now supported
+**
+*/
+
+/*
+** Unpacks a buffer containing a petite 2.2 compressed
+** file. Doesn't perform Import Table unmangling. Doesn't
+** fixup call/jumps. Tries to "guess" the original sections
+** structure and entrypoint.
+**
+** Lotta phanx to Micky for patiently bearing my screams :P
+** Greets to Ian Luck: the SEH MOVSB thingy almost got me :O
+** TODO: Cope with level 0 and older petite versions.
+*/
+
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cltypes.h"
+#include "rebuildpe.h"
+#include "execs.h"
+#include "others.h"
+#include "petite.h"
+
+
+/*
+ * Pull the next bit out of the one-byte bit register *mydlptr.
+ *
+ * While any of the low seven bits of the register is set, the register is
+ * simply doubled and its former top bit is returned.  Otherwise a fresh
+ * byte is taken from *scur (which is advanced by one), the register
+ * becomes that byte doubled plus one, and the top bit of the fresh byte
+ * is returned.
+ *
+ * Returns 0 or 1, or -1 (leaving *mydlptr and *scur untouched) when a
+ * refill is needed but *scur lies before buffer or at/after
+ * buffer+buffersize-1.
+ */
+static int doubledl(char **scur, uint8_t *mydlptr, char *buffer, uint32_t buffersize)
+{
+  unsigned char bits = *mydlptr;
+
+  if (bits & 0x7f) {
+    /* register still holds payload bits: shift only */
+    *mydlptr = (unsigned char)(bits * 2);
+    return (bits >> 7) & 1;
+  }
+
+  /* register exhausted: refill from the input stream */
+  if (*scur < buffer || *scur >= buffer+buffersize-1)
+    return -1;
+
+  bits = **scur;
+  *scur = *scur + 1;
+  *mydlptr = (unsigned char)(bits * 2 + 1);
+  return (bits >> 7) & 1;
+}
+
+/*
+ * Unpack a petite-compressed image held in buf and hand the result to
+ * cli_rebuildpe().
+ *
+ *  buf, bufsz   work buffer and its size; every access is checked against
+ *               these bounds with CLI_ISCONTAINED
+ *  minrva       subtracted from buf to form adjbuf, so that adjbuf + rva
+ *               addresses data by RVA (assumes buf starts at RVA minrva -
+ *               TODO confirm against the caller)
+ *  sections,    section table of the packed file; the last entry locates
+ *  sectcount    the table of packed records
+ *  Imagebase,   used while decrypting the original entry point and passed
+ *  pep          on (Imagebase) to cli_rebuildpe()
+ *  desc         passed to cli_rebuildpe()
+ *  version      1 or 2; selects the record table offset and the
+ *               grown/skew constants.  For any other value packed stays
+ *               NULL.
+ *  ResRva,      passed unchanged to cli_rebuildpe()
+ *  ResSize
+ *
+ * Returns 0 when cli_rebuildpe() reports success, 1 on every failure.
+ * The usects array built along the way is freed before returning.
+ */
+int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli_exe_section *sections, unsigned int sectcount, uint32_t Imagebase, uint32_t pep, int desc, int version, uint32_t ResRva, uint32_t ResSize)
+{
+  char *adjbuf = buf - minrva;
+  char *packed = NULL;
+  uint32_t thisrva=0, bottom = 0, enc_ep=0, irva=0, workdone=0, grown=0x355, skew=0x35;
+  int j = 0, oob, mangled = 0, check4resources=0;
+  struct cli_exe_section *usects = NULL;
+  void *tmpsct = NULL;
+
+  /*
+    -] The real thing [-
+  */
+
+  /* NOTE: (435063->4350a5) Petite kernel32!imports and error strings */
+
+  /* Here we adjust the start of packed blob, the size of petite code,
+   * the difference in size if relocs were stripped
+   * See below...
+   */
+
+  if ( version == 2 )
+    packed = adjbuf + sections[sectcount-1].rva + 0x1b8;
+  if ( version == 1 ) {
+    packed = adjbuf + sections[sectcount-1].rva + 0x178;
+    grown=0x323;    /* My name is Harry potter */
+    skew=0x34;
+  }
+
+  /* One iteration per record of the packed table; a zero first dword
+   * ends the table and triggers the rebuild. */
+  while (1) {
+    char *ssrc, *ddst;
+    uint32_t size, srva;
+    int backbytes, oldback, backsize, addsize;
+    
+    if ( ! CLI_ISCONTAINED(buf, bufsz, packed, 4)) {
+      if (usects)
+	free(usects);
+      return 1;
+    }
+    srva = cli_readint32(packed);
+
+    if (! srva) {
+      /* WERE DONE !!! :D */
+      int t, upd = 1;
+
+      if ( j <= 0 ) /* Some non petite compressed files will get here */
+	return 1;
+    
+      /* Select * from sections order by rva asc; */
+      /* (bubble sort of the j collected entries on rva; only rva, rsz
+       * and vsz are swapped) */
+      while ( upd ) {
+	upd = 0;
+	for (t = 0; t < j-1 ; t++) {
+	  uint32_t trva, trsz, tvsz;
+
+	  if ( usects[t].rva <= usects[t+1].rva )
+	    continue;
+	  trva = usects[t].rva;
+	  trsz = usects[t].rsz;
+	  tvsz = usects[t].vsz;
+	  usects[t].rva = usects[t+1].rva;
+	  usects[t].rsz = usects[t+1].rsz;
+	  usects[t].vsz = usects[t+1].vsz;
+	  usects[t+1].rva = trva;
+	  usects[t+1].rsz = trsz;
+	  usects[t+1].vsz = tvsz;
+	  upd = 1;
+	}
+      }
+
+      /* Computes virtualsize... we try to guess, actually :O */
+      /* (every section but the last is stretched or shrunk to reach the
+       * rva of the next one) */
+      for (t = 0; t < j-1 ; t++) {
+	if ( usects[t].vsz != usects[t+1].rva - usects[t].rva )
+	  usects[t].vsz = usects[t+1].rva - usects[t].rva;
+      }
+      
+      /*
+       * Our encryption is pathetic and our software is lame but
+       * we need to claim it's unbreakable.
+       * So why dont we just mangle the imports and encrypt the EP?!
+       */
+
+      /* Decrypts old entrypoint if we got enough clues */
+      if (enc_ep) {
+	uint32_t virtaddr = pep + 5 + Imagebase, tmpep;
+	int rndm = 0, dummy = 1;
+	char *thunk = adjbuf+irva;
+	char *imports;
+
+	if ( version == 2 ) { /* 2.2 only */
+
+	  /* Walk the array of thunk RVAs found at irva; a zero entry
+	   * means the whole array was processed (workdone). */
+	  while ( dummy && CLI_ISCONTAINED(buf, bufsz, thunk, 4) ) {
+	    uint32_t api;
+
+	    if (! cli_readint32(thunk)) {
+	      workdone = 1;
+	      break;
+	    }
+
+	    imports = adjbuf + cli_readint32(thunk);
+	    thunk+=4;
+	    dummy = 0;
+
+	    /* dummy is set again only when this import list ends with a
+	     * zero entry inside the buffer; otherwise the outer loop
+	     * stops without workdone. */
+	    while ( CLI_ISCONTAINED(buf, bufsz, imports, 4)) {
+	      dummy = 0;
+
+	      imports+=4;
+	      if ( ! (api = cli_readint32(imports-4)) ) {
+		dummy  = 1;
+		break;
+	      }
+	      if ( (api != (api | 0x80000000)) && mangled && --rndm < 0) {
+		api = virtaddr;
+		virtaddr +=5; /* EB + 1 double */
+		rndm = virtaddr & 7;
+	      } else {
+		api = 0xbff01337; /* KERNEL32!leet */
+	      }
+	      if (sections[sectcount-1].rva+Imagebase < api )
+		enc_ep--;
+	      if ( api < virtaddr )
+		enc_ep--;
+	      /* rotate enc_ep right by 3 bits */
+	      tmpep = (enc_ep & 0xfffffff8)>>3 & 0x1fffffff;
+	      enc_ep = (enc_ep & 7)<<29 | tmpep;
+	    }
+	  }
+	} else 
+	  workdone = 1;
+	enc_ep = pep+5+enc_ep;
+	if ( workdone == 1 ) {
+	  cli_dbgmsg("Petite: Old EP: %x\n", enc_ep);
+	} else {
+	  enc_ep = usects[0].rva;
+	  cli_dbgmsg("Petite: In troubles while attempting to decrypt old EP, using bogus %x\n", enc_ep);
+	}
+      }
+
+      /* Let's compact data */
+      /* (sections are laid out back to back from offset 0 of buf, in
+       * the sorted order) */
+      for (t = 0; t < j ; t++) {
+	usects[t].raw = (t>0)?(usects[t-1].raw + usects[t-1].rsz):0;
+	if (usects[t].rsz != 0 && CLI_ISCONTAINED(buf, bufsz, buf + usects[t].raw, usects[t].rsz))
+	  memmove(buf + usects[t].raw, adjbuf + usects[t].rva, usects[t].rsz);
+      }
+
+      /* Showtime!!! */
+      cli_dbgmsg("Petite: Sections dump:\n");
+      for (t = 0; t < j ; t++)
+	cli_dbgmsg("Petite: .SECT%d RVA:%x VSize:%x ROffset: %x, RSize:%x\n", t, usects[t].rva, usects[t].vsz, usects[t].raw, usects[t].rsz);
+      if (! cli_rebuildpe(buf, usects, j, Imagebase, enc_ep, ResRva, ResSize, desc)) {
+	cli_dbgmsg("Petite: Rebuilding failed\n");
+	free(usects);
+	return 1;
+      }
+      free(usects);
+      return 0;
+    }
+
+
+    size = srva & 0x7fffffff;
+    if ( srva != size ) { /* Test and clear bit 31 */
+      check4resources=0;
+      /*
+	Enumerates each petite data section
+	I should get here once or twice:
+	- 1 time for the resource section (if present)
+	- 1 time for the all_the_rest section
+      */
+
+      /* Record layout used here: count of dwords (bit 31 set), then two
+       * RVAs at +4 and +8; the record is 0x0c bytes long. */
+      if ( ! CLI_ISCONTAINED(buf, bufsz, packed+4, 8) ) {
+	if (usects)
+	  free(usects);
+	return 1;
+      }
+      /* Save the end of current packed section for later use */
+      bottom = cli_readint32(packed+8) + 4;
+      ssrc = adjbuf + cli_readint32(packed+4) - (size-1)*4;
+      ddst = adjbuf + cli_readint32(packed+8) - (size-1)*4;
+
+      if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, size*4) || !CLI_ISCONTAINED(buf, bufsz, ddst, size*4) ) {
+	if (usects)
+	  free(usects);
+	return 1;
+      }
+
+      /* Copy packed data to the end of the current packed section */
+      memmove(ddst, ssrc, size*4);
+      packed += 0x0c;
+    } else {
+      uint32_t check1, check2;
+      uint8_t mydl = 0;
+      uint8_t goback;
+      
+      /* Unpack each original section in turn */
+
+      /* Record layout used here: source RVA (srva, bit 31 clear), byte
+       * count at +4, destination RVA at +8; packed advances by 0x10. */
+      if ( ! CLI_ISCONTAINED(buf, bufsz, packed+4, 8)) {
+	if (usects)
+	  free(usects);
+	return 1;
+      }
+
+      size = cli_readint32(packed+4); /* How many bytes to unpack */
+      thisrva=cli_readint32(packed+8); /* RVA of the original section */
+      packed += 0x10;
+
+      if ( j >= 96 ) {
+	cli_dbgmsg("Petite: maximum number of sections exceeded, giving up.\n");
+	free(usects);
+	return 1;
+      }
+      /* Alloc 1 more struct */
+      /* (the result goes through tmpsct so that usects can still be
+       * freed when cli_realloc fails) */
+      if ( ! (tmpsct = cli_realloc(usects, sizeof(struct cli_exe_section) * (j+1))) ) {
+	if (usects)
+	  free(usects);
+	return 1;
+      }
+
+      usects = (struct cli_exe_section *) tmpsct;
+      /* Save section spex for later rebuilding */
+      usects[j].rva = thisrva;
+      usects[j].rsz = size;
+      if ( (int)(bottom - thisrva) >0 )
+	usects[j].vsz = bottom - thisrva;
+      else
+	usects[j].vsz = size;
+      usects[j].raw = 0; /* Cheaper than memset */
+
+      if (!size) { /* That's a ghost section! reloc any1? :P */
+	j++;
+	continue;
+      }
+
+      ssrc = adjbuf + srva;
+      ddst = adjbuf + thisrva;
+
+      /* Last petite section (unpacked 1st) could contain unpacked data
+       * (eg the icon): let's fix the rva
+       */
+
+      /* Only done for the first section unpacked after a copy record
+       * (check4resources is reset there and incremented below). */
+      if (!check4resources) {
+	unsigned int q;
+	for ( q = 0 ; q < sectcount ; q++ ) {
+	  if ( thisrva <= sections[q].rva || thisrva >= sections[q].rva + sections[q].vsz)
+	    continue;
+	  usects[j].rva = sections[q].rva;
+	  usects[j].rsz = thisrva - sections[q].rva + size;
+	  break;
+	}
+      }
+
+      /* Increase count of unpacked sections */
+      j++;
+
+
+      /* Setup some crap for later checks */
+      /* (goback = number of extra offset bits read per match; check1 and
+       * check2 = offset thresholds that lengthen a match - both depend
+       * on the unpacked size) */
+      if ( size < 0x10000 ) {
+	check1 = 0x0FFFFC060;
+	check2 = 0x0FFFFFC60;
+	goback = 5;
+      } else if ( size < 0x40000 ) {
+	check1 = 0x0FFFF8180;
+	check2 = 0x0FFFFF980;
+	goback = 7;
+      } else {
+	check1 = 0x0FFFF8300;
+	check2 = 0x0FFFFFB00;
+	goback = 8;
+      }
+
+      /*
+       * NOTE: on last loop we get esi=edi=ImageBase (which is not writeable)
+       * The movsb on the next line causes the iat_rebuild_and_decrypt_oldEP()
+       * func to get called instead... ehehe very smart ;)
+       */
+
+      if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, 1) || !CLI_ISCONTAINED(buf, bufsz, ddst, 1)) {
+	free(usects);
+	return 1;
+      }
+
+      /* The first byte is always copied verbatim */
+      size--;
+      *ddst++=*ssrc++; /* eheh u C gurus gotta luv these monsters :P */
+      backbytes=0;
+      oldback = 0;
+
+      /* No surprises here... NRV any1??? ;) */
+      /* size counts the bytes still to be produced; every doubledl()
+       * call yields one control bit or -1 when the input runs out. */
+      while (size > 0) {
+	oob = doubledl(&ssrc, &mydl, buf, bufsz);
+	if ( oob == -1 ) {
+	  free(usects);
+	  return 1;
+	}
+	if (!oob) {
+	  /* Literal: one input byte XORed with the low byte of the
+	   * remaining size */
+	  if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, 1) || !CLI_ISCONTAINED(buf, bufsz, ddst, 1) ) {
+	    free(usects);
+	    return 1;
+	  }
+	  *ddst++ = (char)((*ssrc++)^(size & 0xff));
+	  size--;
+	} else {
+	  /* Match: decode a (negative) offset in backbytes and a length
+	   * in backsize, then copy from earlier output */
+	  addsize = 0;
+	  backbytes++;
+	  while (1) {
+	    if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+	      free(usects);
+	      return 1;
+	    }
+	    backbytes = backbytes*2 + oob;
+	    if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+	      free(usects);
+	      return 1;
+	    }
+	    if (!oob)
+	      break;
+	  }
+	  backbytes -= 3;
+	  if ( backbytes >= 0 ) {
+	    /* New offset: read goback more bits, then invert */
+	    backsize = goback;
+	    do {
+	      if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+		free(usects);
+		return 1;
+	      }
+	      backbytes = backbytes*2 + oob;
+	      backsize--;
+	    } while (backsize);
+	    backbytes^=0xffffffff;
+	    addsize += 1 + ( backbytes < (int) check2 ) + ( backbytes < (int) check1 );
+	    oldback = backbytes;
+	  } else {
+	    /* Reuse the offset of the previous match */
+	    backsize = backbytes+1;
+	    backbytes = oldback;
+	  }
+
+	  if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+	    free(usects);
+	    return 1;
+	  }
+	  backsize = backsize*2 + oob;
+	  if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+	    free(usects);
+	    return 1;
+	  }
+	  backsize = backsize*2 + oob;
+	  if (!backsize) {
+	    /* Long form of the length: bit pairs until a zero stop bit */
+	    backsize++;
+	    while (1) {
+	      if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+		free(usects);
+		return 1;
+	      }
+	      backsize = backsize*2 + oob;
+	      if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+		free(usects);
+		return 1;
+	      }
+	      if (!oob)
+		break;
+	    }
+	    backsize+=2;
+	  }
+	  backsize+=addsize;
+	  size-=backsize;
+	  if(!CLI_ISCONTAINED(buf, bufsz, ddst, backsize) || !CLI_ISCONTAINED(buf, bufsz, ddst+backbytes, backsize)) {
+	    free(usects);
+	    return 1;
+	  }
+	  /* Byte-wise copy on purpose: source and destination may overlap */
+	  while(backsize--) {
+	    *ddst=*(ddst+backbytes);
+	    ddst++;
+	  }
+	  backbytes=0;
+	  backsize=0;
+	} /* else */
+      } /* while(ebx) */
+
+      /* Any lame petite code here? If so let's strip it
+       * We've done version adjustments already, see above
+       */
+
+      if ( j ) {
+	int strippetite=0;
+	uint32_t reloc;
+	
+	/* LONG MAGIC = 33C05E64 8B188B1B 8D63D65D */
+	/* (only the first 8 bytes are compared; looked for at a fixed
+	 * distance before the end of the data just unpacked) */
+	if ( usects[j-1].rsz > grown && 
+	     CLI_ISCONTAINED(buf, bufsz, ddst-grown+5+0x4f, 8) &&
+	     cli_readint32(ddst-grown+5+0x4f) == 0x645ec033 &&
+	     cli_readint32(ddst-grown+5+0x4f+4) == 0x1b8b188b
+	     ) {
+	  reloc = 0;
+	  strippetite = 1;
+	}
+	if ( !strippetite && 
+	     usects[j-1].rsz > grown+skew &&
+	     CLI_ISCONTAINED(buf, bufsz, ddst-grown+5+0x4f-skew, 8) &&
+	      cli_readint32(ddst-grown+5+0x4f-skew) == 0x645ec033 &&
+	     cli_readint32(ddst-grown+5+0x4f+4-skew) == 0x1b8b188b
+	     ) {
+	  reloc = skew; /* If the original exe had a .reloc were skewed */
+	  strippetite = 1;
+	}
+	
+	/* reloc is only read when strippetite is set, i.e. after one of
+	 * the two branches above assigned it */
+	if (strippetite && CLI_ISCONTAINED(buf, bufsz, ddst-grown+0x0f-8-reloc, 8)) {
+	  uint32_t test1, test2;
+	  
+	  /* REMINDER: DON'T BPX IN HERE U DUMBASS!!!!!!!!!!!!!!!!!!!!!!!! */
+	  test1 = cli_readint32(ddst-grown+0x0f-8-reloc)^0x9d6661aa;
+	  test2 = cli_readint32(ddst-grown+0x0f-4-reloc)^0xe908c483;
+	  
+	  cli_dbgmsg("Petite: Found petite code in sect%d(%x). Let's strip it.\n", j-1, usects[j-1].rva);
+	  if (test1 == test2 && CLI_ISCONTAINED(buf, bufsz, ddst-grown+0x0f-reloc, 0x1c0-0x0f+4)) {
+	    irva = cli_readint32(ddst-grown+0x121-reloc);
+	    enc_ep = cli_readint32(ddst-grown+0x0f-reloc)^test1;
+	    mangled = ((uint32_t) cli_readint32(ddst-grown+0x1c0-reloc) != 0x90909090); /* FIXME: Magic's too short??? */
+	    cli_dbgmsg("Petite: Encrypted EP: %x | Array of imports: %x\n",enc_ep, irva);
+	  }
+	  /* drop the petite code from the raw size of this section */
+	  usects[j-1].rsz -= grown+reloc;
+    	}
+      }
+      check4resources++;
+    } /* outer else */
+  } /* while true */
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,96 @@
+/*
+ *  Phishing module: domain list implementation.
+ *
+ *  Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as 
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "phishcheck.h"
+#include "phish_domaincheck_db.h"
+#include "regex_list.h"
+
+/*
+ * Look a URL pair up in the engine's domain list.
+ *
+ *  engine       engine owning domainlist_matcher; a NULL engine or a
+ *               missing matcher yields 0 (no match)
+ *  real_url,    handed to regex_list_match() unchanged
+ *  display_url
+ *  pre_fixup    forwarded only when hostOnly is non-zero, NULL otherwise
+ *  hostOnly     forwarded to regex_list_match()
+ *  flags        in/out: when the match carries a non-empty info string
+ *               that does not start with ':', that string must be exactly
+ *               three hex digits; they are parsed as a mask of flags to
+ *               clear from *flags.  Any other format only logs a warning.
+ *
+ * Returns the result of regex_list_match(), or 0 when there is no matcher.
+ */
+int domainlist_match(const struct cl_engine* engine,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,unsigned short* flags)
+{
+	/* initialised so the test below never reads an indeterminate pointer */
+	const char* info = NULL;
+	int rc = 0;
+
+	if(engine && engine->domainlist_matcher)
+		rc = regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly ? pre_fixup : NULL,hostOnly,&info,0);
+
+	if(rc && info && info[0] && info[0] != ':') {/*match successful, and has custom flags*/
+		/* <ctype.h> functions need a value representable as unsigned
+		 * char; a plain (possibly negative) char is undefined behaviour */
+		if(strlen(info)==3 && isxdigit((unsigned char)info[0]) && isxdigit((unsigned char)info[1]) && isxdigit((unsigned char)info[2])) {
+			unsigned short notwantedflags=0;
+			if(sscanf(info,"%hx",&notwantedflags) == 1)
+				*flags &= ~notwantedflags;/* filter unwanted phishcheck flags */
+		}
+		else {
+			cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected\n");
+		}
+	}
+	return rc;
+}
+
+/*
+ * Allocate the engine's domain-list matcher and initialise it.
+ * Returns CL_ENULLARG for a NULL engine, CL_EMEM when the allocation
+ * fails, otherwise whatever init_regex_list() returns.
+ */
+int init_domainlist(struct cl_engine* engine)
+{
+	struct regex_matcher *matcher;
+
+	if(!engine)
+		return CL_ENULLARG;
+
+	matcher = (struct regex_matcher *) cli_malloc(sizeof(struct regex_matcher));
+	engine->domainlist_matcher = matcher;
+	if(!matcher)
+		return CL_EMEM;
+
+	return init_regex_list(matcher);
+}
+
+/*
+ * Report the state of the domain-list matcher: 1 when there is no engine
+ * or no matcher, otherwise the result of is_regex_ok().
+ */
+int is_domainlist_ok(const struct cl_engine* engine)
+{
+	if(!engine || !engine->domainlist_matcher)
+		return 1;
+	return is_regex_ok(engine->domainlist_matcher);
+}
+
+/*
+ * Run regex_list_cleanup() on the engine's domain-list matcher, if any.
+ */
+void domainlist_cleanup(const struct cl_engine* engine)
+{
+	if(!engine || !engine->domainlist_matcher)
+		return;
+	regex_list_cleanup(engine->domainlist_matcher);
+}
+
+/*
+ * Tear down the engine's domain-list matcher: regex_list_done(), free the
+ * matcher itself and reset the engine's pointer to NULL.  Does nothing
+ * when there is no engine or no matcher.
+ */
+void domainlist_done(struct cl_engine* engine)
+{
+	if(!engine || !engine->domainlist_matcher)
+		return;
+
+	regex_list_done(engine->domainlist_matcher);
+	free(engine->domainlist_matcher);
+	engine->domainlist_matcher = NULL;
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,86 @@
+/*
+ *  Phishing module: whitelist implementation.
+ *
+ *  Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as 
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "phish_whitelist.h"
+#include "regex_list.h"
+
+/*
+ * Look a URL pair up in the engine's whitelist.  The lookup is logged
+ * first; without a whitelist matcher the result is 0, otherwise it is
+ * the result of regex_list_match().
+ */
+int whitelist_match(const struct cl_engine* engine,char* real_url,const char* display_url,int hostOnly)
+{
+	const char* match_info; /* filled in by regex_list_match(), never read here */
+
+	cli_dbgmsg("Phishing: looking up in whitelist: %s:%s; host-only:%d\n",real_url,display_url,hostOnly);
+
+	if(!engine->whitelist_matcher)
+		return 0;
+	return regex_list_match(engine->whitelist_matcher,real_url,display_url,NULL,hostOnly,&match_info,1);
+}
+
+/*
+ * Allocate the engine's whitelist matcher and initialise it.
+ * Returns CL_ENULLARG for a NULL engine, CL_EMEM when the allocation
+ * fails, otherwise whatever init_regex_list() returns.
+ */
+int init_whitelist(struct cl_engine* engine)
+{
+	struct regex_matcher *matcher;
+
+	if(!engine)
+		return CL_ENULLARG;
+
+	matcher = (struct regex_matcher *) cli_malloc(sizeof(struct regex_matcher));
+	engine->whitelist_matcher = matcher;
+	if(!matcher)
+		return CL_EMEM;
+
+	return init_regex_list(matcher);
+}
+
+/*
+ * Report the state of the whitelist matcher: 1 when there is no engine
+ * or no matcher, otherwise the result of is_regex_ok().
+ */
+int is_whitelist_ok(const struct cl_engine* engine)
+{
+	if(!engine || !engine->whitelist_matcher)
+		return 1;
+	return is_regex_ok(engine->whitelist_matcher);
+}
+
+/*
+ * Run regex_list_cleanup() on the engine's whitelist matcher, if any.
+ */
+void whitelist_cleanup(const struct cl_engine* engine)
+{
+	if(!engine || !engine->whitelist_matcher)
+		return;
+	regex_list_cleanup(engine->whitelist_matcher);
+}
+
+/*
+ * Tear down the engine's whitelist matcher: regex_list_done(), free the
+ * matcher itself and reset the engine's pointer to NULL.  Does nothing
+ * when there is no engine or no matcher.
+ */
+void whitelist_done(struct cl_engine* engine)
+{
+	if(!engine || !engine->whitelist_matcher)
+		return;
+
+	regex_list_done(engine->whitelist_matcher);
+	free(engine->whitelist_matcher);
+	engine->whitelist_matcher = NULL;
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1336 @@
+/*
+ *  Detect phishing, based on URL spoofing detection.
+ *
+ *  Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as 
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "mbox.h"
+#include "message.h"
+#include "htmlnorm.h"
+#include "phishcheck.h"
+#include "phish_domaincheck_db.h"
+#include "phish_whitelist.h"
+#include "iana_tld.h"
+
+
+/* Selects which of the two links of a pair is meant (real target vs. displayed text). */
+#define DOMAIN_REAL 1
+#define DOMAIN_DISPLAY 0
+
+/* Bit flags accumulated in the `phishy` integer while the URLs are analysed. */
+#define PHISHY_USERNAME_IN_URL 1
+#define PHISHY_NUMERIC_IP      2
+#define REAL_IS_MAILTO	       4
+/* REAL_IS_MAILTO is just a flag, so that the displayed url will be parsed as mailto too, for example
+ * <a href='mailto:somebody@yahoo.com'>to:somebody@yahoo.com</a>*/
+#define DOMAIN_LISTED		 8
+#define PHISHY_CLOAKED_NULL	16
+#define PHISHY_HEX_URL		32
+
+/*
+* Phishing design documentation,
+(initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB)
+
+TODO:update this doc
+
+*Warning*: if flag *--phish-scan-alldomains* (or equivalent clamd/clamav-milter config option) isn't given, then phishing scanning is done only for domains listed in daily.pdb.
+If your daily.pdb is empty, then by default NO PHISHING is DONE, UNLESS you give the *--phish-scan-alldomains*
+This is just a side-effect: daily.pdb is empty because it isn't yet officially in daily.cvd.
+
+phishingCheck() determines if @displayedLink is  a legit representation of @realLink.
+
+Steps:
+
+1. if _realLink_ *==* _displayLink_ => *CLEAN*
+
+2. url cleanup (normalization)
+- whitespace elimination
+- html entity conversion
+- convert hostname to lowercase
+- normalize \ to /
+If there is a dot after the last space, then all spaces are replaced with dots,
+otherwise spaces are stripped.
+So both: 'Go to yahoo.com', and 'Go to e b a y . c o m', and 'Go to ebay. com' will work.
+
+
+3. Matched the urls against a _whitelist_:
+a _realLink_, _displayedLink_ pair is matched against the _whitelist_.
+the _whitelist_ is a list of pairs of realLink, displayedLink. Any of the elements of those pairs can be a _regex_.
+ if url *is found* in _whitelist_ --> *CLEAN*
+
+4. URL is looked up in the _domainlist_, unless disabled via flags (_--phish-scan-alldomains_).
+The _domainlist_ is a list of pairs of realLink, displayedLink (any of which can be regex).
+This is the list of domains we do phishing detection for (such as ebay,paypal,chase,....)
+We can't decide to stop processing here or not, so we just set a flag.
+
+Note(*!*): the flags are modified by the domainlist checker. If the domain is found, then the flags associated with it filter the default compile-time flags.
+
+5. _Hostname_ is extracted from the _displayed URL_.
+It is checked against the _whitelist_, and _domainlist_.
+
+6. Now we know if we want to stop processing.
+If we are only scanning domains in the _domainlist_ (default behaviour), and the url/domain
+isn't found in it, we return (and mark url as not_list/clean).
+If we scan all domains, then the domainlist isn't even checked.
+
+7. URL cloak check.
+check for %00, and hex-encoded IPs in URL.
+
+8. Skip empty displayedURLs
+
+9. SSL mismatch detection.
+Checks if realLink is http, but displayedLink is https or viceversa.
+(by default the SSL detection is done for hrefs only, not for imgs)
+
+10. Hostname of real URL is extracted.
+
+12. Numeric IP detection.
+If url is a numeric IP, then -> phish.
+Maybe we should do DNS lookup?
+Maybe we should disable numericIP checks for --phish-scan-alldomains?
+
+13. isURL(displayedLink).
+Checks if displayedLink is really a url.
+if not -> clean
+
+14. Hostnames of real, displayedLink are compared. If equal -> clean
+
+15. Extract domain names, and compare. If equal -> clean
+
+16. Do DNS lookups/reverse lookups. Disabled now (too much load/too many lookups). *
+
+For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Flags)
+ *
+ */
+
+/* Constant strings and tables */
+/* Writable empty string: string_init_c()/string_assign_null() point struct string.data here instead of at NULL. */
+static char empty_string[]="";
+
+
+/* One dotted component of a "cloaked" host: a hex number (0x...) or a decimal number. */
+#define ANY_CLOAK "(0[xX][0-9a-fA-F]+|[0-9]+)"
+/* A whole host made of one to four such components. */
+#define CLOAKED_URL "^"ANY_CLOAK"(\\."ANY_CLOAK"){0,3}$"
+
+static const char cloaked_host_regex[] = CLOAKED_URL;
+/* iana_tld / iana_cctld are string macros; presumably supplied by iana_tld.h -- confirm */
+static const char tld_regex[] = "^"iana_tld"$";
+static const char cctld_regex[] = "^"iana_cctld"$";
+static const char dotnet[] = ".net";
+static const char adonet[] = "ado.net";
+static const char aspnet[] = "asp.net";
+/* ; is replaced by ' ' so omit it here*/
+static const char lt[]="&lt";
+static const char gt[]="&gt";
+static const char src_text[] = "src";
+static const char href_text[] = "href";
+static const char mailto[] = "mailto:";
+static const char https[]="https://";
+
+/* NOTE: unlike the lengths below, these two use sizeof() without -1, so they
+ * count the terminating NUL as well. */
+static const size_t href_text_len = sizeof(href_text);
+static const size_t src_text_len = sizeof(src_text);
+/* String lengths excluding the terminating NUL. */
+static const size_t dotnet_len = sizeof(dotnet)-1;
+static const size_t adonet_len = sizeof(adonet)-1;
+static const size_t aspnet_len = sizeof(aspnet)-1;
+static const size_t lt_len = sizeof(lt)-1;
+static const size_t gt_len = sizeof(gt)-1;
+static const size_t mailto_len = sizeof(mailto)-1;
+static const size_t https_len  = sizeof(https)-1;
+
+/* for urls, including mailto: urls, and (broken) http:www... style urls*/
+/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
+ * Modifications: don't allow empty domains/subdomains, such as www..com <- that is no url
+ * So the 'safe' char class has been split up
+ * */
+/* character classes (contents of POSIX bracket expressions, without the [ ]) */
+#define URI_alpha	"a-zA-Z"
+#define URI_digit	"0-9"
+#define URI_safe_nodot  "-$_@&"
+/* Same as URI_safe_nodot plus '.'. The '@' must be a literal at-sign here;
+ * e-mail obfuscation had turned it into " at ", which put ' ', 'a' and 't'
+ * into the class instead. */
+#define URI_safe	"-$_@.&"
+#define URI_extra	"!*\"'(),"
+#define URI_reserved    "=;/#?: "
+#define URI_national    "{}|[]\\^~"
+#define URI_punctuation "<>"
+
+/* One hex digit. The upper-case range must end at 'F': "A-f" would also
+ * accept G-Z, [ \ ] ^ _ and ` as hex digits. */
+#define URI_hex		 "[0-9a-fA-F]"
+/* A %xx escape sequence. */
+#define URI_escape      "%"URI_hex"{2}"
+#define URI_xalpha "([" URI_safe URI_alpha URI_digit  URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
+#define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
+
+/* Repetitions of the character classes: one or more xalpha; zero or more dot-free xalpha. */
+#define URI_xalphas URI_xalpha"+"
+#define URI_xalphas_nodot URI_xalpha_nodot"*"
+
+/* A letter followed by dot-free xalphas; used as the URI scheme below. */
+#define URI_ialpha  "["URI_alpha"]"URI_xalphas_nodot""
+/* xalpha or a literal '+' */
+#define URI_xpalpha URI_xalpha"|\\+"
+#define URI_xpalpha_nodot URI_xalpha_nodot"|\\+"
+#define URI_xpalphas "("URI_xpalpha")+"
+#define URI_xpalphas_nodot "("URI_xpalpha_nodot")+"
+#define optional_URI_xpalphas "("URI_xpalpha"|=)*"
+
+#define URI_scheme URI_ialpha
+#define URI_tld iana_tld
+/* path1: one or more dot-terminated labels; path2: the TLD; path3: optional /segments */
+#define URI_path1 URI_xpalphas_nodot"\\.("URI_xpalphas_nodot"\\.)*"
+#define URI_path2 URI_tld
+#define URI_path3 "(/"optional_URI_xpalphas")*"
+
+#define URI_search "("URI_xalphas")*"
+#define URI_fragmentid URI_xalphas
+
+/* Dotted-quad numeric host: four groups of 1-3 digits, optional :port, optional path. */
+#define URI_IP_digits "["URI_digit"]{1,3}"
+#define URI_numeric_path URI_IP_digits"(\\."URI_IP_digits"){3}(:"URI_xpalphas_nodot")?(/("URI_xpalphas"/?)*)?"
+#define URI_numeric_URI "("URI_scheme":(//)?)?"URI_numeric_path"(\\?" URI_search")?"
+#define URI_numeric_fragmentaddress URI_numeric_URI"(#"URI_fragmentid")?"
+
+/* The full URL regex is kept in three pieces (see the warning below). */
+#define URI_URI1 "("URI_scheme":(//)?)?"URI_path1
+#define URI_URI2 URI_path2
+#define URI_URI3 URI_path3"(\\?" URI_search")?"
+
+#define URI_fragmentaddress1 URI_URI1
+#define URI_fragmentaddress2 URI_URI2
+#define URI_fragmentaddress3 URI_URI3"(#"URI_fragmentid")?"
+
+/* Anything starting with one of these schemes followed by "://" */
+#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"
+
+/*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
+ * The regex is split into fragmentaddress1, 2 and 3 to work around the ISO limitation of 509 bytes max length for string constants*/
+static const char numeric_url_regex[] = "^ *"URI_numeric_fragmentaddress" *$";
+
+/* generated by contrib/phishing/generate_tables.c */
+/* Maps a byte to its hex-digit value: '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15.
+ * Every other byte maps to 0, so an invalid digit cannot be told apart from '0'. */
+static const short int hextable[256] = {
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
+       0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
+};
+
+/* Prototypes*/
+static void string_init_c(struct string* dest,char* data);
+static void string_assign_null(struct string* dest);
+static char *rfind(char *start, char c, size_t len);
+static char hex2int(const unsigned char* src);
+static int isTLD(const struct phishcheck* pchk,const char* str,int len);
+static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
+static const char* phishing_ret_toString(enum phish_status rc);
+
+/*
+ * Put the three strings of @urls into their initial state: each one points
+ * at the shared empty string and owns nothing (see string_init_c()).
+ */
+static void url_check_init(struct url_check* urls)
+{
+	struct string* const real = &urls->realLink;
+	struct string* const shown = &urls->displayLink;
+	struct string* const pre_shown = &urls->pre_fixup.pre_displayLink;
+
+	string_init_c(real, NULL);
+	string_init_c(shown, NULL);
+	string_init_c(pre_shown, NULL);
+}
+
+/* string reference counting implementation,
+ * so that: we don't have to keep in mind who allocated what, and when needs to be freed,
+ * and thus we won't leak memory*/
+
+/*
+ * Drop one reference to @str.
+ * When a count reaches zero: a string that borrows from another one (ref set)
+ * passes the release on to that string; a string that owns its buffer frees it.
+ * Any other resulting count (including negative ones) stops the walk.
+ */
+static void string_free(struct string* str)
+{
+	while(--str->refcount == 0) {
+		if(!str->ref) {
+			/* end of the chain: this string owns the data */
+			if(str->data)
+				free(str->data);
+			return;
+		}
+		/* a portion of another string: don't free, release that one instead */
+		str = str->ref;
+	}
+}
+
+/*
+ * Make @dest refer to the same data as @src.
+ * Always assign through this function: it releases whatever @dest held before
+ * and takes a new reference on @src, so that the counts stay balanced.
+ */
+static void string_assign(struct string* dest,struct string* src)
+{
+	/* release the previous contents first */
+	string_free(dest);
+
+	src->refcount += 1;
+	dest->ref = src;
+	dest->data = src->data;
+	dest->refcount = 1;
+}
+
+/*
+ * Initialize @dest from a C string. Initialization only: the previous
+ * contents of @dest are NOT released.
+ * A non-NULL @data becomes owned by @dest (refcount 1), so it is freed when
+ * the string is freed. NULL is not stored; it is replaced by a pointer to the
+ * shared empty string with refcount 0.
+ */
+static void string_init_c(struct string* dest,char* data)
+{
+	dest->ref = NULL;
+	if(data) {
+		dest->data = data;
+		dest->refcount = 1;
+	}
+	else {
+		dest->data = empty_string;
+		dest->refcount = 0;
+	}
+}
+
+/*
+ * Replace @dest with a freshly allocated, NUL-terminated copy of the bytes
+ * in [start, end).
+ * Returns CL_SUCCESS, or CL_EMEM if the allocation fails (in which case
+ * @dest is left untouched).
+ */
+static int string_assign_dup(struct string* dest,const char* start,const char* end)
+{
+	char* copy = cli_malloc(end-start+1);
+
+	if(copy == NULL)
+		return CL_EMEM;
+
+	/* copy before releasing dest: start/end may point into dest's own data */
+	strncpy(copy,start,end-start);
+	copy[end-start]='\0';
+	string_free(dest);
+
+	dest->ref=NULL;
+	dest->refcount=1;
+	dest->data=copy;
+	return CL_SUCCESS;
+}
+
+/*
+ * Release @dest and make it the empty string. A NULL @dest is ignored.
+ * The refcount is set to -1 so that string_free() never frees the shared
+ * empty string.
+ */
+static void string_assign_null(struct string* dest)
+{
+	if(!dest)
+		return;
+
+	string_free(dest);
+	dest->ref=NULL;
+	dest->refcount=-1;/* don't free it! */
+	dest->data=empty_string;
+}
+
+/*
+ * Make @dest a view onto part of another string: @data is expected to point
+ * into the buffer of @ref. The previous contents of @dest are released and
+ * @ref gains one reference.
+ */
+static void string_assign_ref(struct string* dest,struct string* ref,char* data)
+{
+	string_free(dest);
+
+	ref->refcount += 1;
+	dest->ref = ref;
+	dest->refcount = 1;
+	dest->data = data;
+}
+
+/*
+ * Release the three strings held by @url (real link, displayed link and the
+ * pre-fixup displayed link) via string_free(); whether anything is actually
+ * freed depends on each string's reference count.
+ */
+static void free_if_needed(struct url_check* url)
+{
+	string_free(&url->realLink);
+	string_free(&url->displayLink);
+	string_free(&url->pre_fixup.pre_displayLink);
+}
+
+/*
+ * Compile @regex into @preg as a case-insensitive extended regex
+ * (plus REG_NOSUB when @nosub is non-zero).
+ * Returns CL_SUCCESS on success; on failure logs an error and returns 1.
+ */
+static int build_regex(regex_t* preg,const char* regex,int nosub)
+{
+	int cflags = REG_EXTENDED|REG_ICASE;
+	int rc;
+
+	if(nosub)
+		cflags |= REG_NOSUB;
+
+	cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex);
+	rc = cli_regcomp(preg,regex,cflags);
+	if(!rc)
+		return CL_SUCCESS;
+
+#ifdef	C_WINDOWS
+	cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n");
+#else
+	{
+		/* a first call with a NULL buffer yields the size needed for the message */
+		size_t buflen =	cli_regerror(rc,preg,NULL,0);
+		char *errbuf = cli_malloc(buflen);
+
+		if(!errbuf) {
+			cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks. Additionally an Out-of-memory error was encountered while generating a detailed error message\n");
+		}
+		else {
+			cli_regerror(rc,preg,errbuf,buflen);
+			cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
+			free(errbuf);
+		}
+	}
+#endif
+	return 1;
+}
+
+/*
+ * Locate the host part of @URL.
+ * On return *hstart / *hend delimit the host inside @URL itself; nothing is
+ * copied here (the only allocation is the temporary made inside isTLD()).
+ * Both are set to NULL when @URL is NULL.
+ *
+ * @isReal: non-zero when @URL is the real (href) link, zero for the displayed one.
+ * @phishy: in/out flag set; may gain PHISHY_USERNAME_IN_URL or REAL_IS_MAILTO,
+ *          and REAL_IS_MAILTO is read when parsing the displayed link.
+ * Returns 0, or the negative value returned by isTLD().
+ */
+static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* phishy,const char **hstart, const char **hend)
+{
+	int rc,ismailto = 0;
+	const char* start;
+	const char* end=NULL;
+	if(!URL) {
+		*hstart=*hend=NULL;
+		return 0;
+	}
+	start = strstr(URL,"://");
+	if(!start) {
+		if(!strncmp(URL,mailto,mailto_len)) {
+			start = URL + mailto_len;
+			ismailto = 1;
+		}
+		else if (!isReal && *phishy&REAL_IS_MAILTO) {
+			/* it is not required to use mailto: in the displayed url, they might use to:, or whatever */
+			/* end is one past the terminating NUL here; start skips to after the first ':' or ' ' */
+			end = URL+strlen(URL)+1;
+			start = URL + strcspn(URL,": ")+1;
+			/* no ':' or ' ' found at all: use the whole string */
+			if (start==end)
+				start = URL;
+			ismailto = 1;
+		}
+		else {
+			start=URL;/*URL without protocol*/
+			if(isReal)
+				cli_dbgmsg("Phishcheck: Real URL without protocol: %s\n",URL);
+			else ismailto=2;/*no-protocol, might be mailto, @ is no problem*/
+		}
+	}
+	else
+		start += 3;	/* :// */
+
+	/* skipped only for a real link that is a mailto: */
+	if(!ismailto || !isReal) {
+		const char *realhost,*tld;
+
+		do {
+			/* the host ends at the first ':', '/' or '?' (or at the end of the string) */
+			end  = start + strcspn(start,":/?");
+			realhost = strchr(start,'@');
+
+			if(realhost == NULL || (start!=end && realhost>end)) {
+				/*don't check beyond end of hostname*/
+				break;
+			}
+
+			/* NOTE(review): this searches for the last '.' AFTER the '@', and the
+			 * length handed to isTLD() is measured from the '@' -- confirm that this
+			 * is the intended "username looks like a domain" test */
+			tld = strrchr(realhost,'.');
+			rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
+			if(rc < 0)
+				return rc;
+			if(rc)
+				*phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people,
+								     like http://www.ebay.com@somevilplace.someevildomain.com/ */
+			start = realhost+1;/*skip the username*/
+		} while(realhost);/*skip over multiple @ characters, text following last @ character is the real host*/
+	}
+	else if (ismailto && isReal)
+		*phishy |= REAL_IS_MAILTO;
+
+	if(!end) {
+		end  = start + strcspn(start,":/?");/*especially important for mailto:somebody@yahoo.com?subject=...*/
+		/* NOTE(review): end was just assigned a non-NULL pointer, so this branch cannot be taken */
+		if(!end)
+			end  = start + strlen(start);
+	}
+	*hstart = start;
+	*hend = end;
+	return 0;
+}
+
+/* Returns 1 if @str matches the country-code TLD regex, 0 otherwise (also for NULL). */
+static int isCountryCode(const struct phishcheck* s,const char* str)
+{
+	if(!str)
+		return 0;
+	return cli_regexec(&s->preg_cctld,str,0,NULL,0) == 0;
+}
+
+/*
+ * Test whether the first @len characters of @str match the TLD regex.
+ * Returns 1 on a match, 0 on no match or when @str is NULL, and CL_EMEM
+ * when the temporary copy cannot be allocated.
+ */
+static int isTLD(const struct phishcheck* pchk,const char* str,int len)
+{
+	char* candidate;
+	int matched;
+
+	if (!str)
+		return 0;
+
+	/* the regex needs a NUL-terminated string, so work on a copy */
+	candidate = cli_malloc(len+1);
+	if(!candidate)
+		return CL_EMEM;
+	strncpy(candidate,str,len);
+	candidate[len]='\0';
+
+	matched = (cli_regexec(&pchk->preg_tld,candidate,0,NULL,0) == 0);
+	free(candidate);
+	return matched;
+}
+
+/*
+ * memrchr isn't standard, so I use this.
+ *
+ * Search backwards for @c in start[0..len] (both ends inclusive).
+ * Returns a pointer to the last occurrence, or NULL when there is none or
+ * when @start is NULL.
+ *
+ * Callers compute @len as "offset - 1", which wraps around to (size_t)-1
+ * when the offset is 0. Such wrapped (i.e. negative) lengths denote an empty
+ * range and yield NULL. The search is index based, so no pointer before
+ * @start is ever formed.
+ */
+static char *
+rfind(char *start, char c, size_t len)
+{
+	size_t i;
+
+	if(start == NULL)
+		return NULL;
+
+	/* a value in the upper half of size_t is a wrapped negative length */
+	if(len > ((size_t)-1 >> 1))
+		return NULL;
+
+	for(i = len + 1; i-- > 0; ) {
+		if(start[i] == c)
+			return start + i;
+	}
+	return NULL;
+}
+
+/*
+ * Derive the domain of @host and store it in @dest.
+ * @dest either shares the whole of @host (string_assign) or refers to a
+ * suffix of it (string_assign_ref); no copy is made.
+ */
+static void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host)
+{
+	char* prev_dot;
+	char* last_dot = strrchr(host->data,'.');
+
+	if(last_dot == NULL) {
+		cli_dbgmsg("Phishcheck: Encountered a host without a tld? (%s)\n",host->data);
+		string_assign(dest,host);
+		return;
+	}
+
+	if(isCountryCode(pchk,last_dot+1)) {
+		const char* cc = last_dot+1;
+
+		/* step back one more label, to the left of the country code */
+		last_dot = rfind(host->data,'.',last_dot-host->data-1);
+		if(last_dot == NULL) {
+			cli_dbgmsg("Phishcheck: Weird, a name with only 2 levels (%s)\n",
+				host->data);
+			string_assign(dest,host);
+			return;
+		}
+		if(!isTLD(pchk,last_dot+1,cc-last_dot-1)) {
+			/*it was a name like: subdomain.domain.uk, return domain.uk*/
+			string_assign_ref(dest,host,last_dot+1);
+			return;
+		}
+	}
+
+	/*we need to strip one more level, this is the actual domain*/
+	prev_dot = rfind(host->data,'.',last_dot-host->data-1);
+	if(prev_dot != NULL)
+		string_assign_ref(dest,host,prev_dot+1);
+	else
+		string_assign(dest,host);/* it was like sourceforge.net?*/
+}
+
+/*
+ * Returns 1 if @host is, in its entirety, a dotted-quad IPv4 address whose
+ * four components are each in the range 0..255; returns 0 otherwise.
+ */
+static int isNumeric(const char* host)
+{
+	const size_t len = strlen(host);
+	int a,b,c,d,n=0;
+
+	/* 1.2.3.4 -> 7*/
+	/* 127.127.127.127 -> 15*/
+	if(len<7 || len>15)
+		return 0;
+
+	/* all four numbers must convert ... */
+	if(sscanf(host,"%d.%d.%d.%d%n",&a,&b,&c,&d,&n) != 4)
+		return 0;
+	/* ... and nothing may follow them */
+	if(n < 0 || (size_t)n != len)
+		return 0;
+
+	/* an octet is at most 255 */
+	return a>=0 && a<=255 && b>=0 && b<=255 && c>=0 && c<=255 && d>=0 && d<=255;
+}
+
+/* Returns 1 if @URL starts with "https://", 0 otherwise (also for NULL). */
+static int isSSL(const char* URL)
+{
+	if(!URL)
+		return 0;
+	return strncmp(https,URL,https_len) == 0;
+}
+
+/*
+ * Convert %xx escape sequences to the byte they encode, in place.
+ * *begin and *end delimit the string and are updated to the new bounds:
+ * the string gets shorter, since each sequence collapses to one byte.
+ * The byte value comes from hex2int(); the two characters after '%' are not
+ * checked for being hex digits here.
+ */
+static void
+str_hex_to_char(char **begin, const char **end)
+{
+	char *sbegin = *begin;
+	const char *str_end = *end;
+
+	if(str_end <= sbegin)
+		return;
+
+	if(strlen(sbegin) <= 2)
+		return;
+
+	/* convert leading %xx*/
+	/* the decoded byte is stored over the second digit and the start moves up to it */
+	if (sbegin[0] == '%') {
+		sbegin[2] = hex2int((unsigned char*)sbegin+1);
+		sbegin += 2;
+	}
+	/* scanning continues at the character after the new start */
+	*begin = sbegin++;
+	/* NOTE: because of the sbegin+3 < str_end condition, a sequence that
+	 * lies within the last three bytes is left as it is */
+	while(sbegin+3 < str_end) {
+		while(sbegin+3<str_end && sbegin[0]=='%') {
+			const char* src = sbegin+3;
+			*sbegin = hex2int((unsigned char*)sbegin+1);
+			/* move string */
+			/* close the two-byte gap left by the digits */
+			memmove(sbegin+1,src,str_end-src+1);
+			str_end -= 2;
+		}
+		sbegin++;
+	}
+	*end = str_end;
+}
+
+/*
+ * deletes @what from the string @begin.
+ * @what_len: length of @what, excluding the terminating \0
+ *
+ * *begin and *end delimit the string and are updated to the new bounds.
+ * Occurrences at the front and at the back are dropped by moving the bounds;
+ * occurrences in between are removed by shifting the tail to the left.
+ */
+static void
+str_strip(char **begin, const char **end, const char *what, size_t what_len)
+{
+	char *sbegin = *begin;
+	const char *str_end = *end;
+	const char *str_end_what;
+	size_t cmp_len = what_len;
+
+	/* NOTE(review): begin has already been dereferenced by the initializer
+	 * above, so the NULL test here cannot protect against a NULL begin */
+	if(begin == NULL || str_end <= sbegin)
+		return;
+
+	/*if(str_end < (sbegin + what_len))
+		return;*/
+	if(strlen(sbegin) < what_len)
+		return;
+
+	/* strip leading @what */
+	/* cmp_len starts out equal to what_len and so drops to 0 after the first
+	 * match: at most one leading occurrence is skipped here */
+	while(cmp_len && !strncmp(sbegin,what,cmp_len)) {
+		sbegin += what_len;
+
+		if(cmp_len > what_len)
+			cmp_len -= what_len;
+		else
+			cmp_len = 0;
+	}
+
+	/* strip trailing @what */
+	/* str_end_what is where an occurrence ending exactly at str_end would start */
+	if(what_len <= (size_t)(str_end - sbegin)) {
+		str_end_what = str_end - what_len + 1;
+		while((str_end_what > sbegin) &&
+		      (strncmp(str_end_what, what, what_len) == 0)) {
+			str_end -= what_len;
+			str_end_what -= what_len;
+		}
+	}
+
+	/* the first remaining character is kept; scanning starts at the next one */
+	*begin = sbegin++;
+	while(sbegin+what_len <= str_end) {
+		while(sbegin+what_len<=str_end && !strncmp(sbegin,what,what_len)) {
+			const char* src = sbegin+what_len;
+			/* move string */
+			/* copies everything up to and including the byte at str_end */
+			memmove(sbegin,src,str_end-src+1);
+			str_end -= what_len;
+		}
+		sbegin++;
+	}
+	*end = str_end;
+}
+
+
+/* Replace every occurrence of @c with @r in the range str..end (@end is inclusive). */
+static void str_replace(char* str,const char* end,char c,char r)
+{
+	char* p = str;
+
+	while(p <= end) {
+		if(*p == c)
+			*p = r;
+		p++;
+	}
+}
+/*
+ * Convert the first @len characters of @str to lower case, in place.
+ * Each character is passed to tolower() as an unsigned char: handing it a
+ * negative plain char would be undefined behaviour.
+ */
+static void str_make_lowercase(char* str,size_t len)
+{
+	for(;len;str++,len--) {
+		*str = (char)tolower((unsigned char)*str);
+	}
+}
+
+/* Raise anything below 32 (control characters) to 32, i.e. a space. */
+#define fix32(x) ((x)<32 ? 32 : (x))
+/*
+ * Clear the top bit of every character of the NUL-terminated string @begin,
+ * then map resulting control characters to a space. Works in place.
+ */
+static void clear_msb(char* begin)
+{
+	char* p;
+
+	for(p = begin; *p != '\0'; p++) {
+		const int low7 = (*p)&0x7f;
+		*p = fix32(low7);
+	}
+}
+
+/*
+ * Particularly yahoo puts links like this in mails:
+ * http:/ /mail.yahoo.com
+ * So first step: delete space between / /
+ *
+ * Next there could be possible links like this:
+ * <a href="phishlink">w  w w . e b a y . c o m</a>
+ * Here we need to strip spaces to get this picked up.
+ *
+ * Next there are links like:
+ * <a href="www.yahoo.com">Check out yahoo.com</a>
+ * Here we add a ., so we get: check.out.yahoo.com (it won't trigger)
+ *
+ * Old Rule for adding .: if substring from right contains dot, then add dot,
+ *	otherwise strip space
+ * New Rule: strip all spaces
+ *  strip leading and trailing garbage
+ *
+ * *begin / *end delimit the string (@end is inclusive) and are updated to
+ * the new bounds. Nothing happens if either is NULL or if end < begin.
+ */
+static void
+str_fixup_spaces(char **begin, const char **end)
+{
+	char* sbegin = *begin;
+	const char* send = *end;
+	if(!sbegin || !send || send < sbegin)
+		return;
+	/* strip spaces */
+	str_strip(&sbegin, &send, " ",1);
+	/* strip leading/trailing garbage */
+	/* the bounds are tested before a character is read, and the character
+	 * goes to isalnum() as an unsigned char (a negative char is undefined) */
+	while(sbegin <= send && !isalnum((unsigned char)sbegin[0])) sbegin++;
+	while(send >= sbegin && !isalnum((unsigned char)send[0])) send--;
+	*begin = sbegin;
+	*end = send;
+}
+
+/*
+ * Normalize one URL.
+ * The buffer of @URL is edited in place, then @URL is replaced by a newly
+ * allocated copy of the cleaned-up text (allocates memory).
+ *
+ * @URL:     the url to clean up
+ * @pre_URL: for a displayed link (@isReal == 0) receives a copy of the text
+ *           as it was before str_fixup_spaces(); it is not copied into for
+ *           a real link
+ * @isReal:  non-zero for the real (href) link, zero for the displayed one
+ * Returns 0, or the error returned by string_assign_dup().
+ */
+static int
+cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
+{
+	char *begin = URL->data;
+	const char *end;
+	size_t len;
+	
+	/* force every character into the range 32..127 */
+	clear_msb(begin);
+	/*if(begin == NULL)
+		return;*/
+	/*TODO: handle hex-encoded IPs*/
+	while(isspace(*begin))
+		begin++;
+
+	len = strlen(begin);
+	if(len == 0) {
+		string_assign_null(URL);
+		string_assign_null(pre_URL);
+		return 0;
+	}
+
+	/* end points at the last character (inclusive) */
+	end = begin + len - 1;
+	/*cli_dbgmsg("%d %d\n", end-begin, len);*/
+	/* a single-character url is treated like an empty one */
+	if(begin >= end) {
+		string_assign_null(URL);
+		string_assign_null(pre_URL);
+		return 0;
+	}
+	while(isspace(*end))
+		end--;
+	/*TODO: convert \ to /, and stuff like that*/
+	/* From mailscanner, my comments enclosed in {} */
+	/* text that starts with .net, ado.net or asp.net is not treated as a url */
+	if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len)) {
+		string_assign_null(URL);
+		string_assign_null(pre_URL);
+	}
+	else {
+		size_t host_len;
+		char* host_begin;
+		int rc;
+
+		str_replace(begin,end,'\\','/');
+		/* some broken MUAs put > in the href, and then
+		 * we get a false positive, so remove them */
+		str_replace(begin,end,'<',' ');
+		str_replace(begin,end,'>',' ');
+		str_replace(begin,end,'\"',' ');
+		str_replace(begin,end,';',' ');
+		str_strip(&begin,&end,lt,lt_len);
+		str_strip(&begin,&end,gt,gt_len);
+		/* convert hostname to lowercase, but only hostname! */
+		/* the host is taken to start after the first ':' and any '/' following it,
+		 * or at the very beginning when there is no ':' */
+		host_begin = strchr(begin,':');
+		while(host_begin && host_begin[1]=='/') host_begin++;
+		if(!host_begin) host_begin=begin;
+		else host_begin++;
+		host_len = strcspn(host_begin,"/?");
+		str_make_lowercase(host_begin,host_len);
+		/* convert %xx to real value */
+		str_hex_to_char(&begin,&end);
+		if(isReal) {
+			/* htmlnorm converts \n to space, so we have to strip spaces */
+			str_strip(&begin, &end, " ", 1);
+		}
+		else {
+			/* trim space */
+			while((begin <= end) && (begin[0]==' '))  begin++;
+			while((begin <= end) && (end[0]==' ')) end--;
+		}
+		/* real link: the copy goes to URL; displayed link: it goes to pre_URL */
+		if (( rc = string_assign_dup(isReal ? URL : pre_URL,begin,end+1) )) {
+			string_assign_null(URL);
+			return rc;
+		}
+		if(!isReal) {
+			/* begin/end still point into the original buffer at this point */
+			str_fixup_spaces(&begin,&end);
+			if (( rc = string_assign_dup(URL,begin,end+1) )) {
+				return rc;
+			}
+		}
+		/*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/
+	}
+	return 0;
+}
+
+
+/* -------end runtime disable---------*/
+/*
+ * Record in @ctx that a "possibly unwanted" item was found and log the name
+ * currently stored in *ctx->virname. Always returns CL_CLEAN.
+ */
+static int found_possibly_unwanted(cli_ctx* ctx)
+{
+	const int result = CL_CLEAN;
+
+	ctx->found_possibly_unwanted = 1;
+	cli_dbgmsg("Phishcheck: found Possibly Unwanted: %s\n",*ctx->virname);
+
+	return result;
+}
+
+/*
+ * Run the phishing check on every link collected in @hrefs.
+ *
+ * For each entry that has contents, the tag value is taken as the real URL
+ * and the contents as the displayed URL (the two are swapped for any tag
+ * other than "href"). Scanning stops at the first link that is not classified
+ * as clean: *ctx->virname is set to the matching heuristic name and the hit
+ * is recorded through found_possibly_unwanted().
+ *
+ * @m, @dir: not used by this function.
+ * Always returns CL_CLEAN; a detection is reported through @ctx.
+ */
+int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
+{
+	int i;
+	struct phishcheck* pchk = (struct phishcheck*) ctx->engine->phishcheck;
+	/* check for status of whitelist fatal error, etc. */
+	if(!pchk || pchk->is_disabled)
+		return CL_CLEAN;
+
+	if(!ctx->found_possibly_unwanted)
+		*ctx->virname=NULL;
+	for(i=0;i<hrefs->count;i++) {
+		struct url_check urls;
+		enum phish_status rc;
+
+		if(!hrefs->contents[i]) {
+			if(strcmp((char*)hrefs->tag[i],"href"))
+				cli_dbgmsg("Phishcheck: href with no contents?\n");
+			continue;
+		}
+
+		urls.always_check_flags = DOMAINLIST_REQUIRED;/* required to work correctly */
+		/* the SSL check is on by default for href tags only */
+		urls.flags	 = strncmp((char*)hrefs->tag[i],href_text,href_text_len)? (CL_PHISH_ALL_CHECKS&~CHECK_SSL): CL_PHISH_ALL_CHECKS;
+		urls.link_type   = 0;
+		if(!strncmp((char*)hrefs->tag[i],src_text,src_text_len)) {
+			if (!(urls.flags&CHECK_IMG_URL))
+				continue;
+			urls.link_type |= LINKTYPE_IMAGE;
+		}
+		if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST)
+			urls.flags |= DOMAINLIST_REQUIRED;
+		if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) {
+			urls.always_check_flags |= CHECK_SSL;
+		}
+		if (ctx->options & CL_SCAN_PHISHING_BLOCKCLOAK) {
+			urls.always_check_flags |= CHECK_CLOAKING;
+		}
+		string_init_c(&urls.realLink,(char*)hrefs->value[i]);
+		string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i]));
+		string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
+		/* the displayed text must be NUL-terminated */
+		/* NOTE(review): assumes the blob size is non-zero here -- confirm */
+		if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) {
+			cli_warnmsg("urls.displayLink.data[...]");
+			return CL_CLEAN;
+		}
+
+		urls.realLink.refcount=-1;
+		urls.displayLink.refcount=-1;/*don't free these, caller will free*/
+		if(strcmp((char*)hrefs->tag[i],"href")) {
+			/* not an href: value and contents swap their roles */
+			char *url;
+			url = urls.realLink.data;
+			urls.realLink.data = urls.displayLink.data;
+			urls.displayLink.data = url;
+		}
+
+		rc = phishingCheck(ctx->engine,&urls);
+		/* release the strings before any return below, so that copies made
+		 * during the check are not leaked when the module got disabled */
+		free_if_needed(&urls);
+		if(pchk->is_disabled)
+			return CL_CLEAN;
+		cli_dbgmsg("Phishcheck: Phishing scan result: %s\n",phishing_ret_toString(rc));
+		switch(rc)/*TODO: support flags from ctx->options,*/
+			{
+				case CL_PHISH_CLEAN:
+				case CL_PHISH_CLEANUP_OK:
+				case CL_PHISH_HOST_OK:
+				case CL_PHISH_DOMAIN_OK:
+				case CL_PHISH_REDIR_OK:
+				case CL_PHISH_HOST_REDIR_OK:
+				case CL_PHISH_DOMAIN_REDIR_OK:
+				case CL_PHISH_HOST_REVERSE_OK:
+				case CL_PHISH_DOMAIN_REVERSE_OK:
+				case CL_PHISH_WHITELISTED:
+				case CL_PHISH_HOST_WHITELISTED:
+				case CL_PHISH_MAILTO_OK:
+				case CL_PHISH_TEXTURL:
+				case CL_PHISH_HOST_NOT_LISTED:
+				case CL_PHISH_CLEAN_CID:
+					/* nothing suspicious: go on with the next link */
+					continue;
+				case CL_PHISH_HEX_URL:
+					*ctx->virname="Phishing.Heuristics.Email.HexURL";
+					return found_possibly_unwanted(ctx);
+				case CL_PHISH_NUMERIC_IP:
+					*ctx->virname="Phishing.Heuristics.Email.Cloaked.NumericIP";
+					return found_possibly_unwanted(ctx);
+				case CL_PHISH_CLOAKED_NULL:
+					*ctx->virname="Phishing.Heuristics.Email.Cloaked.Null";/*http://www.real.com%01%00@www.evil.com*/
+					return found_possibly_unwanted(ctx);
+				case CL_PHISH_SSL_SPOOF:
+					*ctx->virname="Phishing.Heuristics.Email.SSL-Spoof";
+					return found_possibly_unwanted(ctx);
+				case CL_PHISH_CLOAKED_UIU:
+					*ctx->virname="Phishing.Heuristics.Email.Cloaked.Username";/*http://www.ebay.com@www.evil.com*/
+					return found_possibly_unwanted(ctx);
+				case CL_PHISH_NOMATCH:
+				default:
+					*ctx->virname="Phishing.Heuristics.Email.SpoofedDomain";
+					return found_possibly_unwanted(ctx);
+			}
+	}
+	return CL_CLEAN;
+}
+
+/*
+ * Concatenate @a, @b and @c into a newly allocated string.
+ * Returns NULL if the allocation fails; the caller frees the result.
+ */
+static char* str_compose(const char* a,const char* b,const char* c)
+{
+	const size_t a_len = strlen(a);
+	const size_t b_len = strlen(b);
+	const size_t c_len = strlen(c);
+	char* out = cli_malloc(a_len+b_len+c_len+1);
+	char* pos;
+
+	if(out == NULL)
+		return NULL;
+
+	/* append the three parts one after the other, then terminate */
+	pos = out;
+	memcpy(pos,a,a_len);
+	pos += a_len;
+	memcpy(pos,b,b_len);
+	pos += b_len;
+	memcpy(pos,c,c_len);
+	pos += c_len;
+	*pos = '\0';
+	return out;
+}
+
+/*
+ * Decode the two hex digits at src[0] and src[1] into one byte, using
+ * hextable. The pair "00" is special: it yields 0x1 instead of 0.
+ */
+static char hex2int(const unsigned char* src)
+{
+	if(src[0] == '0' && src[1] == '0') {
+		/* don't convert %00 to \0, use 0x1
+		 * this value is also used by cloak check*/
+		return 0x1;
+	}
+	return hextable[src[0]]<<4 | hextable[src[1]];
+}
+
+/* Release a compiled regex through cli_regfree(); a NULL pointer is ignored. */
+static void free_regex(regex_t* p)
+{
+	if(!p)
+		return;
+	cli_regfree(p);
+}
+
+/*
+ * Set up the phishing detection module of @engine: allocate the phishcheck
+ * structure if there is none yet and compile all regular expressions.
+ *
+ * Returns CL_SUCCESS on success (also when the module is already
+ * initialized), CL_EMEM when an allocation fails, and CL_EFORMAT when a
+ * regex does not compile. On failure every regex compiled so far is
+ * released, the structure is freed and engine->phishcheck is set to NULL.
+ */
+int phishing_init(struct cl_engine* engine)
+{
+	char *url_regex, *realurl_regex;
+	struct phishcheck* pchk;
+	int rc = CL_EFORMAT;
+
+	if(!engine->phishcheck) {
+		pchk = engine->phishcheck = cli_malloc(sizeof(struct phishcheck));
+		if(!pchk)
+			return CL_EMEM;
+		pchk->is_disabled = 1;
+	}
+	else {
+		pchk = engine->phishcheck;
+		if(!pchk->is_disabled) {
+			/* already initialized */
+			return CL_SUCCESS;
+		}
+	}
+
+	cli_dbgmsg("Initializing phishcheck module\n");
+
+	/* Each failure jumps to the label that undoes exactly the steps that
+	 * have succeeded so far. */
+	if(build_regex(&pchk->preg_hexurl,cloaked_host_regex,1))
+		goto err_struct;
+	if(build_regex(&pchk->preg_cctld,cctld_regex,1))
+		goto err_hexurl;
+	if(build_regex(&pchk->preg_tld,tld_regex,1))
+		goto err_cctld;
+
+	url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
+	if(!url_regex) {
+		rc = CL_EMEM;
+		goto err_tld;
+	}
+	if(build_regex(&pchk->preg,url_regex,1)) {
+		free(url_regex);
+		goto err_tld;
+	}
+	free(url_regex);
+
+	realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_path1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
+	if(!realurl_regex) {
+		rc = CL_EMEM;
+		goto err_preg;
+	}
+	if(build_regex(&pchk->preg_realurl, realurl_regex,1)) {
+		/* url_regex has already been freed above and must not be freed again */
+		free(realurl_regex);
+		goto err_preg;
+	}
+	free(realurl_regex);
+
+	if(build_regex(&pchk->preg_numeric,numeric_url_regex,1))
+		goto err_realurl;
+
+	pchk->is_disabled = 0;
+	cli_dbgmsg("Phishcheck module initialized\n");
+	return CL_SUCCESS;
+
+err_realurl:
+	free_regex(&pchk->preg_realurl);
+err_preg:
+	free_regex(&pchk->preg);
+err_tld:
+	free_regex(&pchk->preg_tld);
+err_cctld:
+	free_regex(&pchk->preg_cctld);
+err_hexurl:
+	free_regex(&pchk->preg_hexurl);
+err_struct:
+	free(pchk);
+	engine->phishcheck = NULL;
+	return rc;
+}
+
+/*
+ * Shut the phishing module of @engine down: release the compiled regexes
+ * (only when the module was enabled), tear down the whitelist and the
+ * domainlist, then free the phishcheck structure and clear the pointer.
+ */
+void phishing_done(struct cl_engine* engine)
+{
+	struct phishcheck* pchk = engine->phishcheck;
+
+	cli_dbgmsg("Cleaning up phishcheck\n");
+	if(pchk != NULL && !pchk->is_disabled) {
+		free_regex(&pchk->preg);
+		free_regex(&pchk->preg_hexurl);
+		free_regex(&pchk->preg_cctld);
+		free_regex(&pchk->preg_tld);
+		free_regex(&pchk->preg_numeric);
+		free_regex(&pchk->preg_realurl);
+		pchk->is_disabled = 1;
+	}
+
+	whitelist_done(engine);
+	domainlist_done(engine);
+
+	if(pchk != NULL) {
+		cli_dbgmsg("Freeing phishcheck struct\n");
+		free(pchk);
+		engine->phishcheck = NULL;
+	}
+	cli_dbgmsg("Phishcheck cleaned up\n");
+}
+
+/*
+ * Only those URLs are identified as URLs for which phishing detection can be performed.
+ * Returns 1 if @URL matches the general URL regex (preg), 0 otherwise or for NULL.
+ */
+static int isURL(const struct phishcheck* pchk,const char* URL)
+{
+	if(!URL)
+		return 0;
+	return cli_regexec(&pchk->preg,URL,0,NULL,0) == 0;
+}
+
+/*
+ * Check if this is a real URL, which basically means to check if it has a known URL scheme (http,https,ftp).
+ * This prevents false positives with outbind:// and blocked:: links.
+ * Returns 1 if @URL matches the real-URL regex (preg_realurl), 0 otherwise or for NULL.
+ */
+static int isRealURL(const struct phishcheck* pchk,const char* URL)
+{
+	if(!URL)
+		return 0;
+	return cli_regexec(&pchk->preg_realurl,URL,0,NULL,0) == 0;
+}
+
+/* Returns 1 if @URL matches the numeric-IP URL regex (preg_numeric), 0 otherwise or for NULL. */
+static int isNumericURL(const struct phishcheck* pchk,const char* URL)
+{
+	if(!URL)
+		return 0;
+	return cli_regexec(&pchk->preg_numeric,URL,0,NULL,0) == 0;
+}
+
+/*
+ * Cleans up both links of @urls, provided that the CLEANUP_URL flag is set.
+ * Returns CL_PHISH_CLEANUP_OK if the two URLs are identical after the
+ * cleanup, CL_PHISH_NODECISION in every other case.
+ * The return values of cleanupURL() are not examined.
+ */
+static enum phish_status cleanupURLs(struct url_check* urls)
+{
+	if(!(urls->flags&CLEANUP_URL))
+		return CL_PHISH_NODECISION;
+
+	cleanupURL(&urls->realLink,NULL,1);
+	cleanupURL(&urls->displayLink,&urls->pre_fixup.pre_displayLink,0);
+
+	if(urls->displayLink.data && urls->realLink.data
+	   && !strcmp(urls->realLink.data,urls->displayLink.data))
+		return CL_PHISH_CLEANUP_OK;
+
+	return CL_PHISH_NODECISION;
+}
+
+/*
+ * Extract the host of one link of @url into the corresponding string of
+ * @host_url and take a first look at it.
+ *
+ * @isReal: selects the real link (non-zero) or the displayed link (zero)
+ * @phishy: in/out flag set; passed on to get_host() and may gain
+ *          PHISHY_NUMERIC_IP here
+ * Returns an error code from get_host()/string_assign_dup(), or one of the
+ * CL_PHISH_* values returned below; CL_PHISH_NODECISION means that the
+ * caller has to carry on with its checks.
+ */
+static int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
+{
+	const char *start, *end;
+	struct string* host = isReal ? &host_url->realLink : &host_url->displayLink;
+	const char* URL = isReal ? url->realLink.data : url->displayLink.data;
+	int rc;
+	if ((rc = get_host(pchk, URL, isReal, phishy, &start, &end))) {
+		return rc;
+	}
+	/* get_host() hands back NULL bounds when the URL itself is NULL */
+	if(!start || !end) {
+		string_assign_null(host);
+	}
+	else {
+		if(( rc = string_assign_dup(host,start,end) ))
+			return rc;
+	}
+	cli_dbgmsg("Phishcheck:host:%s\n", host->data);
+	if(!isReal) {
+		/* remember where the host sits inside the displayed url, as offsets */
+		url->pre_fixup.host_start = start - URL;
+		url->pre_fixup.host_end = end - URL;
+	}
+	if(!host->data)
+		return CL_PHISH_CLEANUP_OK;
+	if(*phishy&REAL_IS_MAILTO)
+		return CL_PHISH_MAILTO_OK;
+	/* a host that contains a space is treated as plain text */
+	if(strchr(host->data,' ')) {
+		/* NOTE(review): host->data is not reset after string_free() -- confirm that no caller uses or frees it again */
+		string_free(host);
+		return CL_PHISH_TEXTURL;
+	}
+	if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
+		/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
+		string_free(host);
+		return CL_PHISH_HEX_URL;
+	}
+	if(isReal && host->data[0]=='\0')
+		return CL_PHISH_CLEAN;/* link without domain, such as: href="/isapi.dll?... */
+	if(isNumeric(host->data)) {
+		*phishy |= PHISHY_NUMERIC_IP;
+	}
+	return CL_PHISH_NODECISION;
+}
+
+/*
+ * Fill @domains with the result of get_domain() for both the real and the
+ * displayed link of @url, and copy @url's flags across.
+ */
+static void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains)
+{
+	struct string* const real_dst = &domains->realLink;
+	struct string* const shown_dst = &domains->displayLink;
+
+	get_domain(pchk, real_dst, &url->realLink);
+	get_domain(pchk, shown_dst, &url->displayLink);
+
+	domains->flags = url->flags;
+}
+
+/*
+ * Translate the accumulated @phishy bit flags into a status code.
+ * PHISHY_USERNAME_IN_URL takes precedence over PHISHY_NUMERIC_IP; when neither
+ * bit is set, @fallback is returned unchanged.
+ */
+static enum phish_status phishy_map(int phishy,enum phish_status fallback)
+{
+	if(phishy&PHISHY_USERNAME_IN_URL)
+		return CL_PHISH_CLOAKED_UIU;
+
+	return (phishy&PHISHY_NUMERIC_IP) ? CL_PHISH_NUMERIC_IP : fallback;
+}
+
+/*
+ * Returns 1 when more than 70% of the bytes of @url belong to "&#...;"
+ * sequences, 0 otherwise. @url must be a valid NUL-terminated string.
+ *
+ * A sequence starts at "&#" and runs up to and including the next ';'.
+ * The share is measured in bytes covered by such sequences; comparing the mere
+ * number of sequences against the string length could never exceed the
+ * threshold, because every sequence is at least three bytes long.
+ */
+static int isEncoded(const char* url)
+{
+	const size_t len = strlen(url);
+	const char* cursor = url;
+	size_t encoded = 0;
+
+	for(;;) {
+		const char* open = strstr(cursor,"&#");
+		const char* close;
+
+		if(!open)
+			break;
+		close = strchr(open,';');
+		if(!close)
+			break;/* unterminated sequence: does not count */
+		encoded += (size_t)(close - open) + 1;
+		cursor = close + 1;
+	}
+	return encoded > len*7/10;/*more than 70% made up of &#;*/
+}
+
+/*
+ * Convenience wrapper: run whitelist_match() on the real/displayed pair held
+ * in @urls and hand back its result unchanged.
+ */
+static int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly)
+{
+	int listed;
+
+	listed = whitelist_match(engine,urls->realLink.data,urls->displayLink.data,hostOnly);
+	return listed;
+}
+
+/*
+ * Returns 0 for every status that means "not phishing / not checked" and 1 for
+ * everything else, including status values that have no case of their own.
+ */
+static int isPhishing(enum phish_status rc)
+{
+	int verdict = 1;
+
+	switch(rc) {
+		/* harmless outcomes */
+		case CL_PHISH_CLEAN:
+		case CL_PHISH_CLEANUP_OK:
+		case CL_PHISH_WHITELISTED:
+		case CL_PHISH_HOST_WHITELISTED:
+		case CL_PHISH_HOST_OK:
+		case CL_PHISH_DOMAIN_OK:
+		case CL_PHISH_REDIR_OK:
+		case CL_PHISH_HOST_REDIR_OK:
+		case CL_PHISH_DOMAIN_REDIR_OK:
+		case CL_PHISH_HOST_REVERSE_OK:
+		case CL_PHISH_DOMAIN_REVERSE_OK:
+		case CL_PHISH_MAILTO_OK:
+		case CL_PHISH_TEXTURL:
+		case CL_PHISH_HOST_NOT_LISTED:
+		case CL_PHISH_CLEAN_CID:
+			verdict = 0;
+			break;
+		/* phishing outcomes; anything unknown is treated the same way */
+		case CL_PHISH_HEX_URL:
+		case CL_PHISH_CLOAKED_NULL:
+		case CL_PHISH_SSL_SPOOF:
+		case CL_PHISH_CLOAKED_UIU:
+		case CL_PHISH_NUMERIC_IP:
+		case CL_PHISH_NOMATCH:
+		default:
+			break;
+	}
+	return verdict;
+}
+/*
+ * Decide whether the (real link, displayed link) pair in @urls is phishing.
+ *
+ * urls can't contain null pointer, caller must ensure this (only a NULL
+ * realLink.data is tolerated and reported as CL_PHISH_CLEAN).
+ *
+ * The checks run in this order, the first one that decides wins:
+ *   identical URLs, identical after cleanup, whitelist, "displayed text is not
+ *   a URL", domain list (whole URL, then displayed host), link-type filter,
+ *   cloaking, SSL spoofing, real host extraction, host whitelist,
+ *   host equality, domain equality.
+ * If nothing explains the difference the result is phishy_map(phishy,
+ * CL_PHISH_NOMATCH), i.e. a phishing verdict.
+ *
+ * @urls->flags may be modified (by domainlist_match() and by the
+ * always_check_flags mask). host_url is a local scratch structure; every exit
+ * taken after url_check_init() releases it with free_if_needed().
+ */
+static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
+{
+	struct url_check host_url;
+	enum phish_status rc=CL_PHISH_NODECISION;
+	int phishy=0;
+	const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck;
+
+	if(!urls->realLink.data)
+		return CL_PHISH_CLEAN;
+
+	cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data,
+		urls->displayLink.data);
+
+	if(!strcmp(urls->realLink.data,urls->displayLink.data))
+		return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
+
+	if((rc = cleanupURLs(urls))) {
+		if(isPhishing(rc))/* not allowed to decide this is phishing */
+			return CL_PHISH_CLEAN;
+		return rc;/* URLs identical after cleanup */
+	}
+
+	if(whitelist_check(engine,urls,0))
+		return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */
+
+	/* phishy is still 0 here, so the PHISHY_NUMERIC_IP clause is always satisfied */
+	if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) )&&
+			( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
+			  !(phishy&PHISHY_NUMERIC_IP))) {
+		cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
+		return CL_PHISH_TEXTURL;
+	}
+
+	/* although entire url might not be listed, the host might be,
+	 * so defer phishing decisions till we know if host is listed*/
+	if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,NULL,0,&urls->flags))
+		phishy |= DOMAIN_LISTED;
+
+	url_check_init(&host_url);
+
+	if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) {
+		free_if_needed(&host_url);
+		if(isPhishing(rc))
+			return CL_PHISH_CLEAN;
+		return rc;
+	}
+
+	if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
+		if(domainlist_match(engine,host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1,&urls->flags))
+			phishy |= DOMAIN_LISTED;
+	}
+
+	/* link type filtering must occur after last domainlist_match */
+	if(urls->link_type & LINKTYPE_IMAGE && !(urls->flags&CHECK_IMG_URL)) {
+		free_if_needed(&host_url);/* displayed host was already extracted */
+		return CL_PHISH_HOST_NOT_LISTED;/* its listed, but this link type is filtered */
+	}
+
+	if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) {
+		/* host is not listed: only the always-on checks may still run */
+		urls->flags &= urls->always_check_flags;
+		if(!urls->flags) {
+			free_if_needed(&host_url);
+			return CL_PHISH_HOST_NOT_LISTED;
+		}
+	}
+
+	if(urls->flags&CHECK_CLOAKING) {
+		/*Checks if URL is cloaked.
+		Should we check if it contains another http://, https://?
+		No because we might get false positives from redirect services.*/
+		if(strchr(urls->realLink.data,0x1)) {
+			free_if_needed(&host_url);
+			return CL_PHISH_CLOAKED_NULL;
+		}
+		if(isEncoded(urls->displayLink.data)) {
+			free_if_needed(&host_url);
+			return CL_PHISH_HEX_URL;
+		}
+	}
+
+	if(urls->displayLink.data[0]=='\0') {
+		free_if_needed(&host_url);
+		return CL_PHISH_CLEAN;
+	}
+
+	if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) {
+		free_if_needed(&host_url);
+		return CL_PHISH_SSL_SPOOF;
+	}
+
+	/* '!' binds tighter than '&': the flag test needs its own parentheses */
+	if(!(urls->flags&CHECK_CLOAKING) && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) {
+		free_if_needed(&host_url);
+		return CL_PHISH_HOST_NOT_LISTED;
+	}
+
+	if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy)))
+	{
+		free_if_needed(&host_url);
+		return rc;
+	}
+
+	if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
+		free_if_needed(&host_url);
+		return CL_PHISH_HOST_NOT_LISTED;
+	}
+
+	if(whitelist_check(engine,&host_url,1)) {
+		free_if_needed(&host_url);
+		return CL_PHISH_HOST_WHITELISTED;
+	}
+
+	if(urls->flags&HOST_SUFFICIENT) {
+		/* Compare the extracted hosts. The full URLs are already known to
+		 * differ at this point, so comparing them again could never match.
+		 * Both host strings are non-NULL: url_get_host() returned 0 for each. */
+		if(!strcmp(host_url.realLink.data,host_url.displayLink.data)) {
+			free_if_needed(&host_url);
+			return CL_PHISH_HOST_OK;
+		}
+
+		if(urls->flags&DOMAIN_SUFFICIENT) {
+			struct url_check domain_url;
+			url_check_init(&domain_url);
+			url_get_domain(pchk, &host_url,&domain_url);
+			if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) {
+				free_if_needed(&host_url);
+				free_if_needed(&domain_url);
+				return CL_PHISH_DOMAIN_OK;
+			}
+			free_if_needed(&domain_url);
+		}
+	}/*HOST_SUFFICIENT*/
+
+	/* released here, outside the HOST_SUFFICIENT branch, so that it is freed
+	 * exactly once no matter which flags are set */
+	free_if_needed(&host_url);
+
+	/*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/
+	if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED))
+		return CL_PHISH_HOST_NOT_LISTED;
+	return phishy_map(phishy,CL_PHISH_NOMATCH);
+}
+
+/*
+ * Map a phish_status value to a human-readable description.
+ * Status values without a case of their own yield "Unknown return code".
+ * The returned pointer refers to a string literal and must not be freed.
+ */
+static const char* phishing_ret_toString(enum phish_status rc)
+{
+	const char* text = "Unknown return code";
+
+	switch(rc) {
+		case CL_PHISH_CLEAN:
+			text = "Clean";
+			break;
+		case CL_PHISH_CLEANUP_OK:
+			text = "URLs match after cleanup";
+			break;
+		case CL_PHISH_WHITELISTED:
+			text = "URL is whitelisted";
+			break;
+		case CL_PHISH_HOST_WHITELISTED:
+			text = "host part of URL is whitelist";
+			break;
+		case CL_PHISH_HOST_OK:
+			text = "Hosts match";
+			break;
+		case CL_PHISH_DOMAIN_OK:
+			text = "Domains match";
+			break;
+		case CL_PHISH_REDIR_OK:
+			text = "After redirecting realURL, they match";
+			break;
+		case CL_PHISH_HOST_REDIR_OK:
+			text = "After redirecting realURL, hosts match";
+			break;
+		case CL_PHISH_DOMAIN_REDIR_OK:
+			text = "After redirecting the domains match";
+			break;
+		case CL_PHISH_MAILTO_OK:
+			text = "URL is mailto";
+			break;
+		case CL_PHISH_NUMERIC_IP:
+			text = "IP address encountered in hostname";
+			break;
+		case CL_PHISH_TEXTURL:
+			text = "Displayed link is not an URL, can't check if phishing or not";
+			break;
+		case CL_PHISH_CLOAKED_NULL:
+			text = "Link URL is cloaked (null byte %00)";
+			break;
+		case CL_PHISH_CLOAKED_UIU:
+			/*username is a legit domain, and after the @ comes the evil one*/
+			text = "Link URL contains username, and real<->displayed hosts don't match.";
+			break;
+		case CL_PHISH_SSL_SPOOF:
+			text = "Visible links is SSL, real link is not";
+			break;
+		case CL_PHISH_NOMATCH:
+			text = "URLs are way too different";
+			break;
+		case CL_PHISH_HOST_NOT_LISTED:
+			text = "Host not listed in .pdb -> not checked";
+			break;
+		case CL_PHISH_CLEAN_CID:
+			text = "Embedded image in mail -> clean";
+			break;
+		case CL_PHISH_HEX_URL:
+			text = "Embedded hex urls";
+			break;
+		default:
+			break;
+	}
+	return text;
+}
+

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,8 @@
+#include "clamav-config.h"
+#include "clamav.h"
+/*
+ * Placeholder PST scanner: logs a warning and reports CL_EFORMAT.
+ * Neither dir nor desc is used.
+ */
+int
+cli_pst(const char *dir, int desc)
+{
+	const int status = CL_EFORMAT;
+
+	cli_warnmsg("PST files not yet supported\n");
+	return status;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1553 @@
+/*
+ *  Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#ifdef _MSC_VER
+#include <winsock.h> /* for Sleep() */
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef	HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifndef C_WINDOWS
+#include <dirent.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef	HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <fcntl.h>
+
+#include "clamav.h"
+#include "cvd.h"
+#ifdef	HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include "matcher-ac.h"
+#include "matcher-bm.h"
+#include "matcher.h"
+#include "others.h"
+#include "str.h"
+#include "dconf.h"
+#include "lockdb.h"
+#include "readdb.h"
+
+#include "phishcheck.h"
+#include "phish_whitelist.h"
+#include "phish_domaincheck_db.h"
+#include "regex_list.h"
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <limits.h>
+#include <stddef.h>
+#endif
+
+#ifdef CL_THREAD_SAFE
+#  include <pthread.h>
+/* File-local, statically initialised mutex; compiled in only for
+ * CL_THREAD_SAFE builds.
+ * NOTE(review): its users are outside this excerpt; judging by the name it
+ * presumably serialises engine reference-count updates - confirm. */
+static pthread_mutex_t cli_ref_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+/* Prototypes for old public functions just to shut up some gcc warnings;
+ * to be removed in 1.0
+ */
+int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo);
+int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo);
+
+
+/*
+ * Add one signature to the matcher @root. The form of @hexsig selects how it
+ * is stored:
+ *   - contains '{' : split into parts at every '{' and '*'; each part goes to
+ *                    cli_ac_addsig() together with the distance range parsed
+ *                    from the preceding "{n}" / "{min-max}" (0,0 after '*');
+ *   - contains '*' : split on '*' with cli_strtok(), each part goes to
+ *                    cli_ac_addsig() with distances 0,0;
+ *   - root->ac_only, or @hexsig contains '?' or '(', or @type is non-zero :
+ *                    a single cli_ac_addsig() call;
+ *   - otherwise    : a cli_bm_patt is built and passed to cli_bm_addpatt().
+ *
+ * Returns CL_SUCCESS, CL_EMEM, CL_EMALFDB, or (outside the '{' branch) the
+ * error reported by cli_ac_addsig()/cli_bm_addpatt(). In the '{' branch every
+ * failure, including one from cli_ac_addsig(), is reported as CL_EMALFDB.
+ */
+int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target)
+{
+	struct cli_bm_patt *bm_new;
+	char *pt, *hexcpy, *start, *n;
+	int ret, virlen, asterisk = 0;
+	unsigned int i, j, len, parts = 0;
+	int mindist = 0, maxdist = 0, error = 0;
+
+
+    if(strchr(hexsig, '{')) {
+
+	root->ac_partsigs++;
+
+	/* work on a private copy: the parts are cut out by writing NULs into it */
+	if(!(hexcpy = cli_strdup(hexsig)))
+	    return CL_EMEM;
+
+	/* number of parts = number of separators ('{' or '*') + 1 */
+	len = strlen(hexsig);
+	for(i = 0; i < len; i++)
+	    if(hexsig[i] == '{' || hexsig[i] == '*')
+		parts++;
+
+	if(parts)
+	    parts++;
+
+	start = pt = hexcpy;
+	for(i = 1; i <= parts; i++) {
+
+	    if(i != parts) {
+		/* find the separator that ends the current part */
+		for(j = 0; j < strlen(start); j++) {
+		    if(start[j] == '{') {
+			asterisk = 0;
+			pt = start + j;
+			break;
+		    }
+		    if(start[j] == '*') {
+			asterisk = 1;
+			pt = start + j;
+			break;
+		    }
+		}
+		/* terminate the part; pt now points just past the separator */
+		*pt++ = 0;
+	    }
+
+	    /* mindist/maxdist were parsed after the previous part was added */
+	    if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist, offset, target))) {
+		cli_errmsg("cli_parse_add(): Problem adding signature (1).\n");
+		error = 1;
+		break;
+	    }
+
+	    if(i == parts)
+		break;
+
+	    mindist = maxdist = 0;
+
+	    /* '*' carries no distance: the next part starts right after it */
+	    if(asterisk) {
+		start = pt;
+		continue;
+	    }
+
+	    /* "{...}" : the text between the braces is the distance spec */
+	    if(!(start = strchr(pt, '}'))) {
+		error = 1;
+		break;
+	    }
+	    *start++ = 0;
+
+	    /* NOTE(review): pt has already been handed to strchr() above, so this
+	     * check looks unreachable - confirm before relying on it. */
+	    if(!pt) {
+		error = 1;
+		break;
+	    }
+
+	    if(!strchr(pt, '-')) {
+		/* "{n}" : exact distance */
+		if((mindist = maxdist = atoi(pt)) < 0) {
+		    error = 1;
+		    break;
+		}
+	    } else {
+		/* "{min-max}" : either side may be missing and then stays 0 */
+		if((n = cli_strtok(pt, 0, "-"))) {
+		    if((mindist = atoi(n)) < 0) {
+			error = 1;
+			free(n);
+			break;
+		    }
+		    free(n);
+		}
+
+		if((n = cli_strtok(pt, 1, "-"))) {
+		    if((maxdist = atoi(n)) < 0) {
+			error = 1;
+			free(n);
+			break;
+		    }
+		    free(n);
+		}
+	    }
+	}
+
+	free(hexcpy);
+	if(error)
+	    return CL_EMALFDB;
+
+    } else if(strchr(hexsig, '*')) {
+	root->ac_partsigs++;
+
+	len = strlen(hexsig);
+	for(i = 0; i < len; i++)
+	    if(hexsig[i] == '*')
+		parts++;
+
+	if(parts)
+	    parts++;
+
+	for(i = 1; i <= parts; i++) {
+	    /* cli_strtok() returns a heap copy of the requested field */
+	    if((pt = cli_strtok(hexsig, i - 1, "*")) == NULL) {
+		cli_errmsg("Can't extract part %d of partial signature.\n", i);
+		return CL_EMALFDB;
+	    }
+
+	    if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0, offset, target))) {
+		cli_errmsg("cli_parse_add(): Problem adding signature (2).\n");
+		free(pt);
+		return ret;
+	    }
+
+	    free(pt);
+	}
+
+    } else if(root->ac_only || strpbrk(hexsig, "?(") || type) {
+	if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0, offset, target))) {
+	    cli_errmsg("cli_parse_add(): Problem adding signature (3).\n");
+	    return ret;
+	}
+
+    } else {
+	bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
+	if(!bm_new)
+	    return CL_EMEM;
+
+	if(!(bm_new->pattern = (unsigned char *) cli_hex2str(hexsig))) {
+	    free(bm_new);
+	    return CL_EMALFDB;
+	}
+
+	/* two hex digits per pattern byte */
+	bm_new->length = strlen(hexsig) / 2;
+
+	/* keep only the text in front of "(Clam)", minus one more character
+	 * (presumably the separator before the tag - TODO confirm) */
+	if((pt = strstr(virname, "(Clam)")))
+	    virlen = strlen(virname) - strlen(pt) - 1;
+	else
+	    virlen = strlen(virname);
+
+	if(virlen <= 0) {
+	    free(bm_new->pattern);
+	    free(bm_new);
+	    return CL_EMALFDB;
+	}
+
+	if((bm_new->virname = cli_calloc(virlen + 1, sizeof(char))) == NULL) {
+	    free(bm_new->pattern);
+	    free(bm_new);
+	    return CL_EMEM;
+	}
+
+	/* the buffer is virlen + 1 zeroed bytes, so the copy stays terminated */
+	strncpy(bm_new->virname, virname, virlen);
+
+	if(offset) {
+	    bm_new->offset = cli_strdup(offset);
+	    if(!bm_new->offset) {
+		free(bm_new->pattern);
+		free(bm_new->virname);
+		free(bm_new);
+		return CL_EMEM;
+	    }
+	}
+
+	bm_new->target = target;
+
+	if(bm_new->length > root->maxpatlen)
+	    root->maxpatlen = bm_new->length;
+
+	if((ret = cli_bm_addpatt(root, bm_new))) {
+	    cli_errmsg("cli_parse_add(): Problem adding signature (4).\n");
+	    free(bm_new->pattern);
+	    free(bm_new->virname);
+	    free(bm_new);
+	    return ret;
+	}
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Make sure *engine points to an initialised engine. When *engine is NULL a
+ * new cl_engine is allocated (refcount 1) together with its root table and
+ * dynamic configuration. When CL_DB_PHISHING_URLS is requested and the
+ * engine's dconf enables PHISHING_CONF_ENGINE, phishing_init() is run as well.
+ *
+ * Returns CL_SUCCESS, CL_EMEM, or the error from phishing_init(). On failure
+ * nothing is released here; *engine keeps whatever was allocated so far.
+ */
+int cli_initengine(struct cl_engine **engine, unsigned int options)
+{
+	struct cl_engine *eng = *engine;
+	int ret;
+
+
+    if(eng == NULL) {
+#ifdef CL_EXPERIMENTAL
+	cli_dbgmsg("Initializing the engine ("VERSION"-exp)\n");
+#else
+	cli_dbgmsg("Initializing the engine ("VERSION")\n");
+#endif
+
+	/* publish the pointer before anything else can fail */
+	eng = (struct cl_engine *) cli_calloc(1, sizeof(struct cl_engine));
+	*engine = eng;
+	if(eng == NULL) {
+	    cli_errmsg("Can't allocate memory for the engine structure!\n");
+	    return CL_EMEM;
+	}
+
+	eng->refcount = 1;
+
+	eng->root = cli_calloc(CL_TARGET_TABLE_SIZE, sizeof(struct cli_matcher *));
+	if(eng->root == NULL) {
+	    /* no need to free previously allocated memory here */
+	    cli_errmsg("Can't allocate memory for roots!\n");
+	    return CL_EMEM;
+	}
+
+	eng->dconf = cli_dconf_init();
+	if(eng->dconf == NULL) {
+	    cli_errmsg("Can't initialize dynamic configuration\n");
+	    return CL_EMEM;
+	}
+    }
+
+    /* phishing support is optional: both the caller and dconf must ask for it */
+    if(!(options & CL_DB_PHISHING_URLS))
+	return CL_SUCCESS;
+
+    if(!(((struct cli_dconf*) eng->dconf)->phishing & PHISHING_CONF_ENGINE))
+	return CL_SUCCESS;
+
+    ret = phishing_init(eng);
+    if(ret)
+	return ret;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Allocate and initialise every still-empty slot of engine->root[]: the AC
+ * matcher always, the BM tables unless CL_DB_ACONLY was requested.
+ * Slots that are already populated are left untouched.
+ *
+ * Returns CL_SUCCESS, CL_EMEM, or the error from cli_ac_init()/cli_bm_init().
+ */
+static int cli_initroots(struct cl_engine *engine, unsigned int options)
+{
+	int idx;
+
+
+    for(idx = 0; idx < CL_TARGET_TABLE_SIZE; idx++) {
+	    struct cli_matcher *matcher;
+	    int rc;
+
+	if(engine->root[idx])
+	    continue;
+
+	cli_dbgmsg("Initializing engine->root[%d]\n", idx);
+	matcher = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
+	engine->root[idx] = matcher;
+	if(matcher == NULL) {
+	    cli_errmsg("cli_initroots: Can't allocate memory for cli_matcher\n");
+	    return CL_EMEM;
+	}
+
+	if(options & CL_DB_ACONLY) {
+	    cli_dbgmsg("cli_initroots: Only using AC pattern matcher.\n");
+	    matcher->ac_only = 1;
+	}
+
+	cli_dbgmsg("Initialising AC pattern matcher of root[%d]\n", idx);
+	rc = cli_ac_init(matcher, cli_ac_mindepth, cli_ac_maxdepth);
+	if(rc) {
+	    /* no need to free previously allocated memory here */
+	    cli_errmsg("cli_initroots: Can't initialise AC pattern matcher\n");
+	    return rc;
+	}
+
+	if(matcher->ac_only)
+	    continue;
+
+	cli_dbgmsg("cli_initroots: Initializing BM tables of root[%d]\n", idx);
+	rc = cli_bm_init(matcher);
+	if(rc) {
+	    cli_errmsg("cli_initroots: Can't initialise BM pattern matcher\n");
+	    return rc;
+	}
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Load an old-style "name=hexsig" database from @fd into root[0] of *engine.
+ * Lines whose value starts with '=' (i.e. "name==...") are skipped.
+ * On success *signo (when given) grows by the number of lines read.
+ *
+ * Any failure frees the engine with cl_free(*engine). A line without '=', a
+ * signature rejected by cli_parse_add(), or an empty file yields CL_EMALFDB;
+ * initialisation errors are passed through.
+ */
+static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+	char linebuf[FILEBUFF];
+	int lineno = 0, status;
+	struct cli_matcher *root;
+
+
+    status = cli_initengine(engine, options);
+    if(!status)
+	status = cli_initroots(*engine, options);
+    if(status) {
+	cl_free(*engine);
+	return status;
+    }
+
+    root = (*engine)->root[0];
+
+    while(fgets(linebuf, FILEBUFF, fd)) {
+	    char *sep, *hexsig;
+
+	lineno++;
+	cli_chomp(linebuf);
+
+	sep = strchr(linebuf, '=');
+	if(sep == NULL) {
+	    cli_errmsg("Malformed pattern line %d\n", lineno);
+	    status = CL_EMALFDB;
+	    break;
+	}
+
+	/* split in place: linebuf is now the name, hexsig the value */
+	*sep = 0;
+	hexsig = sep + 1;
+
+	if(*hexsig == '=')
+	    continue;
+
+	if(cli_parse_add(root, linebuf, hexsig, 0, NULL, 0)) {
+	    cli_errmsg("Problem parsing signature at line %d\n", lineno);
+	    status = CL_EMALFDB;
+	    break;
+	}
+    }
+
+    if(lineno == 0) {
+	cli_errmsg("Empty database file\n");
+	cl_free(*engine);
+	return CL_EMALFDB;
+    }
+
+    if(status) {
+	cli_errmsg("Problem parsing database at line %d\n", lineno);
+	cl_free(*engine);
+	return status;
+    }
+
+    if(signo)
+	*signo += lineno;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Load a phishing whitelist database from @fd into the engine's
+ * whitelist matcher, creating the matcher on first use.
+ * Does nothing (CL_SUCCESS) when dconf does not enable PHISHING_CONF_ENGINE.
+ *
+ * On failure the engine is freed (after phishing_done() when the failure
+ * happened in the whitelist code) and the error code is returned.
+ */
+static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+	int rc;
+
+
+    rc = cli_initengine(engine, options);
+    if(rc) {
+	cl_free(*engine);
+	return rc;
+    }
+
+    if(!(((struct cli_dconf *) (*engine)->dconf)->phishing & PHISHING_CONF_ENGINE))
+	return CL_SUCCESS;
+
+    /* create the matcher if needed, then feed it; both share one error path */
+    if(!(*engine)->whitelist_matcher)
+	rc = init_whitelist(*engine);
+    if(!rc)
+	rc = load_regex_matcher((*engine)->whitelist_matcher, fd, options, 1);
+
+    if(rc) {
+	phishing_done(*engine);
+	cl_free(*engine);
+	return rc;
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Load a phishing domain-list database from @fd into the engine's
+ * domainlist matcher, creating the matcher on first use.
+ * Does nothing (CL_SUCCESS) when dconf does not enable PHISHING_CONF_ENGINE.
+ *
+ * On failure the engine is freed (after phishing_done() when the failure
+ * happened in the domain-list code) and the error code is returned.
+ */
+static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+	int rc;
+
+
+    rc = cli_initengine(engine, options);
+    if(rc) {
+	cl_free(*engine);
+	return rc;
+    }
+
+    if(!(((struct cli_dconf *) (*engine)->dconf)->phishing & PHISHING_CONF_ENGINE))
+	return CL_SUCCESS;
+
+    /* create the matcher if needed, then feed it; both share one error path */
+    if(!(*engine)->domainlist_matcher)
+	rc = init_domainlist(*engine);
+    if(!rc)
+	rc = load_regex_matcher((*engine)->domainlist_matcher, fd, options, 0);
+
+    if(rc) {
+	phishing_done(*engine);
+	cl_free(*engine);
+	return rc;
+    }
+
+    return CL_SUCCESS;
+}
+
+#define NDB_TOKENS 6
+/*
+ * Load an extended signature database from @fd. Each line is split on ':'
+ * into at most NDB_TOKENS fields:
+ *   0 virus name, 1 target type, 2 offset ("*" = none), 3 hex signature,
+ *   4 minimum functionality level (optional), 5 maximum level (optional).
+ * Signatures whose level range excludes cl_retflevel(), or whose target is
+ * outside the root table, are skipped without error. Lines starting with
+ * "Exploit.JPEG.Comment" are always skipped; "HTML.Phishing" and
+ * "Email.Phishing" lines are skipped unless CL_DB_PHISHING is set.
+ *
+ * @signo, when given, grows by the number of signatures loaded. A non-zero
+ * @sdb marks the engine (engine->sdb) once at least one signature was loaded.
+ *
+ * Returns CL_SUCCESS, or an error after freeing the engine with cl_free():
+ * CL_EMALFDB for an empty or malformed file, otherwise the initialisation
+ * error.
+ */
+static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options)
+{
+	const char *tokens[NDB_TOKENS];
+	char buffer[FILEBUFF];
+	const char *sig, *virname, *offset, *pt;
+	struct cli_matcher *root;
+	int line = 0, sigs = 0, ret = 0;
+	unsigned short target;
+	unsigned int phish = options & CL_DB_PHISHING;
+
+
+    if((ret = cli_initengine(engine, options))) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    if((ret = cli_initroots(*engine, options))) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+	line++;
+
+	if(!strncmp(buffer, "Exploit.JPEG.Comment", 20)) /* temporary */
+	    continue;
+
+	if(!phish)
+	    if(!strncmp(buffer, "HTML.Phishing", 13) || !strncmp(buffer, "Email.Phishing", 14))
+		continue;
+
+	sigs++;
+	cli_chomp(buffer);
+
+	cli_strtokenize(buffer, ':', NDB_TOKENS, tokens);
+
+	if(!(virname = tokens[0])) {
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+	/* <ctype.h> functions need a value representable as unsigned char;
+	 * a plain char holding a byte >= 0x80 may be negative, hence the casts */
+	if((pt = tokens[4])) { /* min version */
+	    if(!isdigit((unsigned char) *pt)) {
+		ret = CL_EMALFDB;
+		break;
+	    }
+
+	    if((unsigned int) atoi(pt) > cl_retflevel()) {
+		cli_dbgmsg("Signature for %s not loaded (required f-level: %d)\n", virname, atoi(pt));
+		sigs--;
+		continue;
+	    }
+
+
+	    if((pt = tokens[5])) { /* max version */
+		if(!isdigit((unsigned char) *pt)) {
+		    ret = CL_EMALFDB;
+		    break;
+		}
+
+		if((unsigned int) atoi(pt) < cl_retflevel()) {
+		    sigs--;
+		    continue;
+		}
+
+	    }
+	}
+
+	if(!(pt = tokens[1]) || !isdigit((unsigned char) *pt)) {
+	    ret = CL_EMALFDB;
+	    break;
+	}
+	target = (unsigned short) atoi(pt);
+
+	if(target >= CL_TARGET_TABLE_SIZE) {
+	    cli_dbgmsg("Not supported target type in signature for %s\n", virname);
+	    sigs--;
+	    continue;
+	}
+
+	root = (*engine)->root[target];
+
+	if(!(offset = tokens[2])) {
+	    ret = CL_EMALFDB;
+	    break;
+	} else if(!strcmp(offset, "*")) {
+	    offset = NULL;
+	}
+
+	if(!(sig = tokens[3])) {
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+	/* whatever cli_parse_add() reports is surfaced as a malformed database */
+	if((ret = cli_parse_add(root, virname, sig, 0, offset, target))) {
+	    cli_errmsg("Problem parsing signature at line %d\n", line);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+    }
+
+    if(!line) {
+	cli_errmsg("Empty database file\n");
+	cl_free(*engine);
+	return CL_EMALFDB;
+    }
+
+    if(ret) {
+	cli_errmsg("Problem parsing database at line %d\n", line);
+	cl_free(*engine);
+	return ret;
+    }
+
+    if(signo)
+	*signo += sigs;
+
+    if(sdb && sigs && !(*engine)->sdb) {
+	(*engine)->sdb = 1;
+	cli_dbgmsg("*** Self protection mechanism activated.\n");
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * qsort() comparator for uint32_t values, ascending.
+ * Returns a negative, zero or positive value when *a is less than, equal to
+ * or greater than *b. The result is built from two comparisons rather than a
+ * subtraction: an unsigned difference converted to int has the wrong sign
+ * whenever the operands are more than INT_MAX apart.
+ */
+static int scomp(const void *a, const void *b)
+{
+	const uint32_t lhs = *(const uint32_t *)a;
+	const uint32_t rhs = *(const uint32_t *)b;
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Modes accepted by cli_loadmd5():
+ *   MD5_HDB - whole-file MD5 signatures,
+ *   MD5_MDB - section MD5 signatures (stored in engine->md5_sect),
+ *   MD5_FP  - like MD5_HDB, but each node is flagged with fp = 1. */
+#define MD5_HDB	    0
+#define MD5_MDB	    1
+#define MD5_FP	    2
+/*
+ * Load an MD5 signature database from @fd. Every line holds ':'-separated
+ * fields; field 2 is always the virus name. For MD5_MDB field 0 is the size
+ * and field 1 the MD5, for the other modes field 0 is the MD5 and field 1 the
+ * size.
+ *
+ * MD5_MDB entries become 16-byte BM patterns in engine->md5_sect and their
+ * sizes are collected (without duplicates) in md5_sect->soff, which is sorted
+ * before returning. All other entries are pushed onto the engine->md5_hlist
+ * bucket selected by the first MD5 byte.
+ *
+ * @signo, when given, grows by the number of lines read. Returns CL_SUCCESS,
+ * or an error (CL_EMEM, CL_EMALFDB, or one from cli_initengine()/
+ * cli_bm_init()/cli_bm_addpatt()) after freeing the engine with cl_free().
+ */
+static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo, uint8_t mode, unsigned int options)
+{
+	char buffer[FILEBUFF], *pt;
+	int ret = CL_SUCCESS;
+	uint8_t size_field = 1, md5_field = 0, found;
+	uint32_t line = 0, i;
+	struct cli_md5_node *new;
+	struct cli_bm_patt *bm_new;
+	struct cli_matcher *md5_sect = NULL;
+
+
+    if((ret = cli_initengine(engine, options))) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    /* section databases store the size first and the MD5 second */
+    if(mode == MD5_MDB) {
+	size_field = 0;
+	md5_field = 1;
+    }
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+	line++;
+	cli_chomp(buffer);
+
+	new = (struct cli_md5_node *) cli_calloc(1, sizeof(struct cli_md5_node));
+	if(!new) {
+	    ret = CL_EMEM;
+	    break;
+	}
+
+	if(mode == MD5_FP) /* fp */
+	    new->fp = 1;
+
+	/* cli_strtok() hands back a heap copy that must be freed here */
+	if(!(pt = cli_strtok(buffer, md5_field, ":"))) {
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+	if(!(new->md5 = (unsigned char *) cli_hex2str(pt))) {
+	    cli_errmsg("cli_loadmd5: Malformed MD5 string at line %u\n", line);
+	    free(pt);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+	free(pt);
+
+	if(!(pt = cli_strtok(buffer, size_field, ":"))) {
+	    free(new->md5);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+	new->size = atoi(pt);
+	free(pt);
+
+	/* the virus name copy is kept: it is owned by the node from here on */
+	if(!(new->virname = cli_strtok(buffer, 2, ":"))) {
+	    free(new->md5);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+	if(mode == MD5_MDB) { /* section MD5 */
+	    /* create the section matcher the first time it is needed */
+	    if(!(*engine)->md5_sect) {
+		(*engine)->md5_sect = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
+		if(!(*engine)->md5_sect) {
+		    free(new->virname);
+		    free(new->md5);
+		    free(new);
+		    ret = CL_EMEM;
+		    break;
+		}
+		if((ret = cli_bm_init((*engine)->md5_sect))) {
+		    cli_errmsg("cli_loadmd5: Can't initialise BM pattern matcher\n");
+		    free(new->virname);
+		    free(new->md5);
+		    free(new);
+		    break;
+		}
+	    }
+	    md5_sect = (*engine)->md5_sect;
+
+	    bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
+	    if(!bm_new) {
+		cli_errmsg("cli_loadmd5: Can't allocate memory for bm_new\n");
+		free(new->virname);
+		free(new->md5);
+		free(new);
+		ret = CL_EMEM;
+		break;
+	    }
+
+	    /* the pattern takes over the md5 and virname buffers of the node */
+	    bm_new->pattern = new->md5;
+	    bm_new->length = 16;
+	    bm_new->virname = new->virname;
+
+	    /* record the section size once in the soff table */
+	    found = 0;
+	    for(i = 0; i < md5_sect->soff_len; i++) {
+		if(md5_sect->soff[i] == new->size) {
+		    found = 1;
+		    break;
+		}
+	    }
+
+	    if(!found) {
+		md5_sect->soff_len++;
+		md5_sect->soff = (uint32_t *) cli_realloc2(md5_sect->soff, md5_sect->soff_len * sizeof(uint32_t));
+		if(!md5_sect->soff) {
+		    cli_errmsg("cli_loadmd5: Can't realloc md5_sect->soff\n");
+		    free(bm_new->pattern);
+		    free(bm_new->virname);
+		    free(bm_new);
+		    free(new);
+		    ret = CL_EMEM;
+		    break;
+		}
+		md5_sect->soff[md5_sect->soff_len - 1] = new->size;
+	    }
+
+	    /* only the node shell is released; its buffers now belong to bm_new */
+	    free(new);
+
+	    if((ret = cli_bm_addpatt(md5_sect, bm_new))) {
+		cli_errmsg("cli_loadmd5: Error adding BM pattern\n");
+		free(bm_new->pattern);
+		free(bm_new->virname);
+		free(bm_new);
+		break;
+	    }
+
+	} else {
+	    /* 256 buckets, one per possible value of the first MD5 byte */
+	    if(!(*engine)->md5_hlist) {
+		cli_dbgmsg("cli_loadmd5: Initializing MD5 list structure\n");
+		(*engine)->md5_hlist = cli_calloc(256, sizeof(struct cli_md5_node *));
+		if(!(*engine)->md5_hlist) {
+		    free(new->virname);
+		    free(new->md5);
+		    free(new);
+		    ret = CL_EMEM;
+		    break;
+		}
+	    }
+
+	    /* push the node onto the front of its bucket */
+	    new->next = (*engine)->md5_hlist[new->md5[0] & 0xff];
+	    (*engine)->md5_hlist[new->md5[0] & 0xff] = new;
+	}
+    }
+
+    if(!line) {
+	cli_errmsg("cli_loadmd5: Empty database file\n");
+	cl_free(*engine);
+	return CL_EMALFDB;
+    }
+
+    if(ret) {
+	cli_errmsg("cli_loadmd5: Problem parsing database at line %u\n", line);
+	cl_free(*engine);
+	return ret;
+    }
+
+    if(signo)
+	*signo += line;
+
+    /* md5_sect is only set when at least one MD5_MDB line was processed */
+    if(md5_sect)
+	qsort(md5_sect->soff, md5_sect->soff_len, sizeof(uint32_t), scomp);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Load an archive metadata database from @fd. Lines starting with '#' are
+ * comments. Every other line must provide nine ':'-separated fields:
+ *   0 virus name, 1 encrypted flag, 2 file name, 3 size, 4 compressed size,
+ *   5 CRC32 (hex), 6 method, 7 file number, 8 maximum depth.
+ * A "*" wildcard is stored as: file name NULL, size/csize/method -1,
+ * crc32/fileno/maxdepth 0.
+ *
+ * Each record is pushed onto engine->zip_mlist when @type is 1 and onto
+ * engine->rar_mlist otherwise. @signo, when given, grows by the number of
+ * non-comment lines.
+ *
+ * Returns CL_SUCCESS, or an error (CL_EMEM, CL_EMALFDB, or the one from
+ * cli_initengine()) after freeing the engine with cl_free(). A record that
+ * fails to parse is released completely before the loop is left.
+ */
+static int cli_loadmd(FILE *fd, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options)
+{
+	char buffer[FILEBUFF], *pt;
+	int line = 0, comments = 0, ret = 0, crc32;
+	struct cli_meta_node *new;
+
+
+    if((ret = cli_initengine(engine, options))) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+	line++;
+	if(buffer[0] == '#') {
+	    comments++;
+	    continue;
+	}
+
+	cli_chomp(buffer);
+
+	new = (struct cli_meta_node *) cli_calloc(1, sizeof(struct cli_meta_node));
+	if(!new) {
+	    ret = CL_EMEM;
+	    break;
+	}
+
+	/* field 0: virus name (kept, owned by the node) */
+	if(!(new->virname = cli_strtok(buffer, 0, ":"))) {
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	}
+
+	/* field 1: encrypted flag */
+	if(!(pt = cli_strtok(buffer, 1, ":"))) {
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    new->encrypted = atoi(pt);
+	    free(pt);
+	}
+
+	/* field 2: file name (kept, owned by the node; NULL for "*") */
+	if(!(new->filename = cli_strtok(buffer, 2, ":"))) {
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(new->filename, "*")) {
+		free(new->filename);
+		new->filename = NULL;
+	    }
+	}
+
+	/* field 3: size */
+	if(!(pt = cli_strtok(buffer, 3, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*"))
+		new->size = -1;
+	    else
+		new->size = atoi(pt);
+	    free(pt);
+	}
+
+	/* field 4: compressed size */
+	if(!(pt = cli_strtok(buffer, 4, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*"))
+		new->csize = -1;
+	    else
+		new->csize = atoi(pt);
+	    free(pt);
+	}
+
+	/* field 5: CRC32 as hex */
+	if(!(pt = cli_strtok(buffer, 5, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*")) {
+		new->crc32 = 0;
+	    } else {
+		crc32 = cli_hex2num(pt);
+		if(crc32 == -1) {
+		    /* release the token and the half-built record: the node
+		     * has not been linked into any list yet */
+		    free(pt);
+		    free(new->filename);
+		    free(new->virname);
+		    free(new);
+		    ret = CL_EMALFDB;
+		    break;
+		}
+		new->crc32 = (unsigned int) crc32;
+	    }
+	    free(pt);
+	}
+
+	/* field 6: compression method */
+	if(!(pt = cli_strtok(buffer, 6, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*"))
+		new->method = -1;
+	    else
+		new->method = atoi(pt);
+	    free(pt);
+	}
+
+	/* field 7: file number */
+	if(!(pt = cli_strtok(buffer, 7, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*"))
+		new->fileno = 0;
+	    else
+		new->fileno = atoi(pt);
+	    free(pt);
+	}
+
+	/* field 8: maximum depth */
+	if(!(pt = cli_strtok(buffer, 8, ":"))) {
+	    free(new->filename);
+	    free(new->virname);
+	    free(new);
+	    ret = CL_EMALFDB;
+	    break;
+	} else {
+	    if(!strcmp(pt, "*"))
+		new->maxdepth = 0;
+	    else
+		new->maxdepth = atoi(pt);
+	    free(pt);
+	}
+
+	/* record complete: push it onto the front of the matching list */
+	if(type == 1) {
+	    new->next = (*engine)->zip_mlist;
+	    (*engine)->zip_mlist = new;
+	} else {
+	    new->next = (*engine)->rar_mlist;
+	    (*engine)->rar_mlist = new;
+	}
+    }
+
+    if(!line) {
+	cli_errmsg("Empty database file\n");
+	cl_free(*engine);
+	return CL_EMALFDB;
+    }
+
+    if(ret) {
+	cli_errmsg("Problem parsing database at line %d\n", line);
+	cl_free(*engine);
+	return ret;
+    }
+
+    if(signo)
+	*signo += (line - comments);
+
+    return CL_SUCCESS;
+}
+
+static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options);
+
+/*
+ * Open @filename and dispatch it to the loader that matches its extension
+ * (tested with cli_strbcasestr(), in the order written below). Files with an
+ * unknown extension are treated as old-style .db databases.
+ * .hdu/.mdu/.ndu are loaded only with CL_DB_PUA, .wdb/.pdb only with
+ * CL_DB_PHISHING_URLS; otherwise they are skipped, which is not an error.
+ *
+ * Returns CL_EOPEN when the file cannot be opened, otherwise the loader's
+ * result (CL_SUCCESS for a skipped file). The file is always closed.
+ */
+static int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+	FILE *fs;
+	int rc = CL_SUCCESS;
+	uint8_t skipped = 0;
+
+
+    fs = fopen(filename, "rb");
+    if(fs == NULL) {
+	cli_errmsg("cli_load(): Can't open file %s\n", filename);
+	return CL_EOPEN;
+    }
+
+    if(cli_strbcasestr(filename, ".db")) {
+	rc = cli_loaddb(fs, engine, signo, options);
+
+    } else if(cli_strbcasestr(filename, ".cvd")) {
+	    /* the warn argument is set only for the daily database */
+	    const int warn = strstr(filename, "daily.cvd") ? 1 : 0;
+
+	rc = cli_cvdload(fs, engine, signo, warn, options);
+
+    } else if(cli_strbcasestr(filename, ".hdb")) {
+	rc = cli_loadmd5(fs, engine, signo, MD5_HDB, options);
+
+    } else if(cli_strbcasestr(filename, ".hdu")) {
+	if(!(options & CL_DB_PUA))
+	    skipped = 1;
+	else
+	    rc = cli_loadmd5(fs, engine, signo, MD5_HDB, options);
+
+    } else if(cli_strbcasestr(filename, ".fp")) {
+	rc = cli_loadmd5(fs, engine, signo, MD5_FP, options);
+
+    } else if(cli_strbcasestr(filename, ".mdb")) {
+	rc = cli_loadmd5(fs, engine, signo, MD5_MDB, options);
+
+    } else if(cli_strbcasestr(filename, ".mdu")) {
+	if(!(options & CL_DB_PUA))
+	    skipped = 1;
+	else
+	    rc = cli_loadmd5(fs, engine, signo, MD5_MDB, options);
+
+    } else if(cli_strbcasestr(filename, ".ndb")) {
+	rc = cli_loadndb(fs, engine, signo, 0, options);
+
+    } else if(cli_strbcasestr(filename, ".ndu")) {
+	if(options & CL_DB_PUA)
+	    rc = cli_loadndb(fs, engine, signo, 0, options);
+	else
+	    skipped = 1;
+
+    } else if(cli_strbcasestr(filename, ".sdb")) {
+	rc = cli_loadndb(fs, engine, signo, 1, options);
+
+    } else if(cli_strbcasestr(filename, ".zmd")) {
+	rc = cli_loadmd(fs, engine, signo, 1, options);
+
+    } else if(cli_strbcasestr(filename, ".rmd")) {
+	rc = cli_loadmd(fs, engine, signo, 2, options);
+
+    } else if(cli_strbcasestr(filename, ".cfg")) {
+	rc = cli_dconf_load(fs, engine, options);
+
+    } else if(cli_strbcasestr(filename, ".wdb")) {
+	if(!(options & CL_DB_PHISHING_URLS))
+	    skipped = 1;
+	else
+	    rc = cli_loadwdb(fs, engine, options);
+
+    } else if(cli_strbcasestr(filename, ".pdb")) {
+	if(!(options & CL_DB_PHISHING_URLS))
+	    skipped = 1;
+	else
+	    rc = cli_loadpdb(fs, engine, options);
+
+    } else {
+	cli_dbgmsg("cli_load: unknown extension - assuming old database format\n");
+	rc = cli_loaddb(fs, engine, signo, options);
+    }
+
+    if(rc)
+	cli_errmsg("Can't load %s: %s\n", filename, cl_strerror(rc));
+    else if(skipped)
+	cli_dbgmsg("%s skipped\n", filename);
+    else
+	cli_dbgmsg("%s loaded\n", filename);
+
+    fclose(fs);
+    return rc;
+}
+
+/* Public wrapper: load a single database file through cli_load() with the
+ * CL_DB_STDOPT option set and return cli_load()'s result unchanged. */
+int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo) {
+    return cli_load(filename, engine, signo, CL_DB_STDOPT);
+}
+
+/*
+ * Non-zero when cli_strbcasestr() matches `ext` against any of the database
+ * extensions listed below (presumably a case-insensitive suffix match -
+ * confirm against cli_strbcasestr()'s definition).
+ *
+ * `ext` is expanded once per test, so pass an expression without side
+ * effects.  ".cfg" is deliberately absent from this list: cli_loaddbdir_l()
+ * loads daily.cfg explicitly before scanning the directory.
+ */
+#define CLI_DBEXT(ext)				\
+    (						\
+	cli_strbcasestr(ext, ".db")    ||	\
+	cli_strbcasestr(ext, ".db2")   ||	\
+	cli_strbcasestr(ext, ".db3")   ||	\
+	cli_strbcasestr(ext, ".hdb")   ||	\
+	cli_strbcasestr(ext, ".hdu")   ||	\
+	cli_strbcasestr(ext, ".fp")    ||	\
+	cli_strbcasestr(ext, ".mdb")   ||	\
+	cli_strbcasestr(ext, ".mdu")   ||	\
+	cli_strbcasestr(ext, ".ndb")   ||	\
+	cli_strbcasestr(ext, ".ndu")   ||	\
+	cli_strbcasestr(ext, ".sdb")   ||	\
+	cli_strbcasestr(ext, ".zmd")   ||	\
+	cli_strbcasestr(ext, ".rmd")   ||	\
+	cli_strbcasestr(ext, ".pdb")   ||	\
+	cli_strbcasestr(ext, ".wdb")   ||	\
+	cli_strbcasestr(ext, ".inc")   ||	\
+	cli_strbcasestr(ext, ".cvd")		\
+    )
+
+/*
+ * Load all databases found in dirname (called by cli_loaddbdir() after its
+ * locking attempt).
+ *
+ * daily.cfg is loaded first when it exists.  Then every directory entry
+ * accepted by CLI_DBEXT() is loaded: entries ending in ".inc" are handed
+ * back to cli_loaddbdir(), everything else goes to cli_load().  Loading
+ * stops at the first failure.
+ *
+ * Returns CL_EMEM on allocation failure, CL_EOPEN when the directory cannot
+ * be opened, the failing loader's code on error, and otherwise the result of
+ * the last load.  If nothing at all was loaded the initial CL_ESUPPORT is
+ * returned and an error is logged.
+ */
+static int cli_loaddbdir_l(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+	DIR *dd;
+	struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+	/* storage for readdir_r(); the char array pads the entry so a name of
+	 * NAME_MAX bytes plus NUL fits */
+	union {
+	    struct dirent d;
+	    char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+	} result;
+#endif
+	struct stat sb;
+	char *dbfile;
+	int ret = CL_ESUPPORT;
+
+
+    cli_dbgmsg("Loading databases from %s\n", dirname);
+
+    /* check for and load daily.cfg */
+    /* +11: room for "/daily.cfg" (10 characters) and the terminating NUL */
+    dbfile = (char *) cli_malloc(strlen(dirname) + 11);
+    if(!dbfile)
+	return CL_EMEM;
+    sprintf(dbfile, "%s/daily.cfg", dirname);
+    if(stat(dbfile, &sb) != -1) {
+	if((ret = cli_load(dbfile, engine, signo, options))) {
+	    free(dbfile);
+	    return ret;
+	}
+    }
+    free(dbfile);
+
+    if((dd = opendir(dirname)) == NULL) {
+        cli_errmsg("cli_loaddbdir(): Can't open directory %s\n", dirname);
+        return CL_EOPEN;
+    }
+
+    /* exactly one of the three loop headers below is compiled in; they all
+     * share the loop body that follows */
+#ifdef HAVE_READDIR_R_3
+    while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+    while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+    while((dent = readdir(dd))) {
+#endif
+#if	(!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+	/* entries with a zero inode number are ignored on these platforms */
+	if(dent->d_ino)
+#endif
+	{
+	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+
+		/* +2: the '/' separator and the terminating NUL */
+		dbfile = (char *) cli_malloc(strlen(dent->d_name) + strlen(dirname) + 2);
+
+		if(!dbfile) {
+		    cli_dbgmsg("cli_loaddbdir(): dbfile == NULL\n");
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+		sprintf(dbfile, "%s/%s", dirname, dent->d_name);
+
+		/* ".inc" entries are treated as nested database directories */
+		if(cli_strbcasestr(dbfile, ".inc"))
+		    ret = cli_loaddbdir(dbfile, engine, signo, options);
+		else
+		    ret = cli_load(dbfile, engine, signo, options);
+
+		if(ret) {
+		    cli_dbgmsg("cli_loaddbdir(): error loading database %s\n", dbfile);
+		    free(dbfile);
+		    closedir(dd);
+		    return ret;
+		}
+		free(dbfile);
+	    }
+	}
+    }
+
+    closedir(dd);
+    /* ret still holds its initial value only if no file was loaded at all */
+    if(ret == CL_ESUPPORT)
+	cli_errmsg("cli_loaddb(): No supported database files found in %s\n", dirname);
+
+    return ret;
+}
+
+/*
+ * Load every database in dirname after trying to take the directory's read
+ * lock.
+ *
+ * cli_readlockdb() is retried every 5 seconds for as long as it reports
+ * CL_ELOCKDB; once the retry counter is exhausted the function gives up and
+ * returns CL_ELOCKDB.  Any other lock result lets loading proceed, but the
+ * lock is only released when it was actually obtained (lock == CL_SUCCESS).
+ *
+ * Returns cli_loaddbdir_l()'s result, or CL_ELOCKDB when the lock could not
+ * be taken.
+ */
+static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+	int ret, try = 0, lock;
+
+
+    cli_dbgmsg("cli_loaddbdir: Acquiring dbdir lock\n");
+    while((lock = cli_readlockdb(dirname, 0)) == CL_ELOCKDB) {
+#ifdef C_WINDOWS
+	/* Win32 Sleep() takes milliseconds, POSIX sleep() takes seconds:
+	 * wait the same 5 seconds on both platforms */
+	Sleep(5000);
+#else
+	sleep(5);
+#endif
+	if(try++ > 24) {
+	    cli_errmsg("cl_load(): Unable to lock database directory: %s\n", dirname);
+	    return CL_ELOCKDB;
+	}
+    }
+
+    ret = cli_loaddbdir_l(dirname, engine, signo, options);
+    if(lock == CL_SUCCESS)
+	cli_unlockdb(dirname);
+
+    return ret;
+}
+
+/* Public wrapper: load a database directory through cli_loaddbdir() with the
+ * CL_DB_STDOPT option set and return its result unchanged. */
+int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo) {
+    return cli_loaddbdir(dirname, engine, signo, CL_DB_STDOPT);
+}
+
+/*
+ * Load signatures from path, which may be a single database file or a
+ * directory of databases.
+ *
+ * The engine is prepared with cli_initengine() and options is recorded in
+ * (*engine)->dboptions before loading.  Regular files go to cli_load(),
+ * directories to cli_loaddbdir().
+ *
+ * Returns CL_EIO when path cannot be stat()ed, cli_initengine()'s error code
+ * when initialisation fails (the engine is then passed to cl_free()),
+ * CL_EOPEN for any other file type, and otherwise the loader's result.
+ */
+int cl_load(const char *path, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+	struct stat sb;
+	int ret;
+
+
+    if(stat(path, &sb) == -1) {
+	/* the message names this function (it used to say cl_loaddbdir()) */
+        cli_errmsg("cl_load(): Can't get status of %s\n", path);
+        return CL_EIO;
+    }
+
+    if((ret = cli_initengine(engine, options))) {
+	cl_free(*engine);
+	return ret;
+    }
+
+    (*engine)->dboptions = options;
+
+    switch(sb.st_mode & S_IFMT) {
+	case S_IFREG:
+	    ret = cli_load(path, engine, signo, options);
+	    break;
+
+	case S_IFDIR:
+	    ret = cli_loaddbdir(path, engine, signo, options);
+	    break;
+
+	default:
+	    cli_errmsg("cl_load(%s): Not supported database file type\n", path);
+	    return CL_EOPEN;
+    }
+
+    return ret;
+}
+
+/* Return the DATADIR string this file was compiled with.  The pointer refers
+ * to a string literal / macro expansion, so the caller must not free it. */
+const char *cl_retdbdir(void)
+{
+    return DATADIR;
+}
+
+/*
+ * Record the current stat() information of every database entry in dirname
+ * so that cl_statchkdir() can later detect changes.
+ *
+ * dbstat is reset and then filled in: dbstat->dir receives a copy of
+ * dirname, dbstat->stattab grows by one struct stat per entry accepted by
+ * CLI_DBEXT(), and on C_INTERIX / C_OS2 builds dbstat->statdname also keeps a
+ * copy of each entry name.  For ".inc" entries the file that is stat()ed is
+ * the "daily.info" or "main.info" file inside that directory.
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG when dbstat is NULL, CL_EOPEN when the
+ * directory cannot be opened, or CL_EMEM on allocation failure.  On the
+ * CL_EOPEN and in-loop CL_EMEM paths dbstat has already been released with
+ * cl_statfree().
+ *
+ * NOTE(review): the result of stat() is not checked, so a table slot may be
+ * left with whatever the allocator returned if stat() fails.
+ */
+int cl_statinidir(const char *dirname, struct cl_stat *dbstat)
+{
+	DIR *dd;
+	const struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+	union {
+	    struct dirent d;
+	    char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+	} result;
+#endif
+        char *fname;
+
+
+    if(dbstat) {
+	dbstat->entries = 0;
+	dbstat->stattab = NULL;
+	dbstat->statdname = NULL;
+	dbstat->dir = cli_strdup(dirname);
+	/* without the directory name cl_statchkdir() cannot work later */
+	if(!dbstat->dir)
+	    return CL_EMEM;
+    } else {
+        cli_errmsg("cl_statdbdir(): Null argument passed.\n");
+	return CL_ENULLARG;
+    }
+
+    if((dd = opendir(dirname)) == NULL) {
+        cli_errmsg("cl_statdbdir(): Can't open directory %s\n", dirname);
+	cl_statfree(dbstat);
+        return CL_EOPEN;
+    }
+
+    cli_dbgmsg("Stat()ing files in %s\n", dirname);
+
+    /* exactly one of the three loop headers below is compiled in */
+#ifdef HAVE_READDIR_R_3
+    while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+    while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+    while((dent = readdir(dd))) {
+#endif
+#if	(!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+	if(dent->d_ino)
+#endif
+	{
+	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+		dbstat->entries++;
+		dbstat->stattab = (struct stat *) cli_realloc2(dbstat->stattab, dbstat->entries * sizeof(struct stat));
+		if(!dbstat->stattab) {
+		    cl_statfree(dbstat);
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+
+#if defined(C_INTERIX) || defined(C_OS2)
+		dbstat->statdname = (char **) cli_realloc2(dbstat->statdname, dbstat->entries * sizeof(char *));
+		if(!dbstat->statdname) {
+		    cl_statfree(dbstat);
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+		/* the new slot must be NULL so that cl_statfree() on a later
+		 * error path does not free an uninitialised pointer */
+		dbstat->statdname[dbstat->entries - 1] = NULL;
+#endif
+
+		/* +32 leaves room for the separators, the "daily.info" /
+		 * "main.info" suffix and the terminating NUL */
+                fname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 32);
+		if(!fname) {
+		    cl_statfree(dbstat);
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+
+		if(cli_strbcasestr(dent->d_name, ".inc")) {
+		    sprintf(fname, "%s/%s/%s.info", dirname, dent->d_name, strstr(dent->d_name, "daily") ? "daily" : "main");
+		} else {
+		    sprintf(fname, "%s/%s", dirname, dent->d_name);
+		}
+#if defined(C_INTERIX) || defined(C_OS2)
+		dbstat->statdname[dbstat->entries - 1] = (char *) cli_malloc(strlen(dent->d_name) + 1);
+		if(!dbstat->statdname[dbstat->entries - 1]) {
+		    /* fname is still owned here; release it before bailing out */
+		    free(fname);
+		    cl_statfree(dbstat);
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+
+		strcpy(dbstat->statdname[dbstat->entries - 1], dent->d_name);
+#endif
+		stat(fname, &dbstat->stattab[dbstat->entries - 1]);
+		free(fname);
+	    }
+	}
+    }
+
+    closedir(dd);
+    return CL_SUCCESS;
+}
+
+/*
+ * Compare the database directory recorded in dbstat with its current
+ * contents.
+ *
+ * Every entry accepted by CLI_DBEXT() is stat()ed again (for ".inc" entries
+ * the "daily.info" / "main.info" file inside them) and looked up in the
+ * recorded table: by name on C_INTERIX / C_OS2 builds, by inode number
+ * elsewhere.
+ *
+ * Returns 1 as soon as an entry is not in the table or its modification
+ * time differs, CL_SUCCESS when no difference was found, CL_ENULLARG for a
+ * NULL dbstat or dbstat->dir, CL_EOPEN when the directory cannot be opened
+ * and CL_EMEM on allocation failure.
+ *
+ * NOTE(review): the result of stat() is not checked, so sb may be compared
+ * without having been filled in if stat() fails.
+ */
+int cl_statchkdir(const struct cl_stat *dbstat)
+{
+	DIR *dd;
+	struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+	union {
+	    struct dirent d;
+	    char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+	} result;
+#endif
+	struct stat sb;
+	unsigned int i, found;
+	char *fname;
+
+
+    if(!dbstat || !dbstat->dir) {
+        cli_errmsg("cl_statdbdir(): Null argument passed.\n");
+	return CL_ENULLARG;
+    }
+
+    if((dd = opendir(dbstat->dir)) == NULL) {
+        cli_errmsg("cl_statdbdir(): Can't open directory %s\n", dbstat->dir);
+        return CL_EOPEN;
+    }
+
+    cli_dbgmsg("Stat()ing files in %s\n", dbstat->dir);
+
+    /* exactly one of the three loop headers below is compiled in */
+#ifdef HAVE_READDIR_R_3
+    while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+    while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+    while((dent = readdir(dd))) {
+#endif
+#if	(!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+	if(dent->d_ino)
+#endif
+	{
+	    if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+		/* same path construction as in cl_statinidir() */
+                fname = cli_malloc(strlen(dbstat->dir) + strlen(dent->d_name) + 32);
+		if(!fname) {
+		    closedir(dd);
+		    return CL_EMEM;
+		}
+
+		if(cli_strbcasestr(dent->d_name, ".inc")) {
+		    sprintf(fname, "%s/%s/%s.info", dbstat->dir, dent->d_name, strstr(dent->d_name, "daily") ? "daily" : "main");
+		} else {
+		    sprintf(fname, "%s/%s", dbstat->dir, dent->d_name);
+		}
+		stat(fname, &sb);
+		free(fname);
+
+		found = 0;
+		/* brace-less for: its body is the single if statement whose
+		 * condition is selected by the #if below */
+		for(i = 0; i < dbstat->entries; i++)
+#if defined(C_INTERIX) || defined(C_OS2)
+		    if(!strcmp(dbstat->statdname[i], dent->d_name)) {
+#else
+		    if(dbstat->stattab[i].st_ino == sb.st_ino) {
+#endif
+			found = 1;
+			/* known entry whose mtime moved: report a change */
+			if(dbstat->stattab[i].st_mtime != sb.st_mtime) {
+			    closedir(dd);
+			    return 1;
+			}
+		    }
+
+		/* entry was not present when the table was recorded */
+		if(!found) {
+		    closedir(dd);
+		    return 1;
+		}
+	    }
+	}
+    }
+
+    closedir(dd);
+    return CL_SUCCESS;
+}
+
+/*
+ * Release everything a cl_stat structure owns and reset its fields so the
+ * structure can be reused or freed again safely.
+ *
+ * Returns CL_ENULLARG (after logging) when dbstat is NULL, CL_SUCCESS
+ * otherwise.
+ */
+int cl_statfree(struct cl_stat *dbstat)
+{
+#if defined(C_INTERIX) || defined(C_OS2)
+	int idx;
+#endif
+
+    if(!dbstat) {
+        cli_errmsg("cl_statfree(): Null argument passed\n");
+	return CL_ENULLARG;
+    }
+
+#if defined(C_INTERIX) || defined(C_OS2)
+    /* per-entry name copies exist only on these platforms */
+    if(dbstat->statdname) {
+	for(idx = 0; idx < dbstat->entries; idx++) {
+	    free(dbstat->statdname[idx]);	/* free(NULL) is a no-op */
+	    dbstat->statdname[idx] = NULL;
+	}
+	free(dbstat->statdname);
+	dbstat->statdname = NULL;
+    }
+#endif
+
+    free(dbstat->stattab);
+    dbstat->stattab = NULL;
+    dbstat->entries = 0;
+
+    free(dbstat->dir);
+    dbstat->dir = NULL;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Drop one reference to engine and destroy it when that was the last one.
+ *
+ * The reference count is decremented (under cli_ref_mutex in CL_THREAD_SAFE
+ * builds); if references remain the function returns immediately.
+ * Otherwise the per-target matchers, the MD5 hash lists, the MD5 section
+ * matcher, the zip/rar metadata lists and the dconf block are released,
+ * cli_freelocks() is called and the engine itself is freed.
+ *
+ * A NULL engine is logged and ignored.
+ */
+void cl_free(struct cl_engine *engine)
+{
+	int i;
+	struct cli_md5_node *md5pt, *md5h;
+	struct cli_meta_node *metapt, *metah;
+	struct cli_matcher *root;
+
+
+    if(!engine) {
+	cli_errmsg("cl_free: engine == NULL\n");
+	return;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_ref_mutex);
+#endif
+
+    engine->refcount--;
+    if(engine->refcount) {
+#ifdef CL_THREAD_SAFE
+	pthread_mutex_unlock(&cli_ref_mutex);
+#endif
+	return;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&cli_ref_mutex);
+#endif
+
+    if(engine->root) {
+	for(i = 0; i < CL_TARGET_TABLE_SIZE; i++) {
+	    if((root = engine->root[i])) {
+		if(!root->ac_only)
+		    cli_bm_free(root);
+		cli_ac_free(root);
+		free(root);
+	    }
+	}
+	free(engine->root);
+    }
+
+    if(engine->md5_hlist) {
+	for(i = 0; i < 256; i++) {
+	    md5pt = engine->md5_hlist[i];
+	    while(md5pt) {
+		/* advance before freeing the node we are standing on */
+		md5h = md5pt;
+		md5pt = md5pt->next;
+		free(md5h->md5);
+		free(md5h->virname);
+		free(md5h);
+	    }
+	}
+	free(engine->md5_hlist);
+    }
+
+    if((root = engine->md5_sect)) {
+	cli_bm_free(root);
+	free(root->soff);
+	free(root);
+    }
+
+    metapt = engine->zip_mlist;
+    while(metapt) {
+	metah = metapt;
+	metapt = metapt->next;
+	free(metah->virname);
+	if(metah->filename)
+	    free(metah->filename);
+	free(metah);
+    }
+
+    metapt = engine->rar_mlist;
+    while(metapt) {
+	metah = metapt;
+	metapt = metapt->next;
+	free(metah->virname);
+	if(metah->filename)
+	    free(metah->filename);
+	free(metah);
+    }
+
+    /* dconf may be NULL (e.g. an engine whose initialisation failed), so it
+     * must be checked before the phishing flag is read through it */
+    if(engine->dconf) {
+	if(((struct cli_dconf *) engine->dconf)->phishing & PHISHING_CONF_ENGINE)
+	    phishing_done(engine);
+
+	free(engine->dconf);
+    }
+
+    cli_freelocks();
+    free(engine);
+}
+
+/*
+ * Finish engine set-up after all databases have been loaded: add the type
+ * signatures, build the trie of every matcher present in engine->root[] and
+ * print the dconf settings.
+ *
+ * Returns cli_addtypesigs()'s error code if that step fails, CL_SUCCESS
+ * otherwise.
+ */
+int cl_build(struct cl_engine *engine)
+{
+	int idx, rc;
+	struct cli_matcher *matcher;
+
+
+    rc = cli_addtypesigs(engine);
+    if(rc)
+	return rc;
+
+    for(idx = 0; idx < CL_TARGET_TABLE_SIZE; idx++) {
+	matcher = engine->root[idx];
+	if(!matcher)
+	    continue;	/* no signatures loaded for this target */
+
+	/* FIXME: the return value of cli_ac_buildtrie() is not checked */
+	cli_ac_buildtrie(matcher);
+    }
+
+    cli_dconf_print(engine->dconf);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Take an additional reference to engine.  The reference count is bumped
+ * (under cli_ref_mutex in CL_THREAD_SAFE builds) and the same pointer is
+ * returned; no copy is made.  Returns NULL, after logging, for a NULL engine.
+ */
+struct cl_engine *cl_dup(struct cl_engine *engine)
+{
+    if(engine == NULL) {
+	cli_errmsg("cl_dup: engine == NULL\n");
+	return NULL;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_ref_mutex);
+    engine->refcount += 1;
+    pthread_mutex_unlock(&cli_ref_mutex);
+#else
+    engine->refcount += 1;
+#endif
+
+    return engine;
+}

Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c?rev=46573&view=auto

==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,186 @@
+/*
+ *  Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+/*
+** rebuildpe.c
+** 
+** 28/07/2k4 - Moved out of petitep.c
+** 08/08/2k4 - Fixed typo for sects characteristics
+**
+*/
+
+/*
+** Rebuilds a fully parsable / *not runnable* PE file including
+** a fake MZ header, a piece of informational 16bit code,
+** lookalike PE & Optional headers, an array of structures and
+** of course the real content.
+** Sections characteristics will have all the bits set.
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+
+#include "rebuildpe.h"
+#include "others.h"
+
+/* Both macros expand their arguments more than once where noted, so pass
+ * expressions without side effects. */
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define EC16(x) le16_to_host(x) /* Convert little endian to host */
+/* PEALIGN: round o down to a multiple of a; o is returned as is when a is 0 */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o))
+/* PESALIGN: round o up to a multiple of a; o is returned as is when a is 0 */
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o))
+
+
+/*
+ * In-memory overlay for the PE signature, file header and the fixed part of
+ * the optional header.  cli_rebuildpe() casts a pointer into its output
+ * buffer (offset 0xd0) to this type and stores values through EC16()/EC32(),
+ * so the member order and sizes must not be changed.
+ */
+struct IMAGE_PE_HEADER {
+    uint32_t Signature;
+    /* FILE HEADER */
+    uint16_t    Machine;
+    uint16_t    NumberOfSections;
+    uint32_t   TimeDateStamp;
+    uint32_t   PointerToSymbolTable;
+    uint32_t   NumberOfSymbols;
+    uint16_t    SizeOfOptionalHeader;
+    uint16_t    Characteristics;
+    /* OPTIONAL HEADER */
+    uint16_t    Magic;
+    uint8_t    MajorLinkerVersion;
+    uint8_t    MinorLinkerVersion;
+    uint32_t   SizeOfCode;
+    uint32_t   SizeOfInitializedData;
+    uint32_t   SizeOfUninitializedData;
+    uint32_t   AddressOfEntryPoint;
+    uint32_t   BaseOfCode;
+    uint32_t   BaseOfData;
+    /* NT additional fields. */
+    uint32_t   ImageBase;
+    uint32_t   SectionAlignment;
+    uint32_t   FileAlignment;
+    uint16_t    MajorOperatingSystemVersion;
+    uint16_t    MinorOperatingSystemVersion;
+    uint16_t    MajorImageVersion;
+    uint16_t    MinorImageVersion;
+    uint16_t    MajorSubsystemVersion;
+    uint16_t    MinorSubsystemVersion;
+    uint32_t   Win32VersionValue;
+    uint32_t   SizeOfImage;
+    uint32_t   SizeOfHeaders;
+    uint32_t   CheckSum;
+    uint16_t    Subsystem;
+    uint16_t    DllCharacteristics;
+    uint32_t   SizeOfStackReserve;
+    uint32_t   SizeOfStackCommit;
+    uint32_t   SizeOfHeapReserve;
+    uint32_t   SizeOfHeapCommit;
+    uint32_t   LoaderFlags;
+    uint32_t   NumberOfRvaAndSizes;
+    /* IMAGE_DATA_DIRECTORY follows.... */
+};
+
+/*
+ * Template for the first 0x148 bytes of the rebuilt file (cli_rebuildpe()
+ * copies exactly that many bytes).  It starts with "MZ", embeds the text
+ * "This file was created by ClamAV for internal use and should not be run."
+ * followed by the ClamAV URL, and carries the "PE\0\0" signature at offset
+ * 0xd0, which is where cli_rebuildpe() overlays struct IMAGE_PE_HEADER.
+ */
+#define HEADERS "\
+\x4D\x5A\x90\x00\x02\x00\x00\x00\x04\x00\x0F\x00\xFF\xFF\x00\x00\
+\xB0\x00\x00\x00\x00\x00\x00\x00\x40\x00\x1A\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xD0\x00\x00\x00\
+\x0E\x1F\xB4\x09\xBA\x0D\x00\xCD\x21\xB4\x4C\xCD\x21\x54\x68\x69\
+\x73\x20\x66\x69\x6C\x65\x20\x77\x61\x73\x20\x63\x72\x65\x61\x74\
+\x65\x64\x20\x62\x79\x20\x43\x6C\x61\x6D\x41\x56\x20\x66\x6F\x72\
+\x20\x69\x6E\x74\x65\x72\x6E\x61\x6C\x20\x75\x73\x65\x20\x61\x6E\
+\x64\x20\x73\x68\x6F\x75\x6C\x64\x20\x6E\x6F\x74\x20\x62\x65\x20\
+\x72\x75\x6E\x2E\x0D\x0A\x43\x6C\x61\x6D\x41\x56\x20\x2D\x20\x41\
+\x20\x47\x50\x4C\x20\x76\x69\x72\x75\x73\x20\x73\x63\x61\x6E\x6E\
+\x65\x72\x20\x2D\x20\x68\x74\x74\x70\x3A\x2F\x2F\x77\x77\x77\x2E\
+\x63\x6C\x61\x6D\x61\x76\x2E\x6E\x65\x74\x0D\x0A\x24\x00\x00\x00\
+\x50\x45\x00\x00\x4C\x01\xFF\xFF\x43\x4C\x41\x4D\x00\x00\x00\x00\
+\x00\x00\x00\x00\xE0\x00\x83\x8F\x0B\x01\x00\x00\x00\x10\x00\x00\
+\x00\x10\x00\x00\x00\x00\x00\x00\xFF\xFF\xFF\xFF\x00\x10\x00\x00\
+\x00\x10\x00\x00\xFF\xFF\xFF\xFF\x00\x10\x00\x00\x00\x02\x00\x00\
+\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x0A\x00\x00\x00\x00\x00\
+\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\
+\x00\x00\x10\x00\x00\x10\x00\x00\x00\x00\x10\x00\x00\x10\x00\x00\
+\x00\x00\x00\x00\x10\x00\x00\x00\
+"
+
+/*
+ * Build a parsable (not runnable) PE file from a set of sections and write
+ * it to the descriptor `file`.
+ *
+ * buffer   - memory holding the section data; section i is copied from
+ *            buffer + sections[i].raw for sections[i].rsz bytes
+ * sections - array of `sects` section descriptors (rva, vsz, raw, rsz)
+ * base     - value stored as ImageBase
+ * ep       - value stored as AddressOfEntryPoint
+ * ResRva / ResSize - stored at offsets 0x10 / 0x14 of the 0x80-byte area
+ *            that follows the fixed headers
+ *
+ * When the first section's rva lies beyond the aligned header size an extra
+ * "empty" section header is emitted in front of the real ones.
+ *
+ * Returns 0 on failure (too many sections, data too large, allocation
+ * failure, or cli_writen() returning -1) and 1 on success.
+ *
+ * NOTE(review): sections[0] is read unconditionally, so callers must pass at
+ * least one section, and the size of `buffer` is not known here, so the
+ * source range of each memcpy() cannot be validated in this function.
+ */
+int cli_rebuildpe(char *buffer, struct cli_exe_section *sections, int sects, uint32_t base, uint32_t ep, uint32_t ResRva, uint32_t ResSize, int file)
+{
+  uint32_t datasize=0, rawbase=PESALIGN(0x148+0x80+0x28*sects, 0x200);
+  char *pefile=NULL, *curpe;
+  struct IMAGE_PE_HEADER *fakepe;
+  int i, gotghost=(sections[0].rva > PESALIGN(rawbase, 0x1000));
+
+  if (gotghost) rawbase=PESALIGN(0x148+0x80+0x28*(sects+1), 0x200);
+
+  if(sects+gotghost > 96)
+    return 0;
+
+  /*
+   * Sum the file-aligned raw sizes.  Both the rounding and the running sum
+   * are 32-bit and could wrap around, which would let an oversized input
+   * slip past the allocation limit and overflow the buffer in the memcpy()
+   * below, so every step is checked against CLI_MAX_ALLOCATION.
+   */
+  for (i=0; i < sects; i++) {
+    uint32_t rawsz = PESALIGN(sections[i].rsz, 0x200);
+
+    if(rawsz < sections[i].rsz || rawsz > CLI_MAX_ALLOCATION - datasize)
+      return 0;
+    datasize += rawsz;
+  }
+
+  if((pefile = (char *) cli_calloc(rawbase+datasize, 1))) {
+    memcpy(pefile, HEADERS, 0x148);
+
+    /* from here on datasize tracks the virtual size of the image */
+    datasize = PESALIGN(rawbase, 0x1000);
+
+    fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
+    fakepe->NumberOfSections = EC16(sects+gotghost);
+    fakepe->AddressOfEntryPoint = EC32(ep);
+    fakepe->ImageBase = EC32(base);
+    fakepe->SizeOfHeaders = EC32(rawbase);
+    memset(pefile+0x148, 0, 0x80);
+    cli_writeint32(pefile+0x148+0x10, ResRva);
+    cli_writeint32(pefile+0x148+0x14, ResSize);
+    curpe = pefile+0x148+0x80;
+
+    if (gotghost) {
+      /* filler section covering the gap between the headers and the first
+       * real section; it has no raw data */
+      snprintf(curpe, 8, "empty");
+      cli_writeint32(curpe+8, sections[0].rva-datasize); /* vsize */
+      cli_writeint32(curpe+12, datasize); /* rva */
+      cli_writeint32(curpe+0x24, 0xffffffff);
+      curpe+=40;
+      datasize+=PESALIGN(sections[0].rva-datasize, 0x1000);
+    }
+
+    for (i=0; i < sects; i++) {
+      /* each section header is 40 (0x28) bytes; the name field is 8 bytes */
+      snprintf(curpe, 8, ".clam%.2d", i+1);
+      cli_writeint32(curpe+8, sections[i].vsz);
+      cli_writeint32(curpe+12, sections[i].rva);
+      cli_writeint32(curpe+16, sections[i].rsz);
+      cli_writeint32(curpe+20, rawbase);
+      /* already zeroed
+      cli_writeint32(curpe+24, 0);
+      cli_writeint32(curpe+28, 0);
+      cli_writeint32(curpe+32, 0);
+      */
+      cli_writeint32(curpe+0x24, 0xffffffff);
+      memcpy(pefile+rawbase, buffer+sections[i].raw, sections[i].rsz);
+      rawbase+=PESALIGN(sections[i].rsz, 0x200);
+      curpe+=40;
+      datasize+=PESALIGN(sections[i].vsz, 0x1000);
+    }
+    fakepe->SizeOfImage = EC32(datasize);
+  } else {
+    return 0;
+  }
+
+  /* rawbase now equals the total number of bytes laid out in pefile */
+  i = (cli_writen(file, pefile, rawbase)!=-1);
+  free(pefile);
+  return i;
+}





More information about the llvm-commits mailing list