/****************************************************************************
* dictionary.cpp
*
* Copyright (C) 2002 Latchesar Ionkov <lionkov@yahoo.com>
*
* This program is based on the kbedic dictionary by 
* Radostin Radnev <radnev@yahoo.com>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
****************************************************************************/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>

#include <string>
#include <vector>
#include <map>
#include <algorithm>

#include "dictionary.h"
#include "utf8.h"
#include "shcm.h"
#include "file.h"

using namespace std;
/**
 * DictImpl class implements the abstract Dictionary class
 *
 * Format of the dictionary database
 *
 *    The database has two parts: header and data.
 *
 *    1. Header
 *    	 The header contains a collection of property values.
 *	 All data in the header is encoded using UTF-8.
 *
 *	 The format of a line of the header is:
 *	 	<name> '=' <value>
 *
 *	 Currently defined properties:
 *		- id			required
 *		  the name of the database (as will be 
 * 		  shown to the user)
 *
 *		- search-ignore-chars	optional (default: "-.")
 *		  list of the characters that will be 
 *		  ignored while doing search
 *
 * 		- max-word-length	optional (default: 50)
 *		  maximum length of the word entry
 *
 *		- max-entry-length	optional (default: 16384)
 *		  maximum length of database entry
 *
 *	 The header ends with '\0' character.
 *
 *    2. Data
 *
 *	 The data section of the dictionary contains
 *	 variable-size entries. Every entry defines
 *	 a single word and (all) its meanings.
 *	 The entries in the database are sorted. The
 *	 comparison used while sorting ignores the character 
 * 	 case and the characters that should be ignored 
 * 	 (see search-ignore-chars).
 *
 *	 The entry contains two values: word and sense. 
 *	 Both are variable-size, the delimiter between them
 *	 is an '\n' character.
 *
 *	 The entry ends with '\0' character.
 *
 * The class uses binary search to find a word.
 * It creates an index to improve the searches.
 *
 */
class DictImpl : public Dictionary {
public:
	/**
	 * Creates new dictionary object
	 * If an error occurs, the error description (see getError)
	 * is set to a non-empty string
	 *
	 * @param filename name of the dictionary file
	 */
	DictImpl(const char* filename);
	virtual ~DictImpl();

	/**
	 * Returns dictionary name
	 *
	 * @return name of the dictionary as set in the 
	 * dictionary properties header
	 */
	virtual const string& getName() const;

	/**
	 * Looks for a word in the dictionary
	 *
	 * Sets the internal dictionary state to point to a
	 * word equal or greater (in lexicographical terms)
	 * to the one specified as parameter.
	 *
	 * Before two words are compared, they are canonized.
	 * I.e. the characters specified in search-ignore-chars 
	 * property are removed and both words are set 
	 * to uppercase.
	 *
	 * Parameter subword is set to true if the word the
	 * dictionary points to starts with the word specified
	 * as a parameter.
	 *
	 * @param word word to look for
	 * @param subword flag if word is subword
	 *
	 * @return true if exact match is found
	 */
	virtual bool findEntry(const string& word, bool& subword);

	/**
	 * Moves the internal word pointer to the next word.
	 *
	 * If the pointer is set to the last word, it is 
	 * not changed.
	 * 
	 * @return true if the pointer is moved
	 */
	virtual bool nextEntry();

	/**
	 * Moves the internal word pointer to the previous word.
	 *
	 * If the pointer is set to the first word, it is 
	 * not changed.
	 * 
	 * @return true if the pointer is moved
	 */
	virtual bool prevEntry();

	/**
	 * Moves the internal word pointer to the first word.
	 * 
	 * @return true if the word is read successfully
	 */
	virtual bool firstEntry();

	/**
	 * Moves the internal word pointer to the last word.
	 * 
	 * @return true if the word is read successfully
	 */
	virtual bool lastEntry();

	/**
	 * Moves the internal word pointer to a randomly chosen
	 * entry.
	 * 
	 * @return true if the word is read successfully
	 */
	virtual bool randomEntry();

	/**
	 * Returns the word pointed to by the internal word pointer
	 *
	 * @return current word
	 */
	virtual const string& getWord() const;

	/**
	 * Returns the sense of the word pointed to by the 
	 * internal word pointer
	 *
	 * @return sense
	 */
	virtual const string& getSense() const;

	/**
	 * Returns the error description, or an empty string if
	 * no error has been recorded
	 *
	 * @return error description
	 */
	virtual const string& getError() const {
		return errorDescr; 
	}

	/**
	 * Creates new dictionary file with the same 
	 * database of words, but different compression method
	 *
	 * @param filename the filename of the new dictionary file
	 * @param compress_method compression method
	 *	currently only "none" and "shcm" are allowed as 
	 * 	compression methods
	 * @param sort if true, the entries are re-sorted by their
	 *	canonized word before they are written
	 *
	 * @return true if the new file was written
	 */
	virtual bool xerox(const string& filename, 
		const string& compress_method, bool sort = true);

	/**
	 * Creates new dictionary file with the same 
	 * database of words, but different compression method
	 *
	 * @param fd the file descriptor to write to
	 * @param compress_method compression method
	 *	currently only "none" and "shcm" are allowed as 
	 * 	compression methods
	 * @param sort if true, the entries are re-sorted by their
	 *	canonized word before they are written
	 *
	 * @return true if the dictionary was written
	 */
	virtual bool xerox(int fd, const string& compress_method, bool sort);

protected:
	/**
	 * This structure represents an index entry
	 *
	 * It contains a canonized word value and the file
	 * position of the entry for that word
	 */
	struct IndexEntry {
		string word;
		long pos;

		IndexEntry(string w, long p):word(w), pos(p) {
		}
	};

	// the dictionary file (plain File or DZFile, chosen by the constructor)
	File* fdata;

	// position of the first entry
	long firstEntryPos;

	// position of the last entry
	long lastEntryPos;

	// error description (empty when no error was recorded)
	string errorDescr;

	// properties (see the class overall documentation)
	string name;
	vector<string> ignoreChars;
	int maxWordLength;
	int maxEntryLength;

	// general purpose buffer (maxEntryLength bytes, allocated by the constructor)
	char* buf;

	// current word
	string currWord;

	// the sense of the current word; while senseCompressed is true it
	// still holds the escaped/encoded form and getSense() decodes it
	mutable string currSense;
	mutable bool senseCompressed;

	// current position
	long currPos;

	// next position, or -1 if not defined
	long nextPos;
	
	// index table
	vector<IndexEntry> index;

	// property values
	map<string, string> properties;

	// decoder for "shcm" compressed dictionaries, 0 when not compressed
	SHCM* compressor;

	// records the description of the most recent error
	void setError(const string& err) {
		errorDescr = err; 
//		printf("Error: %s\n", (const char*) errorDescr.utf8());
	}

	/**
	 * Reads the properties of the dictionary.
	 *
	 * The header is read starting at the beginning of the file.
	 *
	 * Updates properties field with the values of the
	 * properties read from the header.
	 *
	 * @return position of the first byte after the header
	 */
	int readProperties();

	/**
	 * Reads a line from the header.
	 *
	 * The read starts at the position passed in the second
	 * parameter, which is advanced past the data consumed.
	 *
	 * The first parameter receives the text of the line.
	 *
	 * @return length of the line; 0 when the '\0' end-of-header
	 * marker is reached, on a read error, or for an empty line
	 */
	int getLine(string&, int&);

	/**
	 * Reads an entry starting from the specified position.
	 *
	 * Updates currWord, currSense, currPos and nextPos fields.
	 * 
	 * The method expects that pos points to the start of an
	 * entry. If not, the results are undefined.
	 *
	 * @param pos start of the entry to read
	 *
	 * @return true if read was successful
	 */
	bool readEntry(long pos);

	/**
	 * Looks backward for a start of an entry.
	 *
	 * There are no restrictions for the value of the pos
	 * parameter. If the value is less than the position of
	 * the first entry, the position of the first entry is 
	 * returned. If the value is greater than the position of
	 * the last entry, the position of the last entry is 
	 * returned.
	 *
	 * @param pos position to start scanning backward from
	 *
	 * @return start position of an entry, or -1 on read error
	 */
	long findPrev(long pos);

	/** 
	 * Looks forward for a start of an entry.
	 *
	 * There are no restrictions for the value of the pos
	 * parameter. If the value is less than the position of
	 * the first entry, the position of the first entry is 
	 * returned. If the value is greater than the position of
	 * the last entry, the position of the last entry is 
	 * returned.
	 *
	 * @param pos position to start scanning forward from
	 *
	 * @return start position of an entry
	 */
	long findNext(long pos);

	/**
	 * Puts a string in canonized form ready for 
	 * comparison.
	 * 
	 * All the characters in the word are upper-cased.
	 * All the characters that should be ignored are
	 * removed.
	 *
	 * @param s the word to canonize
	 *
	 * @return canonical form of the word
	 */

	string canonizeWord(const string& s);

	/**
	 * Index lookup
	 *
	 * Using the index, finds the region in the file where
	 * the specified word is defined.
	 *
	 * @param s word to look for
	 * @param b output param. sets the start of the region
	 * @param e output param. sets the end of the region
	 */
	void bsearchIndex(const string& s, long& b, long& e);

	/**
	 * Checks if the dictionary file is ok
	 *
	 * @return true if the checks passed
	 */
	bool checkIntegrity();

	// entry delimiter character
	static const char DATA_DELIMITER;

	// word delimiter character
	static const char WORD_DELIMITER;

	/**
	 * Compares two words
	 *
	 * The words should be put in canonical form 
	 * before this method is called
	 *
	 * @return the result of s1.compare(s2)
	 */
	static int compare(const string& s1, const string& s2) { 
		return s1.compare(s2); 
	}

	// escape() replaces the delimiter bytes and ESC (27) by two-byte
	// ESC sequences; unescape() is the inverse transformation
	static string escape(const string& s);
	static string unescape(const string& s);
};


/***************************************************************************
 *
 * Dictionary class implementation
 *
 **************************************************************************/
/**
 * Factory for dictionaries: builds a DictImpl over the given file.
 *
 * The returned object is heap allocated with new; the constructor
 * reports failures through getError().
 */
Dictionary* Dictionary::create(const char* filename) {
	return new DictImpl(filename);
}

// Out-of-line definition of the base-class destructor; it has nothing to do.
Dictionary::~Dictionary() {}

/***************************************************************************
 *
 * DictImpl class implementation
 *
 **************************************************************************/

// Byte that terminates every database entry (and the header).
const char DictImpl::DATA_DELIMITER='\0';
// Byte that separates the word from its sense inside an entry.
const char DictImpl::WORD_DELIMITER='\n';

/**
 * Opens the dictionary file and loads its header and index.
 *
 * A file whose name ends in ".dz" is opened through DZFile, anything
 * else through File. Failures are reported through getError(); the
 * object is left in a defined (but unusable) state in that case.
 *
 * @param filename name of the dictionary file
 */
DictImpl::DictImpl(const char* filename) {

	// Give every member that the destructor or the accessors touch a
	// defined value first, so the early returns below are safe.
	compressor = 0;
	buf = 0;
	firstEntryPos = 0;
	lastEntryPos = 0;
	currPos = 0;
	nextPos = -1;
	maxWordLength = 0;
	maxEntryLength = 0;
	senseCompressed = false;

	const size_t flen = strlen(filename);
	if (flen>3 && strcmp(&filename[flen - 3], ".dz")==0) {
		fdata = new DZFile();
	} else {
		fdata = new File();
	}

	if (fdata->open(filename) < 0) {
		setError(strerror(errno));
		return;
	}

	// find and set the position of the last word; firstEntryPos is
	// temporarily 0 so that findPrev may scan the whole file
	firstEntryPos = 0;
	lastEntryPos = fdata->size() - 2;
	lastEntryPos = findPrev(lastEntryPos);

	// read dictionary header
	// set the position of the first word
	firstEntryPos = readProperties();
	currPos = firstEntryPos;

	// the entry buffer size comes from the header; refuse sizes that
	// cannot be allocated
	if (maxEntryLength <= 0) {
		setError("invalid max-entry-length");
		return;
	}
	buf = new char[maxEntryLength];

	// the index positions in the header are relative to the start of
	// the data section; make them absolute file positions
	for(vector<IndexEntry>::iterator it = index.begin(); it != index.end(); ++it) {
		(*it).pos += firstEntryPos;
	}

	// check the integrity
	if (!checkIntegrity()) {
		setError("Integrity failure");
	}

	nextPos = -1;
}

/**
 * Releases the entry buffer and closes the dictionary file.
 */
DictImpl::~DictImpl() {
	// buf is allocated with new char[...], so it has to be released
	// with the array form of delete
	if (buf) {
		delete[] buf;
	}

	fdata->close();
	// NOTE(review): fdata and compressor are never deleted here. Deleting
	// them is only safe if File and SHCM declare virtual destructors,
	// which cannot be confirmed from this file - verify and release them.
}

// Gives read-only access to the dictionary name loaded from the header.
const string& DictImpl::getName() const {
	const string& dictName = name;
	return dictName;
}

bool DictImpl::findEntry(const string& w, bool& subword) {
	long b, e, m;
	bool found;
	string cw;
	
	b = firstEntryPos;
	e = lastEntryPos;

	string word = canonizeWord(w);

	struct timeval tv;
	gettimeofday(&tv, NULL);
//	fprintf(stderr, "findEntry: > %s %015ld %015ld\n", word.c_str(), tv.tv_sec, tv.tv_usec);

	bsearchIndex(word, b, e);
//	printf("findEntry: b=%ld, e=%ld\n", b, e);

	if ( b>=e ) {
		readEntry(b);
		cw = canonizeWord(currWord);
		found = compare(word, cw) == 0;
	} else {
		found = false;
	}

	while (b < e) {
		m = (b+e)/2;
		m = findPrev(m);
		if ((m < 0) || !readEntry(m)) {
			currWord=string();
			currSense=string();
			senseCompressed=false;
			currPos=firstEntryPos;
			return false;
		}

		cw = canonizeWord(currWord);
//		printf("findEntry: compare %s:%s\n", word.c_str(), cw.c_str());
		int cmp = compare(word, cw);
		if (cmp == 0) {
			found = true;
			break;
		} else if (cmp < 0) {
			e = currPos;
		} else {
			b = findNext(m+1);
		}
	}

	if (!found) {
		if (compare(word, cw) > 0) {
			nextEntry();
			cw = canonizeWord(currWord);
		}
	}

	subword = cw.substr(0, word.size()) == word;
//	printf("findEntry: meaning=%s\n", currSense.c_str());

	gettimeofday(&tv, NULL);
//	fprintf(stderr, "findEntry: < %015ld %015ld\n", tv.tv_sec, tv.tv_usec);
	return found;
}

/**
 * Advances to the entry after the current one.
 *
 * The position remembered by the last successful read is used when
 * available; otherwise the file is scanned forward for the next entry.
 *
 * @return true if a different entry was read, false if the position
 *         did not change or the read failed
 */
bool DictImpl::nextEntry() {
	long target = (nextPos > 0) ? nextPos : findNext(currPos+1);

	// never move beyond the last entry
	if (target > lastEntryPos) {
		target = lastEntryPos;
	}

	return (target != currPos) && readEntry(target);
}

/**
 * Moves back to the entry before the current one.
 *
 * @return true if a different entry was read, false if the current
 *         entry is already the first one or the file could not be read
 */
bool DictImpl::prevEntry() {
	long pos;

	// decide from the CURRENT position (not from the still unset local)
	// whether there is anything in front of us
	if (currPos < firstEntryPos+1) {
		pos = firstEntryPos;
	} else {
		// currPos-1 is the delimiter closing the previous entry, so
		// start the backward scan on the byte before it
		pos = findPrev(currPos-2);
	}

	if (pos < 0) {
		// findPrev failed and already recorded the error
		return false;
	}

	if (pos > lastEntryPos+1) {
		pos = lastEntryPos;
	}

	if (pos == currPos) {
		return false;
	}

	return readEntry(pos);
}

// Loads the entry stored at the very beginning of the data section.
bool DictImpl::firstEntry() {
	const long start = firstEntryPos;
	return readEntry(start);
}

// Loads the entry stored at the end of the data section.
bool DictImpl::lastEntry() {
	const long start = lastEntryPos;
	return readEntry(start);
}

/**
 * Moves the internal word pointer to a randomly chosen entry.
 *
 * A byte offset is drawn uniformly from [firstEntryPos, lastEntryPos)
 * and the entry that starts after that byte is read, so an entry is
 * picked with a probability proportional to the length of the entry in
 * front of it. An offset of zero selects the first entry.
 *
 * @return true if the word is read successfully
 */
bool DictImpl::randomEntry() {
	// scale rand() onto the data section only: [0, last - first)
	const double span = (double) (lastEntryPos - firstEntryPos);
	const long offset = (long) (span * (rand() / (RAND_MAX + 1.0)));

	if (offset <= 0) {
		// findNext always skips forward, so this is the only way the
		// first entry can be chosen
		return readEntry(firstEntryPos);
	}

	const long pos = findNext(firstEntryPos + offset);
	if (pos < firstEntryPos) {
		// findNext reported a read failure
		return false;
	}

	return readEntry(pos);
}

// Gives read-only access to the word of the entry read last.
const string& DictImpl::getWord() const {
	const string& word = currWord;
	return word;
}

/**
 * Returns the sense of the current entry.
 *
 * For compressed dictionaries the sense is kept in its stored form
 * until it is first requested; it is unescaped and decoded here and
 * the decoded text replaces the stored form.
 */
const string& DictImpl::getSense() const {
	if (!senseCompressed) {
		return currSense;
	}

	const string raw = unescape(currSense);
	currSense = raw;
	currSense = compressor->decode(currSense);
	senseCompressed = false;

	return currSense;
}

/**
 * Narrows [b, e] to the file region that may contain word s.
 *
 * The index holds (canonized word, position) pairs. The slot whose
 * word is not greater than s gives the start of the region, the
 * following slot (or the last entry) gives its end. With fewer than
 * two index slots b and e are left untouched.
 */
void DictImpl::bsearchIndex(const string& s, long& b, long& e) {
	const int count = (int) index.size();
	int lo = 0;
	int hi = count - 1;
	int slot = 0;

	// nothing to bisect
	if (lo >= hi) {
		return;
	}

	while (lo < hi) {
		slot = (lo + hi) / 2;
		const int order = compare(s, index[slot].word);

		if (order == 0) {
			break;
		}

		if (order < 0) {
			hi = slot;
		} else {
			// continue right of the probed slot
			lo = ++slot;
		}
	}

	if (count <= slot) {
		b = lastEntryPos;
		e = lastEntryPos;
		return;
	}

	// step back when the slot found is already beyond the word
	if (slot > 0 && compare(s, index[slot].word) < 0) {
		slot--;
	}

	b = index[slot].pos;
	e = ((slot + 1) < count) ? index[slot + 1].pos : lastEntryPos;
}

/**
 * Reads the entry that starts at pos into currWord / currSense.
 *
 * The entry is read into buf in chunks until the entry delimiter is
 * seen. On success currPos is the start of the entry and nextPos the
 * position right after its delimiter. For compressed dictionaries the
 * word is decoded immediately, the sense only on demand (getSense).
 *
 * @param pos start of the entry; values past the last entry are
 *        clamped to the last entry
 *
 * @return true if the entry was read, false on I/O error, if no entry
 *         delimiter was found within maxEntryLength bytes, or if the
 *         entry has no word delimiter
 */
bool DictImpl::readEntry(long pos) {
	int clen = maxEntryLength / 4;
	int n = 0;

	// always make progress, even for a tiny buffer
	if (clen < 1) {
		clen = 1;
	}

	// find the start position of the entry
	if (pos > lastEntryPos) {
		pos = lastEntryPos;
	} 

	currPos = pos;
	nextPos = -1;
	char* pp = 0;

	// read the entry
	while (n < maxEntryLength) {
		// never ask for more than what is left of buf: the chunk size
		// does not have to divide maxEntryLength and reads may be short
		int want = maxEntryLength - n;
		if (want > clen) {
			want = clen;
		}

		int i = fdata->read(currPos + n, &buf[n], want);
		if (i < 0) {
			setError(strerror(errno));
			return false;
		} else if (i == 0) {
			break;
		}

		pp = (char *) memchr(&buf[n], DATA_DELIMITER, i);
		if (pp != 0) {
			break;
		}
		n += i;
	}

	if (pp == 0) {
		setError("entry too long");
		return false;
	}

	// split "<word>\n<sense>\0" in place
	char* p = (char *) memchr(buf, WORD_DELIMITER, pp-buf);
	if (p == 0) {
		setError("invalid format");
		return false;
	}
	*p = '\0';
	currWord = string(buf);
	if (compressor != 0) {
		currWord = unescape(currWord);
		currWord = compressor->decode(currWord);
	}


	*pp = '\0';
	nextPos = currPos + (pp-buf) + 1;

	currSense=string(p+1);
	senseCompressed=false;
	if (compressor != 0) {
		// decoding of the sense is postponed until getSense() is called
		senseCompressed=true;
	}

	return true;
}

/**
 * Scans backward from pos for the closest entry delimiter and returns
 * the position right after it, i.e. the start of an entry.
 *
 * Positions outside [firstEntryPos, lastEntryPos] are clamped to the
 * nearest bound without touching the file. If no delimiter is found
 * the position of the first entry is returned.
 *
 * @return start position of an entry, or -1 if the file could not be
 *         read (the error description is set)
 */
long DictImpl::findPrev(long pos) {
	char window[256];

	if (pos < firstEntryPos) {
		return firstEntryPos;
	}

	if (pos > lastEntryPos) {
		return lastEntryPos;
	}

	for (long cursor = pos; cursor > firstEntryPos; ) {
		// the window ends at cursor and never reaches in front of
		// the first entry
		int span = sizeof(window);
		if ((cursor - span) < firstEntryPos) {
			span = cursor - firstEntryPos + 1;
		}

		const long start = cursor - span + 1;
		const int got = fdata->read(start, window, span);
		if (got != span) {
			setError(strerror(errno));
			return -1;
		}

		// inspect the window from its end towards its start
		int i = span;
		while (i-- > 0) {
			if (window[i] == DATA_DELIMITER) {
				return start + i + 1;
			}
		}

		cursor -= span;
	}

	return firstEntryPos;
}

/**
 * Scans forward from pos for the next entry delimiter and returns the
 * position right after it, i.e. the start of the following entry.
 *
 * Positions outside [firstEntryPos, lastEntryPos] are clamped to the
 * nearest bound without touching the file.
 *
 * @return start position of an entry, or -1 if the file could not be
 *         read or ended before a delimiter was seen (the error
 *         description is set in both cases)
 */
long DictImpl::findNext(long pos) {
	char s[256];

	if (pos < firstEntryPos) {
		return firstEntryPos;
	}

	if (pos > lastEntryPos) {
		return lastEntryPos;
	}

	while (1) {
		int n = fdata->read(pos, s, sizeof(s));

		if (n < 0) {
			// report failure with the same sentinel as below; the
			// function returns a position, not a boolean
			setError(strerror(errno));
			return -1;
		}

		if (n == 0) {
			setError("internal error");
			return -1;
		}

		char* p = (char *) memchr(s, DATA_DELIMITER, n);
		if (p != 0) {
			pos += (p - s) + 1;
			break;
		}

		pos += n;
	}

	return pos;
}

/**
 * Puts a word into the canonical form used for comparisons.
 *
 * Every occurrence of each ignored character sequence is removed,
 * then the result is upper-cased with Utf8::toupper.
 *
 * @param word the word to canonize
 *
 * @return canonical form of the word
 */
string DictImpl::canonizeWord(const string& word) {
	string s = word;
	for(unsigned int i = 0; i < ignoreChars.size(); i++) {
		const string& ignored = ignoreChars[i];

		// an empty token matches everywhere and can never be erased;
		// skipping it avoids an endless loop
		if (ignored.empty()) {
			continue;
		}

		// search from the start each time: erasing may join the
		// neighbouring characters into a new occurrence
		string::size_type n;
		while ((n = s.find(ignored)) != string::npos) {
			s.erase(n, ignored.size());
		}
	}

	string ss;
	Utf8::toupper(s, ss);
	return ss;
}

// Parses a header length value. Accepts only strictly positive numbers
// that still fit into an int once margin is added; on success stores
// value + margin in out and returns true, otherwise leaves out alone.
static bool parseHeaderLength(const string& text, int margin, int& out) {
	if (text.size() == 0) {
		return false;
	}

	char* eptr;
	const long v = strtol(text.c_str(), &eptr, 0);
	if (*eptr != '\0' || v <= 0 || v > 0x7FFFFF00L) {
		return false;
	}

	out = (int) v + margin;
	return true;
}

/**
 * Reads the dictionary header and applies its properties.
 *
 * The header is read line by line from the start of the file until
 * getLine reports its end. Each "<name>=<value>" line is unescaped and
 * stored in properties; lines without '=' are skipped. The values are
 * then used to set name, ignoreChars, maxWordLength, maxEntryLength,
 * compressor and index. The "index" property is removed from
 * properties after it has been parsed.
 *
 * @return position of the first byte after the header, or 0 when a
 *         compressed dictionary carries no "shcm-tree" property
 */
int DictImpl::readProperties() {
	properties.clear();
	int pos = 0;

	while (1) {
		string line;

		if (getLine(line, pos) <= 0) {
			break;
		}

		const string::size_type eq = line.find('=');
		if (eq == string::npos) {
			continue;
		}

		string key = unescape(line.substr(0, eq));
		string value = unescape(line.substr(eq+1));
		properties[key] = value;
	}

	// get dictionary name
	name = properties["id"];

	// get the chars that are ignored while searching
	string ic = properties["search-ignore-chars"];
	if (ic.size() == 0) {
		ic = "-.";
		properties["search-ignore-chars"] = ic;
	}

	// split the list into one string per UTF-8 character
	const char *s, *t;
	s = ic.c_str();

	while (*s != 0) {
		t = s;
		// NOTE(review): 128 is presumably the decoder's error rune -
		// confirm against utf8.h
		if (Utf8::chartorune(&s) == 128) {
			break;
		}

		if (s <= t) {
			// the decoder made no progress; stop instead of spinning
			break;
		}

		ignoreChars.push_back(string(t, (s-t)));
	}
	
	// get the maximum length of a word (a small margin is added);
	// missing or unusable values keep the default
	maxWordLength = 50;
	parseHeaderLength(properties["max-word-length"], 5, maxWordLength);

	// get the maximum length of word record 
	// (word and sense including); this becomes the size of the entry
	// buffer, so non-positive values are rejected
	maxEntryLength = 16384;
	parseHeaderLength(properties["max-entry-length"], 10, maxEntryLength);

	// read compression method
	string ns = properties["compression-method"];
	if (ns.size() == 0) {
		ns = "none";
	}

	if (ns == "shcm") {
		compressor = SHCM::create();
		ns = properties["shcm-tree"];
		if (ns.size() == 0) {
			setError("no shcm tree");
			return 0;
		}

		// this is unnecessary second unescape
		// i am leaving it for now for backward compatibility
		// with already broken dictionaries 
		ns = unescape(ns);
		compressor->startDecode(ns);
	}

	// read the index: a sequence of "\0<word>\n<position>" records
	ns = properties["index"];

	string::size_type start = 0;	// offset of the '\0' opening a record
	do {
		string::size_type end = ns.find((char) 0, start+1);
		if (end == string::npos) {
			end = ns.size();
		}

		if (end == start) {
			break;
		}

		const string idx(ns, start+1, end-start);
		const string::size_type k = idx.find('\n');
		if (k == string::npos) {
			// a record without a position: the index is unusable
			index.clear();
			break;
		}

		const string word(idx, 0, k);
		const string posText(idx, k+1);
		char* eptr;

		// posText may carry the '\0' of the next record; strtol stops
		// there, which is what the check below relies on
		long l = strtol(posText.c_str(), &eptr, 0);
		if (*eptr != '\0') {
			index.clear();
			break;
		}

		index.push_back(IndexEntry(word, l));
		
		start = end;
	} while (start < ns.size());

	properties.erase("index");
	return pos;
}
			
/**
 * Reads one header line starting at file position pos.
 *
 * The text up to (not including) the next '\n' is stored in line and
 * pos is advanced past the line and its terminator. When a '\0' is met
 * first, pos is advanced past that byte and 0 is returned.
 *
 * @return length of the line; 0 at the end of the header, on a read
 *         error (pos is not changed then) or for an empty line
 */
int DictImpl::getLine(string& line, int& pos) {
	char chunk[90];
	int offset = pos;

	line.erase();
	for (;;) {
		const int got = fdata->read(offset, chunk, sizeof(chunk));
		if (got < 0) {
			return 0;
		}
		if (got == 0) {
			break;
		}

		// locate the first terminator inside the chunk
		int k = 0;
		while (k < got && chunk[k] != 0 && chunk[k] != '\n') {
			k++;
		}

		if (k < got && chunk[k] == 0) {
			// end-of-header marker
			pos += line.size() + k + 1;
			return 0;
		}

		line.append(chunk, k);
		if (k < got) {
			// stopped on a newline: the line is complete
			break;
		}

		offset += got;
	}

	pos += line.size() + 1;
	return line.size();
}

bool DictImpl::xerox(const string& filename, const string& compress_method, 
	bool sort) {

	int fd = creat(filename.c_str(), S_IREAD | S_IWRITE);

	if (fd < 0) {
		return false;
	}

	bool ret = xerox(fd, compress_method, sort);

	close(fd);

	if (!ret) {
		unlink(filename.c_str());
	}

	return ret;
}

// Writes exactly len bytes to fd, continuing after short writes and
// retrying when the call is interrupted. Returns false on failure.
static bool xeroxWriteAll(int fd, const char* data, size_t len) {
	while (len > 0) {
		const ssize_t k = write(fd, data, len);
		if (k < 0) {
			if (errno == EINTR) {
				continue;
			}
			return false;
		}
		if (k == 0) {
			return false;
		}
		data += k;
		len -= k;
	}

	return true;
}

/**
 * Writes a copy of this dictionary to an open file descriptor.
 *
 * All entries are read, optionally re-sorted by canonized word,
 * optionally compressed, and written behind a freshly generated header
 * (length limits, compression method, index, data size).
 *
 * @param fd descriptor to write to
 * @param compress_method "shcm" compresses the entries, any other
 *        value stores them as they are
 * @param do_sort if true, the entries are sorted by canonized word
 *
 * @return true if everything was read and written successfully
 */
bool DictImpl::xerox(int fd, const string& compress_method, 
	bool do_sort) {

	static char wdelim[] = { WORD_DELIMITER };
	static char ddelim[] = { DATA_DELIMITER };
	static char eql[] = { '=' };
	static char nl[] = { '\n' };

	// NOTE(review): compr is never released; whether it may be deleted
	// depends on SHCM's destructor, which is not visible from here.
	SHCM* compr = 0;
	string s;
	string shcm_tree;

	if (compress_method == "shcm") {
		compr = SHCM::create();
	}

	// sorting
	typedef pair<string, long> index_type;
	typedef pair<string, string> entry_type;

	// first walk: remember (canonized word, position) of every entry
	// and measure the longest word and the longest record
	vector<index_type> eidx;
	unsigned int mrl = 0;
	unsigned int mwl = 0;
	long dsize = 0;
	if (!firstEntry()) {
		return false;
	}
	do {
		const string& w = getWord();
		if (mwl < w.size()) {
			mwl = w.size();
		}

		unsigned int d = w.size() + getSense().size() + 2;
		if (mrl < d) {
			mrl = d;
		}

		eidx.push_back(index_type(canonizeWord(w), currPos));
	} while (nextEntry());

	if (do_sort) {
		sort(eidx.begin(), eidx.end());
	}

	// load up the dictionary
	vector<index_type>::iterator iit;
	vector<entry_type> entries;
	entries.reserve(eidx.size());
	for(iit = eidx.begin(); iit != eidx.end(); ++iit) {
		if (!readEntry((*iit).second)) {
			// do not write stale data for an unreadable entry
			return false;
		}
		entries.push_back(entry_type(getWord(), getSense()));
	}

	vector<entry_type>::iterator it;

	if (compr != 0) {
		// first pass of compression 
		compr->startPreEncode();

		for(it = entries.begin(); it != entries.end(); ++it) {
			compr->preencode((*it).first);
			compr->preencode((*it).second);
		}

		shcm_tree = compr->endPreEncode();

		// second pass of compression
		for(it = entries.begin(); it != entries.end(); ++it) {
			(*it).first = escape(compr->encode((*it).first));
			(*it).second = escape(compr->encode((*it).second));
		}
	}

	// creating the index: one slot for the first entry, one roughly
	// every 32K of data and one for the last entry; positions are
	// relative to the start of the data section
	long pos = 0;
	int n = 0;
	vector<index_type> idx;
	idx.push_back(index_type(eidx[0].first, 0));

	for(unsigned int i = 0; i < (entries.size() - 1); i++) {
		int m;

		if (n > 32768) {
			idx.push_back(index_type(eidx[i].first, pos+n));
			pos += n;
			n = 0;
		}

		m = entries[i].first.size() + entries[i].second.size() + 2;
		dsize += m;
		n += m;
	}

	idx.push_back(index_type(eidx[entries.size() - 1].first, pos+n));
	n = entries[entries.size() - 1].first.size() + 
		entries[entries.size() - 1].second.size() + 2;
	dsize += n;

	// serialize the index as "\0<word>\n<position>" records; the
	// record is assembled in the string itself, so neither the word
	// nor the position can be truncated by a fixed-size buffer
	char num[32];
	s.erase();
	for(iit=idx.begin(); iit != idx.end(); ++iit) {
		s += (char) 0;
		s += (*iit).first.c_str();
		s += '\n';
		snprintf(num, sizeof(num), "%ld", (*iit).second);
		s += num;
	}

	// saving the dictionary properties
	map<string, string> prop(properties);

	snprintf(num, sizeof(num), "%u", mrl);
	prop["max-entry-length"] = num;
	snprintf(num, sizeof(num), "%u", mwl);
	prop["max-word-length"] = num;
	prop["compression-method"] = compress_method;
	if (compr != 0) {
		// this is unnecessary second escape
		// i am leaving it for now for backward compatibility
		// with already broken dictionaries 
		prop["shcm-tree"] = escape(shcm_tree); 
	} else {
		prop.erase("shcm-tree");
	}

	if (s.size() > 0) {
		prop["index"] = s;
	}

	snprintf(num, sizeof(num), "%ld", dsize);
	prop["dict-size"] = num;

	for(map<string, string>::const_iterator pit = prop.begin();
		pit != prop.end(); ++pit) {

		// keep the escaped strings alive while they are written; a
		// pointer taken from a temporary would dangle
		const string key = escape((*pit).first);
		const string value = escape((*pit).second);

		if (!xeroxWriteAll(fd, key.data(), key.size()) ||
			!xeroxWriteAll(fd, eql, sizeof(eql)) ||
			!xeroxWriteAll(fd, value.data(), value.size()) ||
			!xeroxWriteAll(fd, nl, sizeof(nl))) {

			return false;
		}
	}

	// end-of-header marker
	if (!xeroxWriteAll(fd, ddelim, sizeof(ddelim))) {
		return false;
	}

	// saving the words
	for(it = entries.begin(); it != entries.end(); ++it) {
		const string& word = (*it).first;
		const string& sense = (*it).second;

		if (!xeroxWriteAll(fd, word.data(), word.size()) ||
			!xeroxWriteAll(fd, wdelim, sizeof(wdelim)) ||
			!xeroxWriteAll(fd, sense.data(), sense.size()) ||
			!xeroxWriteAll(fd, ddelim, sizeof(ddelim))) {

			return false;
		}
	}

	return true;
}

/**
 * Performs cheap sanity checks on the dictionary file.
 *
 * The file has to end with the entry delimiter, and the byte in front
 * of every 80th index position has to be an entry delimiter as well.
 * (A check of the "dict-size" property used to live here but is
 * disabled.)
 *
 * @return true if all checks passed
 */
bool DictImpl::checkIntegrity() {
	const long size = fdata->size();
	if (size < 1) {
		// nothing to look at, and no valid offset to read from
		return false;
	}

	// first check if the last character in the file is zero; the
	// probe starts as non-zero so that a failed read counts as a
	// mismatch
	char last = 0x12;
	fdata->read(size - 1, &last, 1);

	if (last != 0) {
		return false;
	}

	// check if the index entry positions point to correct places
	for(unsigned int i = 0; i < index.size(); i+=80) {
		const long pos = index[i].pos;
		if (pos < 1) {
			// there is no byte in front of this position
			return false;
		}

		char c = 12;
		fdata->read(pos - 1, &c, 1);
		if (c != 0) {
			return false;
		}
	}

	return true;	
}

/**
 * Replaces the bytes that have a structural meaning in the file by
 * two-byte sequences introduced by ESC (27):
 *   entry delimiter -> ESC '0', word delimiter -> ESC 'n',
 *   ESC itself -> ESC 'e'.
 * All other bytes are copied unchanged.
 */
string DictImpl::escape(const string& str) {
	string out;

	for(string::size_type k = 0; k < str.size(); ++k) {
		const char ch = str[k];

		// 0 means "no escape needed"
		char code = 0;
		if (ch == DATA_DELIMITER) {
			code = '0';
		} else if (ch == WORD_DELIMITER) {
			code = 'n';
		} else if (ch == 27) {
			code = 'e';
		}

		if (code != 0) {
			out.push_back(27);
			out.push_back(code);
		} else {
			out.push_back(ch);
		}
	}

	return out;
}

/**
 * Reverses escape(): ESC '0', ESC 'n' and ESC 'e' become the entry
 * delimiter, the word delimiter and ESC (27) respectively.
 *
 * An ESC followed by any other byte is dropped together with that
 * byte; an ESC at the very end of the input is dropped as well.
 */
string DictImpl::unescape(const string& str) {
	const string::size_type len = str.size();
	string out;

	for(string::size_type k = 0; k < len; ++k) {
		const char ch = str[k];
		if (ch != 27) {
			out.push_back(ch);
			continue;
		}

		// look at the byte that follows the ESC
		++k;
		if (k >= len) {
			break;
		}

		const char code = str[k];
		if (code == '0') {
			out.push_back(DATA_DELIMITER);
		} else if (code == 'n') {
			out.push_back(WORD_DELIMITER);
		} else if (code == 'e') {
			out.push_back(27);
		}
	}

	return out;
}

