/*****************************************************************************/
/*                                                                           */
/*  THE NONPAREIL DOCUMENT FORMATTING SYSTEM                                 */
/*  COPYRIGHT (C) 2002, 2005 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
/*  School of Information Technologies                                       */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 2, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         ustring_pool.c                                             */
/*  DESCRIPTION:  Pool of USTRING objects, indexed by unsigned short.        */
/*                                                                           */
/*****************************************************************************/
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "utypes.h"
#include "ustring.h"
#include "ustring_pool.h"
#include "memory.h"


/*****************************************************************************/
/*                                                                           */
/*  USTRING_POOL                                                             */
/*                                                                           */
/*  A pool of UStrings.  This has two states, an initial state while it      */
/*  is being constructed, and a reloaded state when reloaded from disk.      */
/*  In the initial state the strings are just stored in an array, and        */
/*  their index in this array is their USTRING_POOL_INDEX.  In the reloaded  */
/*  state the array is NULL, and offsets[] gives a further offset from       */
/*  the start of offsets (measured in UCHARs, since it is used to index      */
/*  strings[]) which is where the string itself (zero-terminated) is kept.   */
/*                                                                           */
/*****************************************************************************/

struct ustring_pool_rec {
  ARRAY_USTRING		new_strings;		/* NULL if loaded from disk  */
  int			offsets_size;		/* no. of reloaded strings   */
  /* In the reloaded state the two members below overlay the same storage:   */
  /* offsets[i] is the index in strings[] at which string i begins.  Both    */
  /* are declared with one element but are used beyond it, the memory for    */
  /* the record being allocated larger than the struct itself.               */
  union {
    unsigned short	offsets[1];		/* when reloaded (extends)   */
    UCHAR		strings[1];		/* when reloaded (extends)   */
  } u;
};


/*****************************************************************************/
/*                                                                           */
/*  USTRING_POOL UStringPoolNew()                                            */
/*                                                                           */
/*  Make a new, empty UString pool.                                          */
/*                                                                           */
/*  Note:  google has been added as the first string to help with            */
/*  debugging.  Any uninitialized string pool indexes that happen to         */
/*  contain 0 will point to "google".                                        */
/*                                                                           */
/*****************************************************************************/

USTRING_POOL UStringPoolNew()
{
  USTRING_POOL res;
  GetMemory(res, USTRING_POOL);
  ArrayInit(&res->new_strings);
  UStringPoolAdd(res, AStringToUString("google"));
  return res;
}


/*****************************************************************************/
/*                                                                           */
/*  USTRING_POOL_INDEX UStringPoolAdd(USTRING_POOL pool, USTRING str)        */
/*                                                                           */
/*  Return the index of str in pool, adding str at the end of the pool       */
/*  first if no equal string is there yet.  Callers therefore never need     */
/*  to check for themselves whether str is present.                          */
/*                                                                           */
/*  The index must fit in an unsigned short; if the new string's index       */
/*  would exceed USHRT_MAX, a message is printed on stderr and the program   */
/*  exits with status 1.                                                     */
/*                                                                           */
/*  Only pools in the initial state may be added to (this is asserted).      */
/*                                                                           */
/*****************************************************************************/

USTRING_POOL_INDEX UStringPoolAdd(USTRING_POOL pool, USTRING str)
{
  int pos, count;
  assert(pool->new_strings != NULL);

  /* if an equal string is already in the pool, its index is the answer */
  count = ArraySize(pool->new_strings);
  pos = 0;
  while( pos < count )
  {
    if( UStringEqual(str, ArrayGet(pool->new_strings, pos)) )
      return pos;
    pos++;
  }

  /* otherwise append it; pos is now the index of the new last element */
  ArrayAddLast(pool->new_strings, str);
  if( pos > USHRT_MAX )
  {
    fprintf(stderr, "too many strings in trie string pool\n");
    exit(1);
  }
  return pos;
}


/*****************************************************************************/
/*                                                                           */
/*  void UStringPoolSave(USTRING_POOL pool, USTRING file_name)               */
/*                                                                           */
/*  Save UString pool to a file, in a binary format that can be reloaded     */
/*  quickly by UStringPoolRestore.  The file is an image of a pool in the    */
/*  reloaded state:  the fixed fields, then the offsets array, then the      */
/*  zero-terminated strings.  The pool itself is left unchanged.             */
/*                                                                           */
/*  The pool must be in the initial state (this is asserted).  Offsets are   */
/*  stored as unsigned shorts, so a pool whose strings do not all start      */
/*  within USHRT_MAX UCHARs of the start of the offsets cannot be saved.     */
/*  In that case, or if memory cannot be obtained or the file cannot be      */
/*  opened or written, a message is printed on stderr and the program        */
/*  exits with status 1.                                                     */
/*                                                                           */
/*****************************************************************************/

void UStringPoolSave(USTRING_POOL pool, USTRING file_name)
{
  size_t header_mem, offsets_mem, str_mem, mem, soffs;
  int count, i, j, len;  USTRING str;  USTRING_POOL pool2;
  ASTRING path;  FILE *fp;

  /* only a pool in the initial state has an array of strings to save */
  assert(pool->new_strings != NULL);
  count = ArraySize(pool->new_strings);

  /* bytes before the union; offsetof accounts for any struct padding */
  header_mem = offsetof(struct ustring_pool_rec, u);

  /* bytes for the offsets array, rounded up to a whole number of UCHARs */
  offsets_mem = count * sizeof(unsigned short);
  if( offsets_mem % sizeof(UCHAR) != 0 )
    offsets_mem += sizeof(UCHAR) - offsets_mem % sizeof(UCHAR);
  assert(offsets_mem % sizeof(UCHAR) == 0);

  /* bytes for the strings, each with its terminating zero */
  str_mem = 0;
  for( i = 0;  i < count;  i++ )
  {
    len = UStringLength(ArrayGet(pool->new_strings, i));
    str_mem += ((size_t) len + 1) * sizeof(UCHAR);
  }

  /* total bytes in the compiled string pool */
  mem = header_mem + offsets_mem + str_mem;

  /* build the compiled pool; calloc so that padding bytes are saved as 0 */
  pool2 = (USTRING_POOL) calloc(1, mem);
  if( pool2 == NULL )
  {
    fprintf(stderr, "UStringPoolSave: out of memory\n");
    exit(1);
  }
  pool2->new_strings = NULL;
  pool2->offsets_size = count;
  soffs = offsets_mem / sizeof(UCHAR);  /* first free UCHAR after offsets */
  for( i = 0;  i < count;  i++ )
  {
    /* an offset is stored as an unsigned short; never truncate one */
    if( soffs > USHRT_MAX )
    {
      fprintf(stderr, "UStringPoolSave: string pool too large to save\n");
      exit(1);
    }
    pool2->u.offsets[i] = (unsigned short) soffs;
    str = ArrayGet(pool->new_strings, i);
    len = UStringLength(str);
    for( j = 0;  j <= len;  j++ )  /* j == len copies the terminating zero */
      pool2->u.strings[soffs++] = UStringGet(str, j);
  }

  /* save to binary file, checking every step that can fail */
  path = (ASTRING) UStringToUTF8(file_name);
  fp = fopen(path, "wb");
  if( fp == NULL )
  {
    fprintf(stderr, "UStringPoolSave: cannot open %s for writing\n", path);
    exit(1);
  }
  if( fwrite(pool2, mem, 1, fp) != 1 )
  {
    fprintf(stderr, "UStringPoolSave: error writing %s\n", path);
    exit(1);
  }
  if( fclose(fp) != 0 )  /* buffered data is flushed here, so this can fail */
  {
    fprintf(stderr, "UStringPoolSave: error writing %s\n", path);
    exit(1);
  }

  /* the in-memory image was only needed for writing */
  free(pool2);
}


/*****************************************************************************/
/*                                                                           */
/*  USTRING_POOL UStringPoolRestore(USTRING file_name)                       */
/*                                                                           */
/*  Load a string pool from the given file, as created by a Save.  This      */
/*  operation is optimized to run very quickly, but it produces a string     */
/*  pool which can no longer be added to, only retrieved from.               */
/*                                                                           */
/*  The whole file is read into one block obtained from malloc, which is     */
/*  the result.  NULL is returned if the file cannot be opened or read,      */
/*  if memory cannot be obtained, or if the file is too short to hold the    */
/*  fixed fields and the number of offsets it claims to contain.             */
/*                                                                           */
/*****************************************************************************/

USTRING_POOL UStringPoolRestore(USTRING file_name)
{
  FILE *fp;  long mem;  size_t header_mem, body_mem;  USTRING_POOL res;

  fp = fopen((ASTRING) UStringToUTF8(file_name), "rb");
  if( fp == NULL )
    return NULL;

  /* the file size is the position reached by seeking to the end */
  mem = -1;
  if( fseek(fp, 0L, SEEK_END) == 0 )
    mem = ftell(fp);
  rewind(fp);

  /* read the whole file, provided it can at least hold the fixed fields */
  res = NULL;
  header_mem = offsetof(struct ustring_pool_rec, u);
  if( mem >= 0 && (size_t) mem >= header_mem )
  {
    res = (USTRING_POOL) malloc((size_t) mem);
    if( res != NULL && fread(res, (size_t) mem, 1, fp) != 1 )
    {
      free(res);
      res = NULL;
    }
  }
  fclose(fp);
  if( res == NULL )
    return NULL;

  /* reject a file whose offsets array would run past the end of the data */
  body_mem = (size_t) mem - header_mem;
  if( res->offsets_size < 0 ||
      (size_t) res->offsets_size > body_mem / sizeof(unsigned short) )
  {
    free(res);
    return NULL;
  }

  /* a restored pool is in the reloaded state whatever the file bytes say */
  res->new_strings = NULL;
  return res;
}


/*****************************************************************************/
/*                                                                           */
/*  USTRING UStringPoolGet(USTRING_POOL pool, USTRING_POOL_INDEX index)      */
/*                                                                           */
/*  Return the string stored in pool under the given index.  Pools made      */
/*  by New() and pools loaded by Restore() are both handled; in either       */
/*  case the index is asserted to be within range.                           */
/*                                                                           */
/*****************************************************************************/

USTRING UStringPoolGet(USTRING_POOL pool, USTRING_POOL_INDEX index)
{
  unsigned short start;

  /* initial state:  the index selects an element of new_strings */
  if( pool->new_strings != NULL )
  {
    assert(index < ArraySize(pool->new_strings));
    return ArrayGet(pool->new_strings, index);
  }

  /* compiled state:  offsets[index] says where in strings[] to look */
  assert(index < pool->offsets_size);
  start = pool->u.offsets[index];
  return &pool->u.strings[start];
}


/*****************************************************************************/
/*                                                                           */
/*  void UStringPoolDebug(USTRING_POOL pool, int indent, FILE *fp)           */
/*                                                                           */
/*  Print the contents of pool onto fp for debugging, with each line         */
/*  indented by indent spaces.  The heading says which state the pool is     */
/*  in, and is followed by one line per string giving its index and the      */
/*  string as rendered by UStringToDisplayedHex.                             */
/*                                                                           */
/*****************************************************************************/

void UStringPoolDebug(USTRING_POOL pool, int indent, FILE *fp)
{
  int k, count;  USTRING entry;

  /* the heading and the number of entries depend on the pool's state */
  if( pool->new_strings != NULL )
  {
    fprintf(fp, "%*s[ USTRING_POOL (initial state):\n", indent, "");
    count = ArraySize(pool->new_strings);
  }
  else
  {
    fprintf(fp, "%*s[ USTRING_POOL (compiled state):\n", indent, "");
    count = pool->offsets_size;
  }

  /* one line per string, fetched in the way appropriate to the state */
  for( k = 0;  k < count;  k++ )
  {
    if( pool->new_strings != NULL )
      entry = ArrayGet(pool->new_strings, k);
    else
      entry = &pool->u.strings[pool->u.offsets[k]];
    fprintf(fp, "%*s  %d: %s\n", indent, "", k, UStringToDisplayedHex(entry));
  }
  fprintf(fp, "%*s]\n", indent, "");
}


/*****************************************************************************/
/*                                                                           */
/*  void UStringPoolTest(USTRING_POOL pool, USTRING file_name, int indent,   */
/*    FILE *fp)                                                              */
/*                                                                           */
/*  Test saving pool to file_name and restoring it again.  Any difference    */
/*  between pool and the restored pool is reported on fp, as is failure      */
/*  to restore at all; then the restored pool is debug printed and freed.    */
/*  The pool passed in must be in the initial state (this is asserted).      */
/*                                                                           */
/*****************************************************************************/

void UStringPoolTest(USTRING_POOL pool, USTRING file_name, int indent,
  FILE *fp)
{
  USTRING_POOL reloaded_pool;  int i, count;  USTRING str1, str2;
  fprintf(fp, "%*s[ UStringPoolTest:\n", indent, "");
  assert(pool->new_strings != NULL);
  UStringPoolSave(pool, file_name);
  reloaded_pool = UStringPoolRestore(file_name);

  /* Restore returns NULL on failure; report that rather than crash */
  if( reloaded_pool == NULL )
  {
    fprintf(fp, "%*s  could not restore pool from file\n", indent, "");
    fprintf(fp, "%*s]\n", indent, "");
    return;
  }
  assert(reloaded_pool->new_strings == NULL);

  /* report a size mismatch, then compare only strings both pools have */
  count = ArraySize(pool->new_strings);
  if( count != reloaded_pool->offsets_size )
  {
    fprintf(fp, "%*s  original pool size %d != reloaded pool size %d\n",
      indent, "", count, reloaded_pool->offsets_size);
    if( reloaded_pool->offsets_size < count )
      count = reloaded_pool->offsets_size;
  }
  for( i = 0;  i < count;  i++ )
  {
    str1 = UStringPoolGet(pool, i);
    str2 = UStringPoolGet(reloaded_pool, i);
    /* NOTE(review): two UStringToDisplayedHex results are live in one call; */
    /* if it returns a shared buffer both would print alike - confirm        */
    if( !UStringEqual(str1, str2) )
      fprintf(fp, "%*s  [%d: pool [%s] != reloaded_pool [%s]\n", indent, "",
        i, UStringToDisplayedHex(str1), UStringToDisplayedHex(str2));
  }
  fprintf(fp, "%*s]\n", indent, "");
  UStringPoolDebug(reloaded_pool, 2, fp);

  /* the restored pool is a single malloc'd block owned by this function */
  free(reloaded_pool);
}
