/*****************************************************************************/
/*                                                                           */
/*  THE NONPAREIL DOCUMENT FORMATTING SYSTEM                                 */
/*  COPYRIGHT (C) 2002, 2005 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
/*  School of Information Technologies                                       */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 2, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         uchar.h                                                    */
/*  DESCRIPTION:  32-bit unsigned Unicode scalar values (header file)        */
/*                                                                           */
/*****************************************************************************/
#ifndef UCHAR_HEADER_FILE
#define UCHAR_HEADER_FILE
#include <stdio.h>
#include "boolean.h"
#include "utypes.h"
#include "ustring.h"

/* names for particular Unicode characters */
/* UEOF is the end-of-file marker.  It is not a Unicode character: 0xFFFFFF  */
/* is larger than U+10FFFF, the largest Unicode code point.                  */
#define UEOF 0xFFFFFF

/* reading and writing UTF8 */
/* UCHAR_ERROR enumerates the kinds of error that reading UTF8 can produce.  */
typedef enum {
  UE_UNEXPECTED_EOF = 0,   /* EOF arrived inside a multibyte character       */
  UE_ILLEGAL_CODE,         /* character code exceeds MAX_CHAR                */
  UE_ILLEGAL_BYTE1,        /* the first byte is illegal                      */
  UE_ILLEGAL_BYTE2,        /* the second byte is illegal                     */
  UE_ILLEGAL_BYTE3,        /* the third byte is illegal                      */
  UE_ILLEGAL_BYTE4         /* the fourth byte is illegal                     */
} UCHAR_ERROR;

/* Details of a UTF8 error: its kind, and an associated byte value.          */
/* NOTE(review): presumably set by UCharGet when it fails; the definitions   */
/* are not visible in this header, so confirm against the implementation.    */
extern UCHAR_ERROR uchar_error_type;
extern unsigned int uchar_error_byte;

/* UCharGet takes a stream and a pointer through which a UCHAR can be        */
/* stored, and returns a BOOLEAN.  NOTE(review): presumably the result says  */
/* whether a character was read successfully - TODO confirm, including what  */
/* is stored in *ch at end of file.                                          */
extern BOOLEAN UCharGet(FILE *fp, UCHAR *ch);
/* UCharPut takes one character and a stream; it returns nothing, so any     */
/* write failure is not reported through the return value.                   */
extern void UCharPut(UCHAR ch, FILE *fp);

/* unicode general category, as in UCD.html, plus UCHAR_I for invalid        */
/* Each name is UCHAR_ followed by the two-letter category abbreviation.     */
typedef enum {
  /* L: letters */
  UCHAR_LU = 0,            /* Lu: uppercase letter                           */
  UCHAR_LL,                /* Ll: lowercase letter                           */
  UCHAR_LT,                /* Lt: titlecase letter                           */
  UCHAR_LM,                /* Lm: modifier letter                            */
  UCHAR_LO,                /* Lo: other letter                               */
  /* M: marks */
  UCHAR_MN,                /* Mn: nonspacing mark                            */
  UCHAR_MC,                /* Mc: spacing combining mark                     */
  UCHAR_ME,                /* Me: enclosing mark                             */
  /* N: numbers */
  UCHAR_ND,                /* Nd: decimal digit number                       */
  UCHAR_NL,                /* Nl: letter number                              */
  UCHAR_NO,                /* No: other number                               */
  /* P: punctuation */
  UCHAR_PC,                /* Pc: connector punctuation                      */
  UCHAR_PD,                /* Pd: dash punctuation                           */
  UCHAR_PS,                /* Ps: open punctuation                           */
  UCHAR_PE,                /* Pe: close punctuation                          */
  UCHAR_PI,                /* Pi: initial quote punctuation                  */
  UCHAR_PF,                /* Pf: final quote punctuation                    */
  UCHAR_PO,                /* Po: other punctuation                          */
  /* S: symbols */
  UCHAR_SM,                /* Sm: math symbol                                */
  UCHAR_SC,                /* Sc: currency symbol                            */
  UCHAR_SK,                /* Sk: modifier symbol                            */
  UCHAR_SO,                /* So: other symbol                               */
  /* Z: separators */
  UCHAR_ZS,                /* Zs: space separator                            */
  UCHAR_ZL,                /* Zl: line separator                             */
  UCHAR_ZP,                /* Zp: paragraph separator                        */
  /* C: other */
  UCHAR_CC,                /* Cc: control                                    */
  UCHAR_CF,                /* Cf: format                                     */
  UCHAR_CS,                /* Cs: surrogate                                  */
  UCHAR_CO,                /* Co: private use                                */
  UCHAR_CN,                /* Cn: not assigned                               */
  /* not a UCD category */
  UCHAR_I                  /* invalid                                        */
} UCHAR_GENERAL_CATEGORY;

/* Nonpareil lexical classes */
/* The first fourteen classes each stand for one particular character; the   */
/* remaining ones are described in terms of Unicode general categories.      */
typedef enum {
  UCHAR_LEX_HASH = 0,      /* #                                              */
  UCHAR_LEX_QUOTE_DOUBLE,  /* " (double quote)                               */
  UCHAR_LEX_QUOTE_SINGLE,  /* ' (single quote)                               */
  UCHAR_LEX_LEFT_PAREN,    /* (                                              */
  UCHAR_LEX_RIGHT_PAREN,   /* )                                              */
  UCHAR_LEX_COMMA,         /* ,                                              */
  UCHAR_LEX_EXCLAM,        /* !                                              */
  UCHAR_LEX_DOT,           /* .                                              */
  UCHAR_LEX_COLON,         /* :                                              */
  UCHAR_LEX_LEFT_BRACKET,  /* [                                              */
  UCHAR_LEX_BACKSLASH,     /* \ (backslash)                                  */
  UCHAR_LEX_RIGHT_BRACKET, /* ]                                              */
  UCHAR_LEX_LEFT_BRACE,    /* {                                              */
  UCHAR_LEX_RIGHT_BRACE,   /* }                                              */
  UCHAR_LEX_ID_BEGIN,      /* categories Lu, Ll, Lt, Lm, Lo, Nl              */
  UCHAR_LEX_ID_EXTEND,     /* categories Mn, Mc, Pc, Cf                      */
  UCHAR_LEX_DIGIT,         /* category Nd                                    */
  UCHAR_LEX_OTHER_PUNCT,   /* Pd Ps Pe Pi Pf Po Sm Sc Sk So, unless above    */
  UCHAR_LEX_SPACE,         /* category Zs                                    */
  UCHAR_LEX_TAB,           /* the legacy tab character                       */
  UCHAR_LEX_ENDLINE,       /* Zl, Zp, and their legacy versions              */
  UCHAR_LEX_OTHER          /* categories Me, No, Cc, Cs, Co, Cn              */
} UCHAR_LEX_CLASS;

/* unicode BIDI classes */
/* Each name is UCHAR_BIDI_ followed by the abbreviation of the class.       */
typedef enum {
  /* left-to-right */
  UCHAR_BIDI_L = 0,        /* L: left-to-right                               */
  UCHAR_BIDI_LRE,          /* LRE: left-to-right embedding                   */
  UCHAR_BIDI_LRO,          /* LRO: left-to-right override                    */
  /* right-to-left */
  UCHAR_BIDI_R,            /* R: right-to-left                               */
  UCHAR_BIDI_AL,           /* AL: right-to-left Arabic                       */
  UCHAR_BIDI_RLE,          /* RLE: right-to-left embedding                   */
  UCHAR_BIDI_RLO,          /* RLO: right-to-left override                    */
  UCHAR_BIDI_PDF,          /* PDF: pop directional format                    */
  /* numbers and marks */
  UCHAR_BIDI_EN,           /* EN: European number                            */
  UCHAR_BIDI_ES,           /* ES: European number separator                  */
  UCHAR_BIDI_ET,           /* ET: European number terminator                 */
  UCHAR_BIDI_AN,           /* AN: Arabic number                              */
  UCHAR_BIDI_CS,           /* CS: common number separator                    */
  UCHAR_BIDI_NSM,          /* NSM: non-spacing mark                          */
  UCHAR_BIDI_BN,           /* BN: boundary neutral                           */
  /* separators and neutrals */
  UCHAR_BIDI_B,            /* B: paragraph separator                         */
  UCHAR_BIDI_S,            /* S: segment separator                           */
  UCHAR_BIDI_WS,           /* WS: whitespace                                 */
  UCHAR_BIDI_ON            /* ON: other neutrals                             */
} UCHAR_BIDI_CLASS;

/* unicode mapping types (none, canonical, or various compatibility)         */
/* Every class after UCHAR_CMAP_CANONICAL is a kind of compatibility mapping.*/
typedef enum {
  UCHAR_CMAP_NONE = 0,     /* char is canonical, it has no mapping           */
  UCHAR_CMAP_CANONICAL,    /* mapping is canonical                           */
  UCHAR_CMAP_FONT,         /* compatibility: font mapping                    */
  UCHAR_CMAP_NOBREAK,      /* compatibility: no-break mapping                */
  UCHAR_CMAP_INITIAL,      /* compatibility: initial mapping                 */
  UCHAR_CMAP_MEDIAL,       /* compatibility: medial mapping                  */
  UCHAR_CMAP_FINAL,        /* compatibility: final mapping                   */
  UCHAR_CMAP_ISOLATED,     /* compatibility: isolated mapping                */
  UCHAR_CMAP_CIRCLE,       /* compatibility: encircled mapping               */
  UCHAR_CMAP_SUPER,        /* compatibility: superscript mapping             */
  UCHAR_CMAP_SUB,          /* compatibility: subscript mapping               */
  UCHAR_CMAP_VERTICAL,     /* compatibility: vertical mapping                */
  UCHAR_CMAP_WIDE,         /* compatibility: wide mapping                    */
  UCHAR_CMAP_NARROW,       /* compatibility: narrow mapping                  */
  UCHAR_CMAP_SMALL,        /* compatibility: small mapping                   */
  UCHAR_CMAP_SQUARE,       /* compatibility: CJK squared mapping             */
  UCHAR_CMAP_FRACTION,     /* compatibility: fraction mapping                */
  UCHAR_CMAP_COMPAT        /* compatibility: other mapping                   */
} UCHAR_CMAP_CLASS;

/* initialization and general queries                                        */
/* UCharInit takes a directory name.  NOTE(review): presumably the place     */
/* where the character data is found - confirm against the implementation.   */
extern void UCharInit(USTRING directory);
/* UCharMax takes no arguments and returns a UCHAR.  It is declared with     */
/* (void) so that this is a true prototype; an empty parameter list would    */
/* leave calls with stray arguments undiagnosed before C23.                  */
extern UCHAR UCharMax(void);
/* UCharEqualProperties compares two characters and returns a BOOLEAN.       */
/* NOTE(review): which properties take part is not visible here - confirm.   */
extern BOOLEAN UCharEqualProperties(UCHAR ch1, UCHAR ch2);

/* character properties */
/* NOTE(review): the definitions are not part of this header, so remarks     */
/* below about behaviour are inferred from names and types - confirm them.   */
/* Classification of ch under the enumerated types declared in this file:    */
extern UCHAR_GENERAL_CATEGORY UCharGeneralCategory(UCHAR ch);
extern UCHAR_LEX_CLASS UCharLexClass(UCHAR ch);
/* The combining class is returned as an unsigned char, not an enumeration.  */
extern unsigned char UCharCanonicalCombiningClass(UCHAR ch);
extern UCHAR_BIDI_CLASS UCharBidiClass(UCHAR ch);
/* UCharCMapClass gives the kind of mapping; UCharCMap returns a USTRING,    */
/* presumably the mapping itself - TODO confirm its value when the class is  */
/* UCHAR_CMAP_NONE.                                                          */
extern UCHAR_CMAP_CLASS UCharCMapClass(UCHAR ch);
extern USTRING UCharCMap(UCHAR ch);
/* Predicates returning BOOLEAN.  Those with int pointer parameters          */
/* presumably store the numeric value through them when the result is true;  */
/* TODO confirm, and confirm what is stored when the result is false.        */
extern BOOLEAN UCharIsPrintableAscii(UCHAR ch);
extern BOOLEAN UCharIsDecimalDigit(UCHAR ch, int *value);
extern BOOLEAN UCharIsDigit(UCHAR ch, int *value);
extern BOOLEAN UCharIsNumeric(UCHAR ch, int *numerator, int *denominator);
extern BOOLEAN UCharIsBidiMirrored(UCHAR ch);
/* Case conversions; each returns a USTRING rather than a single UCHAR.      */
extern USTRING UCharLowerCase(UCHAR ch);
extern USTRING UCharUpperCase(UCHAR ch);
extern USTRING UCharTitleCase(UCHAR ch);

/* debug, display, and test */
/* Each Show function takes a value of one of the enumerated types declared  */
/* in this file and returns an ASTRING.  NOTE(review): presumably a          */
/* printable name for the value; ownership of the result is not visible      */
/* here - confirm before freeing or modifying it.                            */
extern ASTRING UCharLexClassShow(UCHAR_LEX_CLASS lc);
extern ASTRING UCharGeneralCategoryShow(UCHAR_GENERAL_CATEGORY gc);
extern ASTRING UCharBidiClassShow(UCHAR_BIDI_CLASS bc);
extern ASTRING UCharCMapClassShow(UCHAR_CMAP_CLASS cc);
/* UCharTest takes a stream; presumably its test output goes there - TODO    */
/* confirm.                                                                  */
extern void UCharTest(FILE *fp);

#endif
