00001 /* 00002 * Copyright (C) 1997-2004 The CDG Team <cdg@nats.informatik.uni-hamburg.de> 00003 * 00004 * This file is free software; as a special exception the author gives 00005 * unlimited permission to copy and/or distribute it, with or without 00006 * modifications, as long as this notice is preserved. 00007 * 00008 * This program is distributed in the hope that it will be useful, but 00009 * WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 00010 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00011 * 00012 * Author: Ingo Schroeder (see also AUTHORS and THANKS for more) 00013 * Purpose: header file for some global variables 00014 * other globals are declared elsewhere, e.g. constraintnet.h 00015 * Birth: 13/3/97 00016 * 00017 * $Id: cdg.h,v 1.65 2004/09/27 17:07:02 micha Exp $ 00018 */ 00019 00020 /* ------------------------------------------------------------------- */ 00021 /** \mainpage The CDG Reference Manual 00022 00023 \author Michael Daum, Ingo Schröder, Kilian A. Foth 00024 00025 \section Purpose Purpose of this document 00026 00027 This document is a thorough description of all data structures and 00028 functions used in the CDG system and serves as a technical documentation of 00029 the parser. 00030 00031 Most of the core functionality is exported by the modules in the directory \c 00032 libcdg, which can be viewed as yet another C library, although a very 00033 specialized one. Application-specific functionality must be implemented by the 00034 application that uses the \c libcdg library. Thus, the command-line parser \c 00035 cdgp includes code to parse its command-line options and to do I/O on a 00036 terminal, and the graphical parser \c xcdg defines extensive routines to 00037 display input structures graphically. This document will eventually cover all 00038 data structures and functions of the constraint parsing library \c libcdg and the 00039 text-based parser \c cdgp. 
00040 00041 \c CDG is written entirely in C (and automatically 00042 generated C in some cases). It relies on several non-ANSI extensions 00043 to the C programming language (such as local functions and dynamic 00044 arrays). The GNU C compiler can compile all of these features; others 00045 may or may not. 00046 00047 \section Overview Overview 00048 00049 The \c CDG library is organized into several modules, each of which offers a 00050 specialized service to the environment. The set of all modules can be 00051 further divided into those offering the basic infrastructure for \c CDG 00052 parsing and those that are built on top and offer the different parsing flavours 00053 that are available. 00054 00055 \subsection BasicModules Basic Modules 00056 - \ref Cdg: This is the root layer of the \c CDG library. 00057 - \ref Chunker: This module offers an interface for an external chunker. 00058 - \ref Cdgdb: This module provides access to a Berkeley database for retrieving lexical entries. 00059 - \ref Command: This module implements the \c CDG scripting interface. 00060 - \ref Compiler: This module implements a compiler in order to translate constraints into C code. 00061 - \ref Constraintnet: Herein the basic and most central data structure of \c CDG is implemented, the ConstraintNet. 00062 - \ref Eval: The Eval module actually implements the constraint interpreter 00063 - \ref Functions: Implementation of all function calls 00064 - \ref Hook: The Hook module connects the core of the library to its outside world by a callback mechanism. 00065 - \ref Input: This module defines the data structures and access functions for constraint grammar. 
00066 - \ref Interpreter: This module implements the constraint grammar language; it consists of the following submodules 00067 - \ref Levelvalue 00068 - \ref Lexemgraph 00069 - \ref Parse 00070 - \ref Predicates: Implementation of all predicates 00071 - \ref Scache 00072 - \ref Scorematrix: This module implements the score matrix for a constraint edge. 00073 - \ref Set: This module provides a uniform interface for all user-visible variables. 00074 - \ref Statistics 00075 - \ref Tagger 00076 - \ref Testing 00077 - \ref Timer 00078 - \ref Variables: This module provides a generic interface for variable encapsulation, and is exclusively used by the set module. 00079 - \ref Write 00080 00081 00082 \subsection ParsingFlavours Parsing Flavours 00083 - \ref Arcconsistency 00084 - \ref Frobbing 00085 - \ref MGLS 00086 - \ref Increment 00087 - \ref Incrementalcompletion 00088 - \ref Netsearch 00089 - \ref Nodeconsistency 00090 - \ref Search 00091 00092 \section Guidelines General guidelines 00093 00094 \subsection FileStructure File structure 00095 00096 Every module consists of a declaration 00097 part \c <moduleName>.h and an implementation part \c <moduleName>.c . 00098 Another module can use the exported services of this 00099 module by including its declarations in \c <moduleName>.h . 00100 Note that many of the functions covered in this manual are not exported and 00101 therefore cannot be used from other modules at all. The files 00102 \c skel.c and \c skel.h provide a skeleton for a new module. 00103 00104 \subsection Terms Terms used in this document 00105 00106 While some of the functions in the \c libcdg library explicitly 00107 use variable-length argument lists, others have a prototype like the following: 00108 \code int no, char **args \endcode 00109 00110 In this case it is always assumed that 00111 \a args is an array of valid zero-terminated strings, and that 00112 \a no specifies the number of these strings. 
This is an alternate 00113 way of passing a varying number of additional arguments. The strings 00114 contained in the array are called \em command words in this 00115 document to distinguish them from the actual function arguments. 00116 00117 \subsection CodingStyle Coding style 00118 00119 Most identifiers of data structures and algorithms are complete 00120 English phrases such as `ConstraintNet' or `printLexiconItem'. When a 00121 module deals primarily with one data structure, it is common practice 00122 to abbreviate the name of this structure and use it as a prefix to all 00123 exported identifiers, as in `lvNew', `lvPrint', and `lvDelete' (rather 00124 than `newLevelValue', `printLevelValue', and `deleteLevelValue'). 00125 00126 To avoid the explicit use of pointer variables, most modules export 00127 \c typedef 'd aliases for all pointer types. These aliases have 00128 meaningful names, while the underlying \c struct types have names 00129 ending in \c Struct: 00130 00131 \code 00132 typedef struct { 00133 String id; 00134 Boolean active; 00135 int counter; 00136 } SectionStruct; 00137 typedef SectionStruct *Section; 00138 \endcode 00139 00140 Although ANSI C allows the programmer to collapse these two 00141 \c typedef statements into one, this construct cannot be parsed by 00142 the interface generator SWIG, used for building \c xcdg. Therefore it is 00143 important to use exactly this way of defining pointers to structures. 00144 00145 One consequence of this definition style is that although very few 00146 pointer symbols are used in the code, most \c libcdg data types 00147 obey reference semantics: A function called on a variable \c s can 00148 usually change the underlying structure, even though C function calls 00149 actually use value semantics. 00150 00151 A more practical demonstration of the coding style is given in \ref Skel. 
00152 00153 */ 00154 00155 #ifndef CDG_H 00156 #define CDG_H 00157 00158 /* ------------------------------------------------------------------------- 00159 * @addtogroup Cdg 00160 * @{ 00161 */ 00162 00163 /* -- TYPE DEFINITIONS -------------------------------------------------- */ 00164 /* -- INCLUDES ---------------------------------------------------------- */ 00165 #include <config.h> 00166 #include <unistd.h> 00167 #include <stdlib.h> 00168 #include "blah.h" 00169 00170 #ifdef DMALLOC 00171 #include <dmalloc.h> 00172 #endif 00173 00174 00175 /* -- MACROS ------------------------------------------------------------ */ 00176 00177 #ifndef SWIG 00178 /* ---------------------------------------------------------------------- 00179 * maximum of \c a or \c b. 00180 * Be aware that the expressions \c a and \c b might be evaluated twice 00181 * depending on wether \c a or \c b are greater. 00182 */ 00183 #define max(a, b) (a) > (b) ? (a) : (b) 00184 00185 /* ---------------------------------------------------------------------- 00186 * minimum of \c a or \c b. 00187 * Be aware that the expressions \c a and \c b might be evaluated twice 00188 * depending on wether \c a or \c b are smaller. 00189 */ 00190 #define min(a, b) (a) < (b) ? (a) : (b) 00191 #endif 00192 00193 00194 /* ---------------------------------------------------------------------- 00195 * null, zero, nadda, nothing. 00196 * \c NULL has the numeric value 0. 00197 */ 00198 #ifndef NULL 00199 #define NULL 0 00200 #endif 00201 00202 /* ---------------------------------------------------------------------- 00203 * this is true. 00204 * \c TRUE has the numeric value 1. 00205 */ 00206 #ifndef TRUE 00207 #define TRUE 1 00208 #endif 00209 00210 /* ---------------------------------------------------------------------- 00211 * this is false. 00212 * \c FALSE has the numeric value 0. 
00213 */ 00214 #ifndef FALSE 00215 #define FALSE 0 00216 #endif 00217 00218 /* -- FORWARD TYPE DEFINITIONS ------------------------------------------ */ 00219 00220 /** @name Forward type definitions 00221 * This set of declarations help to deal with cirular C-structures which 00222 * introduce circular header file dependencies as a consequence. All 00223 * type definitions of that kind are moved to this place here to get 00224 * a one-stop forward declaration. 00225 * @{ */ 00226 00227 struct ChunkerStruct; 00228 typedef struct ChunkerStruct ChunkerStruct; 00229 /**< type of a chunker structure */ 00230 typedef ChunkerStruct *Chunker; 00231 /**< type of a chunker pointer*/ 00232 00233 struct ChunkStruct; 00234 typedef struct ChunkStruct ChunkStruct; 00235 /**< type of a chunk structure */ 00236 typedef ChunkStruct *Chunk; 00237 /**< type of a chunk pointer */ 00238 00239 struct LexemNodeStruct; 00240 typedef struct LexemNodeStruct LexemNodeStruct; 00241 /**< type of a lexem node structure */ 00242 typedef LexemNodeStruct *LexemNode; 00243 /**< type of a lexem node pointer */ 00244 00245 struct GraphemNodeStruct; 00246 typedef struct GraphemNodeStruct GraphemNodeStruct; 00247 /**< type of a graphem node structure */ 00248 typedef GraphemNodeStruct *GraphemNode; 00249 /**< type of a graphem node pointer */ 00250 00251 /** @} */ 00252 00253 00254 /* -- VARIABLES --------------------------------------------------------- */ 00255 extern Boolean cdgCtrlCTrapped; 00256 extern Boolean cdgCtrlCAllowed; 00257 extern Boolean cdgXCDG; 00258 extern unsigned long cdgTimeLimit; 00259 extern Boolean cdgEncodeUmlauts; 00260 extern String cdgUser; 00261 extern Hashtable cdgNets; 00262 extern Hashtable cdgParses; 00263 extern Hashtable cdgProblems; 00264 00265 /* -- FUNCTIONS --------------------------------------------------------- */ 00266 #ifndef SWIG 00267 extern inline void cdgAgInsert(Agenda, double, Pointer); 00268 extern void cdgInitialize( void ); 00269 extern void 
cdgDeleteComputed(void); 00270 #endif 00271 extern void cdgFreeString(String str); 00272 extern void cdgFinalize(void); 00273 extern void cdgExecPragmas(List pragmas); 00274 00275 /* ---------------------------------------------------------------------- */ 00276 /** @} */ 00277 #endif /* don't insert anything after this #endif */