Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Related Pages

chunker.h

00001 /* Copyright (C) 1997-2004 The CDG Team <cdg@nats.informatik.uni-hamburg.de> 00002 * 00003 * This file is free software; as a special exception the author gives 00004 * unlimited permission to copy and/or distribute it, with or without 00005 * modifications, as long as this notice is preserved. 00006 * 00007 * This program is distributed in the hope that it will be useful, but 00008 * WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 00009 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00010 * 00011 */ 00012 00013 /* ------------------------------------------------------------------------- 00014 * @addtogroup Chunker 00015 * 00016 * $Id: chunker.h,v 1.12 2004/09/01 13:44:12 micha Exp $ 00017 * 00018 * @{ 00019 */ 00020 00021 00022 #ifndef CHUNKER_H 00023 #define CHUNKER_H 00024 00025 /* -- INCLUDES ------------------------------------------------------------- */ 00026 #include <cdg.h> 00027 #include <lexemgraph.h> 00028 00029 /* -- TYPE DEFINITIONS ----------------------------------------------------- */ 00030 00031 /* ------------------------------------------------------------------------- 00032 * different modes the chunker can operate in. 00033 */ 00034 typedef enum { 00035 DefaultChunker, /**< the globally set chunker type: one of the below */ 00036 FakeChunker, /**< read chunks from the annotation */ 00037 RealChunker, /**< call the real chunker */ 00038 EvalChunker, /**< call the real chunker and compare it to the fake chunker */ 00039 } ChunkerMode; 00040 00041 /* ------------------------------------------------------------------------- 00042 * chunk types. 00043 */ 00044 typedef enum { 00045 NChunk, /**< a chunk of a nominal clause */ 00046 PChunk, /**< a chunk of a prepositional clause */ 00047 VChunk, /**< a chunk of a verbal clause */ 00048 NoChunk, /**< things which never go into a chunk */ 00049 UnknownChunk /**< fallback */ 00050 } ChunkType; 00051 00052 /* ------------------------------------------------------------------------- 00053 * internal representation of a chunk. 00054 */ 00055 struct ChunkStruct { 00056 ChunkType type; /**< label of the chunk */ 00057 List nodes; /**< list of all lexem nodes in the chunk */ 00058 GraphemNode from; /**< first element in the chunk */ 00059 GraphemNode to; /**< last element in the chunk */ 00060 GraphemNode head; /**< head of the chunk */ 00061 struct ChunkStruct *parent; /**< direct dominating span (only used in fake-chunking */ 00062 List subChunks; /**< embedded chunks, e.g. [PC ... [NC ...]] */ 00063 }; 00064 00065 00066 /* -- FUNCTIONS ------------------------------------------------------------ */ 00067 00068 /* initialize chunker module */ 00069 extern void chunkerInitialize(void); 00070 00071 /* finalize the chunker module */ 00072 extern void chunkerFinalize(void); 00073 00074 /* construct a new chunker object */ 00075 extern Chunker chunkerNew(ChunkerMode mode, LexemGraph lg); 00076 00077 /* destroy a chunker object */ 00078 extern void chunkerDelete(Chunker chunker); 00079 00080 /* destroy a chunk object */ 00081 extern void chunkerChunkDelete(Chunk chunk); 00082 00083 /* compute the chunks for a lexemgraph */ 00084 extern List chunkerChunk(Chunker chunker); 00085 00086 /* pretty print the given chunks */ 00087 void chunkerPrintChunks(unsigned long mode, List chunks); 00088 00089 /* get the string representation of a chunk type */ 00090 extern String chunkerStringOfChunkType(Chunk chunk); 00091 00092 /* get the chunk type of its string representation */ 00093 extern ChunkType chunkerChunkTypeOfString(String tag); 00094 00095 /* chunkerCloneChunk: construct a copy of a given chunk including clones of subChunks. */ 00096 extern Chunk chunkerCloneChunk(Chunk chunk); 00097 00098 /* replace all grapheme references in a chunk with those given in a lexemgraph */ 00099 void chunkerReplaceGraphemes(Chunk chunk, LexemGraph lg); 00100 00101 /* ------------------------------------------------------------------------- */ 00102 #endif /* don't insert anything after this #endif */ 00103

CDG 0.95 (20 Oct 2004)