Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Related Pages

constraintnet.c

00001 /* 00002 * Copyright (C) 1997-2004 The CDG Team <cdg@nats.informatik.uni-hamburg.de> 00003 * 00004 * This file is free software; as a special exception the author gives 00005 * unlimited permission to copy and/or distribute it, with or without 00006 * modifications, as long as this notice is preserved. 00007 * 00008 * This program is distributed in the hope that it will be useful, but 00009 * WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 00010 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00011 * 00012 * $Id: constraintnet.c,v 1.177 2004/09/27 17:07:03 micha Exp $ 00013 */ 00014 00015 /* ------------------------------------------------------------------------- 00016 @addtogroup Constraintnet Constraintnet - maintainance of constraint nets 00017 @author Ingo Schroeder 00018 @date 6/3/97 00019 @{ 00020 */ 00021 00022 /* ---------------------------------------------------------------------- */ 00023 #include <config.h> 00024 00025 #include <stdio.h> 00026 #include <blah.h> 00027 #include <math.h> 00028 #include <string.h> 00029 #include "cdg.h" 00030 #include "input.h" 00031 #include "eval.h" 00032 #include "statistics.h" 00033 #include "levelvalue.h" 00034 #include "constraintnet.h" 00035 #include "lexemgraph.h" 00036 #include "hook.h" 00037 #include "timer.h" 00038 #include "increment.h" 00039 #include "parse.h" 00040 #include "set.h" 00041 #include "tagger.h" 00042 #include "chunker.h" 00043 00044 /* ---------------------------------------------------------------------- */ 00045 /* ---------------------------------------------------------------------- */ 00046 /* ---------------------------------------------------------------------- */ 00047 00048 /** Counts the constraint nets created so far and appears 00049 in the name of every constraint net. */ 00050 int cnCounter = 0; 00051 00052 /** cnMostRecentlyCreatedNet points to last constraint net created 00053 (or to NULL). 
This variable is used as an implicit argument to 00054 commands that expect the name of a constraint net. */ 00055 ConstraintNet cnMostRecentlyCreatedNet = NULL; 00056 00057 /** This variable implements the CDG variable edges. */ 00058 CnEdgesType cnEdgesFlag = cnEdgesOff; 00059 00060 /** If this flag is set, deleted LVs will be bracketed with [ ] in 00061 all output. If it is not set, they will not be shown at all. */ 00062 Boolean cnShowDeletedFlag = FALSE; 00063 00064 /** if this Flag is set, newly created constraint networks will contain 00065 * non-specific dependency edges aswell */ 00066 Boolean cnUseNonSpec = FALSE; 00067 00068 /** This variable gives the approximate ratio of LVs retained while 00069 building a constraint net. It defaults to~1, so that no LVs are 00070 deleted upon building. If it is smaller, then from any set <i>W</i> 00071 of LVs the worst floor(|<i>W</i>| x)) LVs will be 00072 removed. */ 00073 Number cnUnaryPruningFraction = 1.0; 00074 00075 /** This variable implements the CDG variable sortnodes. */ 00076 int cnSortNodesMethod = 0; 00077 00078 /* -- FUNCTIONS --------------------------------------------------------- */ 00079 /* ---------------------------------------------------------------------- 00080 This function returns a new ConstraintNet with a unique name, 00081 but with all fields set to meaningless default values. Building a 00082 complete constraint net is a multi-stage process that involves several 00083 of the other functions of this module. 00084 */ 00085 ConstraintNet cnBuildInit() 00086 { 00087 ConstraintNet net; 00088 00089 net = (ConstraintNet) memMalloc(sizeof (ConstraintNetStruct)); 00090 /* There is no garantee that no longer ids are needed but ... 
*/ 00091 net->id = strPrintf("net%d", cnCounter++); 00092 net->isBuilt = NULL; 00093 net->totalNumberOfValues = 0; 00094 net->values = net->nodes = net->edges = NULL; 00095 net->parses = (List) NULL; 00096 net->searchagenda = (Agenda) NULL; 00097 net->evalUnary = 0; 00098 net->evalBinary = 0; 00099 net->statUnary = 0; 00100 net->cache = NULL; 00101 net->lvTotals = NULL; 00102 00103 return net; 00104 } 00105 00106 /* ---------------------------------------------------------------------- 00107 Build lexeme graph and add tagger and chunker information to NET. 00108 */ 00109 Boolean cnTag(ConstraintNet net, Lattice lat) { 00110 Chunker chunker; 00111 List chunks; 00112 00113 net->lexemgraph = lgNew(lat); 00114 00115 /* invalid graph? */ 00116 if (net->lexemgraph == NULL) { 00117 cnDelete(net); 00118 return FALSE; 00119 } 00120 00121 /* get additional information from partial parsers */ 00122 if(taggerUp()) { 00123 taggerTag(net->lexemgraph); 00124 } 00125 chunker = chunkerNew(DefaultChunker, net->lexemgraph); 00126 chunks = chunkerChunk(chunker); 00127 if (hkVerbosity & CDG_DEBUG) { 00128 chunkerPrintChunks(CDG_DEBUG, chunks); 00129 } 00130 chunkerDelete(chunker); 00131 00132 cdgPrintf(CDG_INFO, "INFO: grapheme graph: #nodes %d, min %d, max %d\n", 00133 vectorSize(net->lexemgraph->graphemnodes), 00134 net->lexemgraph->min, net->lexemgraph->max); 00135 cdgPrintf(CDG_INFO, 00136 "INFO: lexem graph: #nodes %d, min %d, max %d, #paths %lld\n", 00137 vectorSize(net->lexemgraph->nodes), 00138 net->lexemgraph->min, 00139 net->lexemgraph->max, 00140 net->lexemgraph->noOfPaths); 00141 00142 return TRUE; 00143 } 00144 00145 /* ---------------------------------------------------------------------- 00146 Final improvements: optimize, sort, unary pruning, score cache and edges 00147 00148 This function performs various clean-up operations on a net for which 00149 all constraint nodes have been built: 00150 - It removes unnecessary structures from the net using 00151 \b 
cnOptimizeNet(). 00152 - If \b cnSortNodes is set, it sorts the constraint nodes 00153 using \b cnSortNodes(). 00154 - It applies \b cnUnaryPruning() to each constraint node. 00155 - If \b scUseCache is set, it initializes \b net->cache 00156 to a new cache returned by \b scNew(). 00157 - If \b cnEdgesFlag is not \b cnEdgesOff, it applies 00158 \b cnBuildEdges() to build the edges of the constraint net. 00159 00160 The function returns \b FALSE iff any of the subsidiary functions 00161 returned \b FALSE. 00162 */ 00163 ConstraintNet cnBuildFinal(ConstraintNet net, Boolean buildLVs) 00164 { 00165 int i, j, k; 00166 int max = net->lexemgraph + cnUseNonSpec ? 2 : 1; 00167 00168 /* We set all cells in isBuilt here, instead of doing it in 00169 cnBuildLevelValues, because that function does not do what isBuilt 00170 counts. isBuilt cares about whether all LVs that subordinate word 3 00171 under word 5 have been built ot not. cnBuildLevelValues() only builds 00172 LVs for that bind one particular grapheme, of which word 3 may have 00173 several, but for that grapheme it uses all possible modifiees. 00174 00175 To actually build only the LVs connecting word 3 to word 5, use 00176 cnBuildTriple(). 
00177 */ 00178 for(i = net->lexemgraph->min; i < net->lexemgraph->max; i++) { 00179 for(j = 0; j < max+1; j++) { 00180 for(k = 0; k < inputCurrentGrammar->noOfLevels; k++) { 00181 arraySetElement(net->isBuilt, (Pointer)(int)buildLVs, i, j, k); 00182 } 00183 } 00184 } 00185 00186 00187 /* 00188 checks net for deletable values and lexemes 00189 */ 00190 if (buildLVs && cnOptimizeNet(net) < 0) { 00191 net = NULL; 00192 return net; 00193 } 00194 00195 /* 00196 sorts nodes of constraintnet 00197 */ 00198 if (cnSortNodesMethod != 0) { 00199 cnSortNodes(net); 00200 } 00201 00202 /* 00203 unary pruning 00204 */ 00205 for (i = 0; i < vectorSize(net->nodes); i++) { 00206 ConstraintNode node = (ConstraintNode) vectorElement(net->nodes, i); 00207 00208 cnUnaryPruning(node); 00209 } 00210 00211 /* sort LVs in each node according to their score */ 00212 cnSortLVs(net); 00213 00214 /* builds the score cache */ 00215 if (scUseCache) { 00216 net->cache = scNew(vectorSize(net->values)); 00217 } else { 00218 net->cache = NULL; 00219 } 00220 00221 /* build edges TODO: in theory edges should be built incrementally too */ 00222 if (cnEdgesFlag != cnEdgesOff) { 00223 cnBuildEdges(net); 00224 if (net->edges == NULL || vectorSize(net->edges) == 0) { 00225 cdgPrintf(CDG_WARNING, "WARNING: invalid net, net contains no edges\n"); 00226 cnDelete(net); 00227 cdgCtrlCTrapped = cdgCtrlCAllowed = FALSE; 00228 net = NULL; 00229 return net; 00230 } 00231 } 00232 00233 return net; 00234 } 00235 00236 /* ---------------------------------------------------------------------- 00237 Build a constraint net from LAT. 00238 00239 This function returns a new constraint net for a word graph. This 00240 function uses \b cnBuildInit(), \b cnBuildNodes(), and 00241 \b cnBuildFinal(). The resulting net contains no edges. 
The function 00242 performs these initializations: 00243 - \b id is set to \b net<cnCounter> 00244 - the \b lexemgraph is built 00245 - \b state is set to \b NSCreated 00246 00247 If BUILDLVS is set, the nodes in the constraint net will be filled with 00248 LVs, otherwise the net will contain only constraint nodes, no LVs. 00249 */ 00250 ConstraintNet cnBuild(Lattice lat, Boolean buildLVs) 00251 { 00252 ConstraintNet net; 00253 00254 net = cnBuildInit(); 00255 00256 if(!cnTag(net,lat)) { 00257 return NULL; 00258 } 00259 00260 /* build nodes */ 00261 if (!cnBuildNodes(net, buildLVs)) { 00262 cdgPrintf(CDG_WARNING, "WARNING: invalid net, net contains no nodes\n"); 00263 cnDelete(net); 00264 return NULL; 00265 } 00266 00267 if (cdgCtrlCTrapped) { 00268 cdgPrintf(CDG_WARNING, "WARNING: interrupt while building a constraintnet\n"); 00269 cnDelete(net); 00270 return NULL; 00271 } 00272 00273 net = cnBuildFinal(net, buildLVs); 00274 if (net) 00275 cnMostRecentlyCreatedNet = net; 00276 else 00277 net = NULL; 00278 00279 return net; 00280 } 00281 00282 /* ---------------------------------------------------------------------- 00283 Print a constraint net. 00284 00285 This function displays a constraint net in textual form. Output is 00286 suppressed if \b hkVerbosity does not have the bit \b mode 00287 set. The function uses \b cnPrintNode() and \b lvPrint(). 00288 Constraint nodes from levels that have their \b showflag reset are 00289 skipped. Deleted LVs are only shown if \b cnShowDeletedFlag is set. 
00290 */ 00291 void cnPrint(long unsigned int mode, ConstraintNet net) 00292 { 00293 int i, j; 00294 ConstraintNode n; 00295 int noOfNotDeletedNodes = 0; 00296 int vmin = 99999999; 00297 int vmax = 0; 00298 int vtotal = 0; 00299 int vnum; 00300 00301 cdgPrintf(mode, "------------------------------------------------------------\n"); 00302 cdgPrintf(mode, " id: %s\n", net->id); 00303 cdgPrintf(mode, " nodes:\n"); 00304 for (i = 0; i < vectorSize(net->nodes) && !cdgCtrlCTrapped; i++) { 00305 n = (ConstraintNode) vectorElement(net->nodes, i); 00306 00307 if (!n->level->showflag) 00308 continue; 00309 00310 if (!lgAreDeletedNodes(net->lexemgraph, n->gn->lexemes)) 00311 noOfNotDeletedNodes++; 00312 00313 cdgPrintf(mode, "%2d ", i); 00314 cnPrintNode(mode, n); 00315 00316 cdgPrintf(mode, " %d:\n", vectorSize(n->values)); 00317 vnum = n->noValidValues; 00318 for (j = 0; j < vectorSize(n->values) && !cdgCtrlCTrapped; j++) { 00319 LevelValue lv = (LevelValue) vectorElement(n->values, j); 00320 00321 cdgPrintf(mode, " "); 00322 00323 if (!lv->isDeleted || cnShowDeletedFlag) { 00324 cdgPrintf(mode, lv->isDeleted ? "[" : " "); 00325 lvPrint(mode, lv, net->lexemgraph->isDeletedNode, 1); 00326 cdgPrintf(mode, lv->isDeleted ? "] " : " "); 00327 cdgPrintf(mode, "\n"); 00328 } 00329 } 00330 vmin = min(vmin, vnum == 0 ? vmin : vnum); 00331 vmax = max(vmax, vnum); 00332 vtotal += vnum; 00333 cdgPrintf(mode, "\n"); 00334 } 00335 cdgPrintf(mode, "#solutions(s): %d\n", listSize(net->parses)); 00336 cdgPrintf(mode, "#nodes: %d/%d\n", noOfNotDeletedNodes, vectorSize(net->nodes)); 00337 cdgPrintf(mode, "#paths: %lld\n", net->lexemgraph->noOfPaths); 00338 cdgPrintf(mode, "values: #min %d, #max %d, #total %d, average %5.2f\n", 00339 vmin, vmax, vtotal, vtotal / (float)noOfNotDeletedNodes); 00340 00341 cdgPrintf(mode, "#edges: %d\n", net->edges == NULL ? 
0 : vectorSize(net->edges)); 00342 /* 00343 for (i = 0; i < vectorSize(net->nodes); i++) 00344 { 00345 n = (ConstraintNode) vectorElement(net->nodes, i); 00346 cdgPrintf(mode,"%2d %s(%d,%d)-%s: ", i, 00347 n->lexemnode->lexem->description, 00348 n->lexemnode->arc->from, 00349 n->lexemnode->arc->to, 00350 n->level->id); 00351 for (j = 0; j < vectorSize(net->edges); j++) 00352 { 00353 ConstraintEdge e = (ConstraintEdge) vectorElement(net->edges, j); 00354 00355 if (n == e->start) 00356 { 00357 cdgPrintf(mode,"%s(%d,%d)-%s ", 00358 e->stop->lexemnode->lexem->description, 00359 e->stop->lexemnode->arc->from, 00360 e->stop->lexemnode->arc->to, 00361 e->stop->level->id); 00362 } 00363 } 00364 cdgPrintf(mode,"\n"); 00365 } 00366 */ 00367 00368 cdgPrintf(mode, "------------------------------------------------------------\n"); 00369 00370 00371 if (net->lexemgraph->chunks) { 00372 cdgPrintf(mode, "chunks:\n"); 00373 chunkerPrintChunks(mode, net->lexemgraph->chunks); 00374 } 00375 } 00376 00377 /* ---------------------------------------------------------------------- 00378 Print a constraint edge. 00379 00380 This function displays a constraint edge and the associated matrix 00381 \b scores. It reacts to the display flag \b mode in the same 00382 way as \b cnPrint() and all other display functions. 
00383 */ 00384 void cnPrintEdge(long unsigned int mode, ConstraintEdge e) 00385 { 00386 int i, j; 00387 LevelValue lv; 00388 String pseudoLexeme; /* either a lexeme id or NIL or NONSPEC */ 00389 00390 /* the two nodes */ 00391 cdgPrintf(mode, "%s(%d,%d)-%s ---> %s(%d,%d)-%s\n", 00392 e->start->gn->arc->word, 00393 e->start->gn->arc->from, 00394 e->start->gn->arc->to, 00395 e->start->level->id, 00396 e->stop->gn->arc->word, 00397 e->stop->gn->arc->from, 00398 e->stop->gn->arc->to, 00399 e->stop->level->id); 00400 00401 /* matrix of combinational scores */ 00402 00403 /* top row */ 00404 cdgPrintf(mode, "start v stop >| "); 00405 for (j = 0; j < vectorSize(e->stop->values); j++) { 00406 lv = (LevelValue) vectorElement(e->stop->values, j); 00407 00408 if (lv->modifiee == NULL) { 00409 pseudoLexeme = "NIL"; 00410 } else if (lv->modifiee == NONSPEC) { 00411 pseudoLexeme = "NONSPEC"; 00412 } else { 00413 pseudoLexeme = lv->modifiee->lexem->description; 00414 } 00415 00416 cdgPrintf(mode, "%5.5s/%-10.10s ", lv->label, pseudoLexeme); 00417 00418 00419 } 00420 cdgPrintf(mode, "\n--------------------------------------------------"); 00421 cdgPrintf(mode, "--------------------------------------------------\n"); 00422 00423 /* other rows */ 00424 for (i = 0; i < vectorSize(e->start->values); i++) { 00425 lv = (LevelValue) vectorElement(e->start->values, i); 00426 00427 if (lv->modifiee == NULL) { 00428 pseudoLexeme = "NIL"; 00429 } else if (lv->modifiee == NONSPEC) { 00430 pseudoLexeme = "NONSPEC"; 00431 } else { 00432 pseudoLexeme = lv->modifiee->lexem->description; 00433 } 00434 00435 /* left column */ 00436 cdgPrintf(mode, "%5.5s/%-10.10s | ", lv->label, pseudoLexeme); 00437 00438 /* cells */ 00439 for (j = 0; j < vectorSize(e->stop->values); j++) { 00440 cdgPrintf(mode, " %.6e ", smGetScore(e->scores, i, j)); 00441 } 00442 cdgPrintf(mode, "\n"); 00443 } 00444 00445 cdgPrintf(mode, "\n"); 00446 00447 } 00448 00449 /* 
---------------------------------------------------------------------- 00450 Print a constraint node. 00451 00452 This function displays a constraint node. A node of the level 00453 \b LEVEL binding the lexemes \b das_1 and \b das_2 from 00454 time span <i>(1,2)</i> would be displayed like this: 00455 00456 \b das_1/das_2(1-2)/LEVEL 00457 */ 00458 void cnPrintNode(long unsigned int mode, ConstraintNode cn) 00459 { 00460 List l; 00461 00462 for (l = cn->gn->lexemes; l != NULL; l = listNext(l)) { 00463 cdgPrintf(mode, "%s", ((LexemNode) listElement(l))->lexem->description); 00464 if (listNext(l)) { 00465 cdgPrintf(mode, "/"); 00466 } 00467 } 00468 00469 cdgPrintf(mode, "(%d-%d)", 00470 cn->gn->arc->from, 00471 cn->gn->arc->to); 00472 00473 cdgPrintf(mode, "/%s", cn->level->id); 00474 } 00475 00476 /* ------------------------------------------------------------------- 00477 comparison function for cnUnaryPruning() 00478 00479 This function compares two LVs by their scores. It returns \b TRUE 00480 iff \b a->score < b->score. 00481 */ 00482 Boolean cnUnaryPruningCompare(LevelValue a, LevelValue b) 00483 { 00484 return (a->score < b->score); 00485 } 00486 00487 /* ---------------------------------------------------------------------- 00488 Remove a given percentage of values from a node. 00489 00490 This function finds the values with the worst scores from a 00491 constraint node and deletes them. Let <i>W</i> be the set of LVs in \b node. 00492 All elements of <i>W</i> are deleted whose score is smaller than the score 00493 of the <i>n</i>th element, where 00494 00495 n = \b (int) =|W| * (1 - \b cnUnaryPruningFraction) 00496 00497 This function temporarily sorts the set $W$ using the function 00498 \b cnUnaryPruningCompare()=. 
00499 */ 00500 void cnUnaryPruning(ConstraintNode node) 00501 { 00502 Vector v; 00503 int i, j; 00504 Number limit = 0.0; 00505 LevelValue lv; 00506 00507 if (node->noValidValues == 0) 00508 return; 00509 00510 v = vectorClone(node->values); 00511 vectorSort(v, cnUnaryPruningCompare); 00512 00513 i = (int)((node->noValidValues - 1) * (1.0 - cnUnaryPruningFraction)); 00514 00515 /* get the i-th valid value of the domain */ 00516 for (j = 0; j < vectorSize(v); j++) { 00517 lv = (LevelValue) vectorElement(v, j); 00518 if (!lv->isDeleted) 00519 i--; 00520 00521 if (i == 0) { 00522 limit = lv->score; 00523 break; 00524 } 00525 } 00526 00527 vectorDelete(v); 00528 00529 /* mark levelvalues as deleted if they fall below the limit */ 00530 for (i = 0; i < vectorSize(node->values); i++) { 00531 lv = (LevelValue) vectorElement(node->values, i); 00532 if (lv->isDeleted) 00533 continue; 00534 00535 if (lv->score < limit) { 00536 cdgPrintf(CDG_INFO, "INFO: deleting "); 00537 lvPrint(CDG_INFO, lv, NULL, 2); 00538 cdgPrintf(CDG_INFO, "\n"); 00539 00540 lv->isDeleted = TRUE; 00541 node->noValidValues--; 00542 } 00543 } 00544 } 00545 00546 /* ------------------------------------------------------------------- 00547 Compare constraint nodes by the ->no of their levels. 00548 */ 00549 Boolean cnNodeComparePrio(ConstraintNode a, ConstraintNode b) 00550 { 00551 return a->level->no > b->level->no; 00552 } 00553 00554 /* ------------------------------------------------------------------- 00555 Comparison function for cnSortNodes() based on domain size. 00556 00557 This function is used by \b cnSortNodes() to compare two 00558 constraint nodes according to the value of their 00559 \b totalNumberOfValues= fields. It returns \b TRUE iff \b a 00560 has a lower value or the same values as \b b. 
00561 */ 00562 Boolean cnNodeCompareSmallest(ConstraintNode a, ConstraintNode b) 00563 { 00564 return ( a->noValidValues <= b->noValidValues); 00565 } 00566 00567 /* ------------------------------------------------------------------- 00568 Sort constraint nodes. 00569 00570 This function sorts the constraint nodes of \b net using the 00571 function \b cnNodeCompare(). 00572 */ 00573 void cnSortNodes(ConstraintNet net) 00574 { 00575 ConstraintNode node; 00576 int i; 00577 00578 if (hkVerbosity & CDG_DEBUG) { 00579 cdgPrintf(CDG_DEBUG, "DEBUG: old sorting:\n"); 00580 for (i = 0; i < vectorSize(net->nodes); i++) { 00581 node = (ConstraintNode) vectorElement(net->nodes, i); 00582 cdgPrintf(CDG_DEBUG, " %04d: [%3d] ", i, node->noValidValues); 00583 cnPrintNode(CDG_DEBUG, node); 00584 cdgPrintf(CDG_DEBUG, "\n"); 00585 } 00586 } 00587 00588 if (cnSortNodesMethod==1) { 00589 vectorSort(net->nodes, cnNodeComparePrio); 00590 } else if (cnSortNodesMethod==2) { 00591 vectorSort(net->nodes, cnNodeCompareSmallest); 00592 } 00593 00594 if (hkVerbosity & CDG_DEBUG) { 00595 cdgPrintf(CDG_DEBUG, "DEBUG: new sorting:\n"); 00596 for (i = 0; i < vectorSize(net->nodes); i++) { 00597 node = (ConstraintNode) vectorElement(net->nodes, i); 00598 cdgPrintf(CDG_DEBUG, " %04d: [%3d] ", i, node->noValidValues); 00599 cnPrintNode(CDG_DEBUG, node); 00600 cdgPrintf(CDG_DEBUG, "\n"); 00601 } 00602 } 00603 00604 } 00605 00606 /* ---------------------------------------------------------------------- 00607 Build a new levelvalue in NODE. 00608 00609 This function creates exactly one LV with the specified fields using 00610 \b lvNew() and stores it in \b node, incrementing all relevant 00611 counters properly. 
00612 */ 00613 void cnBuildLv(ConstraintNode node, List modifiers, Level level, String label, List modifiees) 00614 { 00615 LevelValue newLv = lvNew(modifiers, level, label, modifiees); 00616 ConstraintNet net = node->net; 00617 00618 newLv->score = evalUnary(newLv, net, NULL, FALSE, NULL, NULL); 00619 if (statUseStatisticsFlag) { 00620 newLv->score *= statUnary(newLv, net); 00621 } 00622 00623 #ifdef DEBUGCNBUILDLV 00624 cdgPrintf(CDG_DEBUG, "DEBUG: Built LV "); 00625 lvPrint(CDG_DEBUG, newLv, NULL); 00626 #endif 00627 00628 if (newLv->score == 0.0) { 00629 #ifdef DEBUGCNBUILDLV 00630 cdgPrintf(CDG_DEBUG, "... invalid.\n"); 00631 #endif 00632 lvDelete(newLv); 00633 } else { 00634 #ifdef DEBUGCNBUILDLV 00635 cdgPrintf(CDG_DEBUG, "... OK.\n"); 00636 #endif 00637 newLv->limit = newLv->score; 00638 vectorAddElement(node->values, newLv); 00639 newLv->indexWRTNet = vectorAddElement(net->values, newLv); 00640 net->totalNumberOfValues++; 00641 node->totalNumberOfValues++; 00642 } 00643 } 00644 00645 /* ---------------------------------------------------------------------- 00646 Builds all level values for LEVEL, MODIFIER and MODIFIEE. 00647 00648 This function builds all LVs that represent tuples composed from the 00649 parameters it receives. This function is used as the innermost loop by 00650 \b cnBuildIter(). 
00651 */ 00652 void cnBuildLevelValues(ConstraintNode node, Level level, 00653 GraphemNode modifier, GraphemNode modifiee) 00654 { 00655 ConstraintNet net = node->net; 00656 Direction dir; 00657 List l; 00658 00659 /* guard against self-modification */ 00660 if(!lgMayModify(net->lexemgraph, modifier, modifiee)) { 00661 return; 00662 } 00663 00664 /* compute edge direction */ 00665 if (modifiee == NONSPEC) { 00666 dir = Right; 00667 } else if(!modifiee) { 00668 dir = Nil; 00669 } else if(modifier->arc->from < modifiee->arc->from) { 00670 dir = Right; 00671 } else { 00672 dir = Left; 00673 } 00674 00675 /* iterate over labels */ 00676 for(l = level->labels; l != NULL; l = listNext(l)) { 00677 00678 /* compute edge type */ 00679 String label = listElement(l); 00680 EdgeType et = etEncode(level,label,dir); 00681 BitString downRelevant = vectorElement(inputCurrentGrammar->downFeatures, et); 00682 BitString upRelevant = vectorElement(inputCurrentGrammar->upFeatures, et); 00683 List downClasses = lgPartitions(modifier, downRelevant); 00684 List upClasses = lgPartitions(modifiee, upRelevant); 00685 00686 List m, n; 00687 for (m = downClasses; m != NULL; m = listNext(m)) { 00688 List modifiers = listElement(m); 00689 for (n = upClasses; n != NULL; n = listNext(n)) { 00690 List modifiees = listElement(n); 00691 cnBuildLv(node, modifiers, level, label, modifiees); 00692 } 00693 } 00694 if (spec(modifiee)) { 00695 listForEachDelete(upClasses, listDelete); 00696 } else { 00697 listDelete(upClasses); 00698 } 00699 listForEachDelete(downClasses, listDelete); 00700 } 00701 } 00702 00703 /* ---------------------------------------------------------------------- 00704 Build all LVs on LEVEL for the pair of time points (A,B) in NET. 00705 00706 All LVs that connect a word that starts at time point A as a modifier to 00707 a word that starts at time point B are built and inserted into the 00708 constraint net at the appropriate places. B == -1 means NIL, B == -2 00709 means NONSPEC. 
00710 00711 Returns FALSE if such LVs have already been built. 00712 00713 This function has nothing to do with incremental processing. It is 00714 intended for use by parsing algorithms that do not want the entire set 00715 of LVs built before they can start working, although the entire input 00716 is known. 00717 */ 00718 Boolean cnBuildTriple(ConstraintNet net, int a, int b, int levelno) { 00719 LexemGraph lg = net->lexemgraph; 00720 int i,j; 00721 int bb = b; 00722 List l; 00723 /* warp sentinel values */ 00724 if(b < 0) { 00725 bb = lg->max - b; 00726 } 00727 00728 /* refuse unnecessary work */ 00729 if(arrayElement(net->isBuilt, a, bb, levelno)) { 00730 return FALSE; 00731 } 00732 00733 /* do necessary work */ 00734 for (l = inputCurrentGrammar->levels; l != NULL; l = listNext(l)) { 00735 Level level = (Level) listElement(l); 00736 if (!level->useflag) continue; 00737 if(level->no != levelno) continue; 00738 00739 for(i = 0; i < vectorSize(lg->graphemnodes); i++) { 00740 GraphemNode gn = vectorElement(lg->graphemnodes, i); 00741 ConstraintNode node = NULL; 00742 if(a != gn->arc->from) continue; 00743 00744 /* find pertinent constraint node */ 00745 for(j = 0; j < vectorSize(net->nodes); j++) { 00746 node = vectorElement(net->nodes, j); 00747 if(node->level != level) continue; 00748 if(node->gn != gn) continue; 00749 break; 00750 } 00751 00752 if(-1 == b) { 00753 cnBuildLevelValues(node, level, gn, NULL); 00754 } 00755 else if(-2 == b) { 00756 cnBuildLevelValues(node, level, gn, NONSPEC); 00757 } 00758 else for(j = 0; j < vectorSize(lg->graphemnodes); j++) { 00759 GraphemNode modifiee = vectorElement(lg->graphemnodes, j); 00760 if(modifiee->arc->from != b) continue; 00761 cnBuildLevelValues(node, level, gn, modifiee); 00762 } 00763 node->noValidValues = vectorSize(node->values); 00764 node->totalNumberOfValues = node->noValidValues; 00765 } 00766 } 00767 00768 /* be proud of it */ 00769 arraySetElement(net->isBuilt, (Pointer)TRUE, a, bb, levelno); 00770 return 
TRUE; 00771 00772 } 00773 00774 00775 /* ---------------------------------------------------------------------- 00776 Builds the constraint node corresponding to GN in NET. 00777 00778 This function really performs most of the work that was documented 00779 under cnBuildNodes() for simplicity. 00780 */ 00781 Boolean cnBuildIter(ConstraintNet net, GraphemNode gn, Boolean buildLVs) 00782 { 00783 Level level; 00784 GraphemNode modifiee; 00785 ConstraintNode node; 00786 List l; 00787 int k, z; 00788 int ggs = vectorSize(net->lexemgraph->graphemnodes); 00789 int lgs = vectorSize(net->lexemgraph->nodes); 00790 int lvs = listSize(inputCurrentGrammar->levels); 00791 00792 if (gn->lexemes == NULL) 00793 return TRUE; 00794 00795 cdgFlush(); 00796 00797 /* loop through all levels */ 00798 for (l = inputCurrentGrammar->levels; l != NULL; l = listNext(l)) { 00799 level = (Level) listElement(l); 00800 if (!level->useflag) 00801 continue; 00802 00803 /* check for ^C here */ 00804 if (cdgCtrlCTrapped) { 00805 00806 /* delete all constraint-nodes */ 00807 for (z = 0; z < vectorSize(net->nodes); z++) { 00808 node = (ConstraintNode) vectorElement(net->nodes, z); 00809 cnDeleteNode(node); 00810 } 00811 vectorDelete(net->nodes); 00812 net->nodes = NULL; 00813 return FALSE; 00814 } 00815 cdgFlush(); 00816 00817 /* if an entire grapheme is deleted from the graph already, 00818 do not even build the constraint node. 
*/ 00819 if (lgAreDeletedNodes(net->lexemgraph, gn->lexemes)) { 00820 continue; 00821 } 00822 00823 /* allocate new constraint node */ 00824 node = (ConstraintNode) memMalloc(sizeof (ConstraintNodeStruct)); 00825 node->net = net; 00826 node->level = level; 00827 node->gn = gn; 00828 node->values = vectorNew(lgs * lvs); 00829 node->totalNumberOfValues = 0; 00830 00831 /* fill it */ 00832 if(buildLVs) { 00833 for (k = 0; k <= ggs; k++) { 00834 if(k == ggs) { 00835 modifiee = NULL; 00836 } else { 00837 modifiee = vectorElement(net->lexemgraph->graphemnodes, k); 00838 } 00839 cnBuildLevelValues(node, level, gn, modifiee); 00840 } 00841 if (cnUseNonSpec) { 00842 cnBuildLevelValues(node, level, gn, NONSPEC); 00843 } 00844 } 00845 00846 /* add it to the net */ 00847 node->noValidValues = vectorSize(node->values); 00848 node->totalNumberOfValues = node->noValidValues; 00849 vectorAddElement(net->nodes, node); 00850 } 00851 return TRUE; 00852 } 00853 00854 00855 /* ---------------------------------------------------------------------- 00856 Builds the constraint nodes of NET. If BUILDLVS is set, the nodes are 00857 immediately filled with all possible LVs, otherwise they remain empty. 00858 00859 Returns FALSE if the constraintnet is invalid. 
Basically it performs the following steps:

   - allocates the Vectors \b nodes and \b values
   - partitions the set of lexeme nodes created from each word
     hypothesis according to each level by using \b lgPartitions()
   - allocates a \b ConstraintNode for each of the partitions <i>k</i>
     and inserts it into the Vector \b nodes
   - checks whether the subordination <i>(k, l, m)</i> is possible for each
     triple of modifier set, label and modifiee set, and constructs the
     corresponding LV
   - checks whether the subordination <i>(k, l, \b NIL)</i> is possible for each
     pair of modifier set and label, and constructs the corresponding LV
   - inserts all new LVs into the respective constraint nodes and the
     Vector \b values
   - sorts the LVs in each constraint node by their limit.

   However, some complications apply:

   - Under \b CDG_DEBUG, each word in the word graph is printed
     as it is used to build LVs.
   - If a level has its \b useflag reset, it is ignored completely.
   - Some of the LVs created may be destroyed again by
     \b cnUnaryPruning() if the variable
     \b cnUnaryPruningFraction is smaller than 1. Such LVs are not
     stored in the constraint node nor in the constraint net.
   - Each iteration executes the hook \b HOOK_CNBUILDNODES.
   - If \b cnSortNodes is set, \b cnSortNodes() is called
     after building all nodes.
   - If \b scUseCache is set, a new cache is allocated for the
     constraint net.
   - The time elapsed is printed as a \b CDG_PROFILE message.
   - This function is interruptible by \b C-c in much the same
     way as \b cnBuildEdges().
00894 */ 00895 Boolean cnBuildNodes(ConstraintNet net, Boolean buildLVs) 00896 { 00897 Timer profileTime; 00898 GraphemNode gn; 00899 int i; 00900 int ggs = vectorSize(net->lexemgraph->graphemnodes); 00901 int lgs = vectorSize(net->lexemgraph->nodes); 00902 int lvs = listSize(inputCurrentGrammar->levels); 00903 LexemGraph lg = net->lexemgraph; 00904 00905 profileTime = timerNew(); 00906 net->nodes = vectorNew(lgs * lvs); 00907 net->values = vectorNew(lgs * lvs); 00908 net->isBuilt = arrayNew(lg->max, (2+lg->max), lvs, 0); 00909 00910 for (i = 0; i < ggs; i++) { 00911 gn = (GraphemNode) vectorElement(net->lexemgraph->graphemnodes, i); 00912 if (gn->lexemes == NULL) 00913 continue; 00914 00915 cdgPrintf(CDG_DEBUG, "%s ", 00916 ((LexemNode) listElement(gn->lexemes))->lexem->word); 00917 00918 if (!cnBuildIter(net, gn, buildLVs)) 00919 break; 00920 } 00921 00922 cdgPrintf(CDG_DEBUG, "\n"); 00923 cdgPrintf(CDG_PROFILE, 00924 "PROFILE: building nodes took %ldms\n", timerElapsed(profileTime)); 00925 if (hkVerbosity & CDG_HOOK) 00926 cdgExecHook(HOOK_CNBUILDNODES, net); 00927 00928 timerFree(profileTime); 00929 return TRUE; 00930 } 00931 00932 /* ---------------------------------------------------------------------- 00933 Optimize constraint node by deleting lexemes and values. 00934 @returns > 0 : we have changed the constraintnode 00935 @returns 0 : no changes have been made 00936 @returns < 0 : the node is invalid 00937 00938 This function serves to discard structures in a constraint node that 00939 cannot appear in any solution. It returns the number of deletion 00940 operations that it has performed, or -1 if an inconsistency was found. 00941 This function is repeatedly called by \b cnOptimizeNet(). 
00942 */ 00943 int cnOptimizeNode(ConstraintNet net, ConstraintNode node) 00944 { 00945 int k; 00946 LevelValue lv; 00947 int noChanges = 0; 00948 LexemGraph lg = net->lexemgraph; 00949 GraphemNode gn; 00950 List l; 00951 Boolean pruneModifiees = TRUE; 00952 GraphemNode theModifiee = NULL; 00953 BitString down = bitNew(vectorSize(lg->nodes)); 00954 BitString up = bitNew(vectorSize(lg->nodes)); 00955 bitClearAll(down); 00956 bitClearAll(up); 00957 00958 #ifdef DEBUGCNOPTIMIZENODE 00959 { 00960 int i; 00961 cdgPrintf(CDG_DEBUG, 00962 "DEBUG: optimizing constraint node %s/%s(%d,%d)\n", 00963 node->gn->arc->word, 00964 node->level->id, 00965 node->gn->arc->from, 00966 node->gn->arc->to); 00967 for(i = 0; i < vectorSize(node->values); i++) { 00968 LevelValue lv = vectorElement(node->values, i); 00969 if(lv->isDeleted) { 00970 cdgPrintf(CDG_DEBUG, "["); 00971 } 00972 lvPrint(CDG_DEBUG, lv, NULL, 0); 00973 if(lv->isDeleted) { 00974 cdgPrintf(CDG_DEBUG, "]"); 00975 } 00976 cdgPrintf(CDG_DEBUG, "\n"); 00977 } 00978 } 00979 #endif 00980 /* condition (a) */ 00981 if(node->noValidValues > 0) { 00982 00983 /* test domain */ 00984 for (k = 0; k < vectorSize(node->values); k++) { 00985 lv = (LevelValue) vectorElement(node->values, k); 00986 if (lv->isDeleted) { 00987 continue; 00988 } 00989 00990 /* mark invalid levelvalues as deleted */ 00991 if (lgAreDeletedNodes(lg, lv->modifiers)) { 00992 #ifdef DEBUGCNOPTIMIZENODE 00993 cdgPrintf(CDG_DEBUG, "DEBUG: Deleting LV "); 00994 lvPrint(CDG_DEBUG, lv, NULL, 0); 00995 cdgPrintf(CDG_DEBUG, " because of invalid modifiers.\n"); 00996 #endif 00997 lv->isDeleted = TRUE; 00998 node->noValidValues--; 00999 noChanges++; 01000 } else if ((spec(lv->modifiees) && 01001 lgAreDeletedNodes(lg, lv->modifiees)) || 01002 (spec(lv->modifiee) && 01003 !lgCompatibleNodes(lg, lv->modifier, lv->modifiee))) { 01004 #ifdef DEBUGCNOPTIMIZENODE 01005 cdgPrintf(CDG_DEBUG, "DEBUG: Deleting LV "); 01006 lvPrint(CDG_DEBUG, lv, NULL, 0); 01007 cdgPrintf(CDG_DEBUG, 
" because of invalid modifiees.\n"); 01008 #endif 01009 lv->isDeleted = TRUE; 01010 node->noValidValues--; 01011 noChanges++; 01012 } 01013 01014 if(lv->isDeleted) { 01015 continue; 01016 } 01017 01018 /* detect that a lexeme node can still be bound */ 01019 for(l = lv->modifiers; l != NULL; l = listNext(l)) { 01020 LexemNode ln = listElement(l); 01021 bitSet(down, ln->no); 01022 } 01023 01024 /* if all of the LVs in this node modify the same grapheme node, 01025 then we can delete all variants of their modifiees. 01026 01027 Otherwise we can't be sure, so we don't do anything. */ 01028 01029 01030 if(spec(lv->modifiee)) { 01031 if(pruneModifiees) { 01032 if(!theModifiee) { 01033 theModifiee = lv->modifiee->grapheme; 01034 #ifdef DEBUGCNOPTIMIZENODE 01035 cdgPrintf(CDG_DEBUG, 01036 "DEBUG: no modifiee seen yet, taking `%s'(%d-%d)\n", 01037 theModifiee->arc->word, 01038 theModifiee->arc->from, 01039 theModifiee->arc->to); 01040 #endif 01041 } else if(theModifiee != lv->modifiee->grapheme) { 01042 #ifdef DEBUGCNOPTIMIZENODE 01043 cdgPrintf(CDG_DEBUG, "DEBUG: That's a different modifiee -- no modifiee pruning.\n"); 01044 #endif 01045 pruneModifiees = FALSE; 01046 } 01047 } 01048 if(pruneModifiees) { 01049 for(l = lv->modifiees; l != NULL; l = listNext(l)) { 01050 LexemNode ln = listElement(l); 01051 bitSet(up, ln->no); 01052 } 01053 } 01054 } else { 01055 #ifdef DEBUGCNOPTIMIZENODE 01056 if(pruneModifiees) { 01057 cdgPrintf(CDG_DEBUG, "DEBUG: NIL binding seen -- no modifiee pruning.\n"); 01058 } 01059 #endif 01060 pruneModifiees = FALSE; 01061 } 01062 } 01063 } 01064 01065 if (node->noValidValues == 0) { 01066 if (!lgAreDeletableNodes(lg, node->gn->lexemes)) { 01067 cdgPrintf(CDG_WARNING, 01068 "WARNING: can't find a value for %s/%s(%d,%d), invalid net\n", 01069 node->gn->arc->word, 01070 node->level->id, 01071 node->gn->arc->from, 01072 node->gn->arc->to); 01073 01074 return -1; /* this indicates an invalid net */ 01075 } 01076 } 01077 01078 /* condition (b) */ 01079 
for(l = node->gn->lexemes; l != NULL; l = listNext(l)) { 01080 LexemNode ln = listElement(l); 01081 if(!lgIsDeletedNode(lg, ln) && !bitGet(down, ln->no)) { 01082 #ifdef DEBUGCNOPTIMIZENODE 01083 cdgPrintf(CDG_DEBUG, "DEBUG: deleting "); 01084 lgPrintNode(CDG_DEBUG, ln); 01085 cdgPrintf(CDG_DEBUG, " because of the modifier criterion.\n"); 01086 #endif 01087 lgDeleteNode(lg, ln); 01088 noChanges++; 01089 } 01090 } 01091 if(pruneModifiees && theModifiee) { 01092 for(l = theModifiee->lexemes; l != NULL; l = listNext(l)) { 01093 LexemNode ln = listElement(l); 01094 if(!lgIsDeletedNode(lg, ln) && !bitGet(up, ln->no)) { 01095 #ifdef DEBUGCNOPTIMIZENODE 01096 cdgPrintf(CDG_DEBUG, "DEBUG: deleting "); 01097 lgPrintNode(CDG_DEBUG, ln); 01098 cdgPrintf(CDG_DEBUG, " because of the modifiee criterion.\n"); 01099 #endif 01100 lgDeleteNode(lg, ln); 01101 noChanges++; 01102 } 01103 } 01104 } 01105 bitDelete(up); 01106 bitDelete(down); 01107 01108 /* condition (c) */ 01109 gn = node->gn; 01110 if(lg->noOfPathsFromStart[gn->no] * lg->noOfPathsToEnd[gn->no] == 0 && 01111 !lgAreDeletedNodes(lg, gn->lexemes)) { 01112 #ifdef DEBUGCNOPTIMIZENODE 01113 cdgPrintf(CDG_DEBUG, "DEBUG: Deleting entire grapheme node because of the path criterion.\n"); 01114 #endif 01115 lgDeleteNodes(lg, gn->lexemes); 01116 noChanges++; 01117 } 01118 01119 return noChanges; 01120 } 01121 01122 /* ---------------------------------------------------------------------- 01123 This function returns the number of undeleted LVs in \b net. 
01124 It is a helper fo cnOptimizeNet 01125 */ 01126 int countValidValues(ConstraintNet net) 01127 { 01128 int i; 01129 LevelValue lv; 01130 int result = 0; 01131 01132 for (i = 0; i < vectorSize(net->values); i++) { 01133 lv = (LevelValue)vectorElement(net->values, i); 01134 if (!lv->isDeleted) 01135 result++; 01136 } 01137 01138 return result; 01139 } 01140 01141 01142 /* ---------------------------------------------------------------------- 01143 optimize constraint net by deleting lexemes and values 01144 01145 This function deletes lexeme nodes and LVs from a constraint net that 01146 cannot possibly appear in any solution. It uses several rules for 01147 deleting structures: 01148 01149 - An LV can be deleted if it binds a set of lexeme nodes that are 01150 all deleted. 01151 - An LV can be deleted if is modifier and modifiee are 01152 incompatible. 01153 - A lexeme node can be deleted if it it cannot be bound by any LV 01154 on one level. 01155 - A lexeme can be deleted if there is no path through the lexeme 01156 graph in which it appears, and which is totally undeleted. 01157 01158 Since these conditions can trigger each other, the function 01159 \b cnOptimizeNode() is called repeatedly on each node in the 01160 constraint net until no progress has been made. The function returns 01161 the total number of changes returned by the calls to 01162 \b cnOptimizeNode(), or -1 if none of them did so. 
01163 01164 @returns > 0 : we have changed the constraintnet 01165 @returns 0 : no changes have been made 01166 @returns < 0 : the net is invalid 01167 */ 01168 int cnOptimizeNet(ConstraintNet net) 01169 { 01170 ConstraintNode node; 01171 int i, n, result = 0; 01172 Boolean progress = TRUE; 01173 #ifdef DEBUG_CNOPTIMIZENET 01174 int noDeletedValues; 01175 int oldNoValues; 01176 oldNoValues = countValidValues(net); 01177 #endif 01178 01179 /* loop while progress has been made */ 01180 while (progress) { 01181 progress = FALSE; 01182 01183 /* loop over all nodes and call cnOptimizeNode */ 01184 for (i = 0; i < vectorSize(net->nodes); i++) { 01185 node = (ConstraintNode)vectorElement(net->nodes, i); 01186 01187 n = cnOptimizeNode(net, node); 01188 if (n < 0) { 01189 return -1; 01190 } 01191 else if (n > 0) { 01192 progress = TRUE; 01193 result += n; 01194 } 01195 } 01196 } 01197 01198 #ifdef DEBUG_CNOPTIMIZENET 01199 noDeletedValues = oldNoValues - countValidValues(net); 01200 if (noDeletedValues) { 01201 cdgPrintf(CDG_DEBUG, "DEBUG: deleted %d levelvalues\n", 01202 noDeletedValues); 01203 } 01204 #endif 01205 01206 return result; 01207 } 01208 01209 /* ---------------------------------------------------------------------- 01210 Are two constraint nodes connected by an arc? 01211 This function checks whether two constraint nodes should be connected by an 01212 edge or not. This check is always performed before allocating a 01213 constraint edge. Two nodes fail this test if they bind the same lexeme 01214 and belong to the same level. 01215 */ 01216 Boolean cnConnectedByArc(ConstraintNode a, ConstraintNode b) 01217 { 01218 /** 01219 Think it over which nodes are connected by an arc. 01220 Do we need arcs in both directions? 
01221 */ 01222 return (a->gn != b->gn || a->level != b->level); 01223 01224 /** 01225 return ((a->lexemnode == b->lexemnode && a->level != b->level) 01226 || 01227 (a->level == b->level 01228 && 01229 lgDistanceOfNodes (a->lexemnode->lexemgraph, 01230 a->lexemnode, 01231 b->lexemnode) != 0 01232 ) 01233 ); 01234 */ 01235 } 01236 01237 /* ---------------------------------------------------------------------- 01238 Return a constraint net to untouched state 01239 01240 This function undoes all changes that have been made to a constraint 01241 net since it was built. In particular, it restores all deleted 01242 lexeme nodes and LVs to a constraint net. All parses of 01243 \b net are deallocated. This returns the net to the state that it 01244 was in immediately after being optimized. \b TRUE is returned only 01245 if at least one value remains for each pair of word and level. 01246 */ 01247 Boolean cnRenew(ConstraintNet net) 01248 { 01249 int i, j; 01250 LevelValue lv; 01251 LexemNode ln; 01252 ConstraintNode node; 01253 01254 /* refresh constraint nodes and LVs */ 01255 for (i = 0; i < vectorSize(net->nodes); i++) { 01256 node = (ConstraintNode) vectorElement(net->nodes, i); 01257 01258 node->noValidValues = 0; 01259 for (j = 0; j < vectorSize(node->values); j++) { 01260 lv = vectorElement(node->values, j); 01261 01262 if (!cnUseNonSpec && lv->modifiee == NONSPEC) { 01263 lv->isDeleted = TRUE; 01264 } else { 01265 lv->isDeleted = FALSE; 01266 node->noValidValues++; 01267 } 01268 lv->limit = lv->score; 01269 lv->no = -1; 01270 } 01271 } 01272 01273 /* refresh individual lexemes */ 01274 for (i = 0; i < vectorSize(net->lexemgraph->nodes); i++) { 01275 ln = vectorElement(net->lexemgraph->nodes, i); 01276 ln->limit = 1.0; 01277 } 01278 01279 /* undo lexem deletions */ 01280 bvSetAllElements(net->lexemgraph->isDeletedNode, FALSE); 01281 lgComputeNoOfPaths(net->lexemgraph); 01282 lgComputeDistances(net->lexemgraph); 01283 01284 /* throw away score-cache and build a new one 
*/ 01285 scDelete(net->cache); 01286 if (scUseCache) { 01287 net->cache = scNew(vectorSize(net->values)); 01288 } else { 01289 net->cache = NULL; 01290 } 01291 01292 /* throw away all solutions */ 01293 /* Parses are owned by inputCurrentGrammar and are not freed here. */ 01294 listDelete(net->parses); 01295 net->parses = NULL; 01296 01297 /* re-sort the LVs by unary score */ 01298 cnSortLVs(net); 01299 01300 /* optimize the net */ 01301 if(cnOptimizeNet(net) < 0) { 01302 return FALSE; 01303 }; 01304 01305 return TRUE; 01306 } 01307 01308 /* ---------------------------------------------------------------------- 01309 builds edges in a constraint net 01310 01311 This function computes all edges in a constraint net: 01312 01313 - The Vector \b edges is allocated. 01314 - A constraint edge is built for every pair of constraint nodes 01315 and inserted into \b edges. 01316 - The fields \b isMarked and \b scores are allocated for 01317 each edge. 01318 - All pairs of LVs from the two constraint nodes are evaluated 01319 jointly and the result is store in the matrix \b scores. The result 01320 will be~0 if the LVs are no \b lvCompatible(), otherwise it is 01321 the product of the combined binary score and the two unary scores. 01322 - A mapping from all edges to their reverses is computed and 01323 stored in the field \b reverse. 01324 01325 \b C-c interrupts this function, deletes all partial results, and 01326 displays the total time elapsed as a \b CDG_PROFILE message. 
01327 */ 01328 void cnBuildEdges(ConstraintNet net) 01329 { 01330 LexemGraph lg = net->lexemgraph; 01331 int lgs = vectorSize(lg->nodes); 01332 int lvs = listSize(inputCurrentGrammar->levels); 01333 int i, j, k, l; 01334 int total, done = 0; 01335 ConstraintNode in, jn; 01336 ConstraintEdge edge, reverse; 01337 Timer profileTime; 01338 Vector reverseTable; 01339 01340 profileTime = timerNew(); 01341 01342 reverseTable = vectorNew(vectorSize(net->nodes) * vectorSize(net->nodes)); 01343 vectorSetAllElements(reverseTable, NULL); 01344 01345 /* estimate total # of steps necessary */ 01346 total = vectorSize(net->nodes) * vectorSize(net->nodes); 01347 if (!cdgXCDG) { 01348 cdgPrintf(CDG_DEBUG, "DEBUG: building edges... 0%%"); 01349 } 01350 01351 /* 01352 check whether there are already edges, free them 01353 */ 01354 01355 /* 01356 How many edges will we probably end up with? 01357 (lgs*lvs)^2 is the maximum number 01358 */ 01359 net->edges = vectorNew(lgs * lvs); 01360 for (i = 0; i < vectorSize(net->nodes); i++) { 01361 in = (ConstraintNode) vectorElement(net->nodes, i); 01362 for (j = 0; j < vectorSize(net->nodes); j++) { 01363 jn = (ConstraintNode) vectorElement(net->nodes, j); 01364 01365 /* progress message */ 01366 if (!cdgXCDG) { 01367 cdgPrintf(CDG_DEBUG, "%3.0f%%", 100 * (++done / (float)total)); 01368 } 01369 01370 /* check for ^C here */ 01371 if (cdgCtrlCTrapped) { 01372 /* compare to cnDelete */ 01373 for (i = 0; i < vectorSize(net->edges); i++) { 01374 edge = (ConstraintEdge) vectorElement(net->edges, i); 01375 memFree(edge->isMarked); 01376 smDelete(edge->scores); 01377 memFree(edge); 01378 } 01379 vectorDelete(net->edges); 01380 /* end comparison */ 01381 net->edges = NULL; 01382 return; 01383 } 01384 /* TEST TEST TEST TEST TEST TEST TEST TEST */ 01385 /* not really necessary */ 01386 if (in->level->no <= jn->level->no) 01387 k = in->level->no * lvs + jn->level->no; 01388 else 01389 k = jn->level->no * lvs + in->level->no; 01390 01391 if (cnEdgesFlag == 
cnEdgesFew && inputCurrentGrammar->levelMatrixCounter != NULL 01392 && (int)vectorElement(inputCurrentGrammar->levelMatrixCounter, 01393 k) == 0) continue; 01394 /* TEST TEST TEST TEST TEST TEST TEST TEST */ 01395 01396 /* connected by arc should be configurable */ 01397 if (cnConnectedByArc(in, jn)) { 01398 edge = (ConstraintEdge) memMalloc(sizeof (ConstraintEdgeStruct)); 01399 edge->start = in; 01400 edge->stop = jn; 01401 edge->reverse = (ConstraintEdge) NULL; 01402 01403 /* 7/31/97 IS - added isMarked to general edge setup */ 01404 edge->isMarked = 01405 (Boolean *) memMalloc(sizeof (Boolean) * vectorSize(in->values)); 01406 edge->scores = smNew(vectorSize(in->values), vectorSize(jn->values)); 01407 for (k = 0; k < vectorSize(in->values); k++) { 01408 LevelValue ik = (LevelValue) vectorElement(in->values, k); 01409 01410 edge->isMarked[k] = FALSE; 01411 for (l = 0; l < vectorSize(jn->values); l++) { 01412 LevelValue jl = (LevelValue) vectorElement(jn->values, l); 01413 double d; 01414 01415 /* check whether the level values can exist on a path */ 01416 if (lvCompatible(lg, ik, jl)) { 01417 d = ik->score * jl->score * evalBinary(ik, jl, net, NULL, FALSE, NULL, NULL); 01418 } else { 01419 d = 0.0; 01420 } 01421 smSetScore(edge->scores, d, k, l); 01422 /* 30.7.97 changed initialization FALSE -> TRUE */ 01423 smSetFlag(edge->scores, TRUE, k, l); 01424 } 01425 } 01426 vectorAddElement(net->edges, edge); 01427 01428 k = i * vectorSize(net->nodes) + j; 01429 vectorSetElement(reverseTable, edge, k); 01430 k = i + vectorSize(net->nodes) * j; 01431 reverse = vectorElement(reverseTable, k); 01432 if (reverse != NULL) { 01433 edge->reverse = reverse; 01434 reverse->reverse = edge; 01435 } 01436 } 01437 } 01438 cdgFlush(); 01439 } 01440 01441 vectorDelete(reverseTable); 01442 01443 if (!cdgXCDG) { 01444 cdgPrintf(CDG_DEBUG, "done.\n"); 01445 } 01446 01447 cdgPrintf(CDG_PROFILE, "PROFILE: building edges took %ldms\n", 01448 timerElapsed(profileTime)); 01449 01450 
timerFree(profileTime); 01451 } 01452 01453 /* ---------------------------------------------------------------------- 01454 Does this node bind the earliest lexeme in the lattice? 01455 The functions 01456 \b lgIsStartNode() and \b lgIsEndNode() are used for this 01457 purpose. 01458 */ 01459 Boolean cnIsStartNode(ConstraintNode n) 01460 { 01461 return (n->level == listElement(inputCurrentGrammar->levels) 01462 && 01463 lgIsStartNode(n->gn)); 01464 } 01465 01466 /* ---------------------------------------------------------------------- 01467 Does this node bind the latest lexeme in the lattice? 01468 The functions 01469 \b lgIsStartNode() and \b lgIsEndNode() are used for this 01470 purpose. */ 01471 Boolean cnIsEndNode(ConstraintNode n) 01472 { 01473 return (n->level == listLastElement(inputCurrentGrammar->levels) 01474 && 01475 lgIsEndNode(n->gn)); 01476 } 01477 01478 /* ---------------------------------------------------------------------- 01479 cnDeleteBinding 01480 01481 This function deallocates \b NodeBinding structure. Both 01482 components of the pair are shallow copies and cannot be deallocated. 01483 */ 01484 void cnDeleteBinding(NodeBinding nb) 01485 { 01486 memFree(nb); 01487 } 01488 01489 /* ---------------------------------------------------------------------------- 01490 Show what the conflict means. This function prints out the conflict 01491 itself, the LVs that actually cause it, the constraint that is 01492 violated, and the participating lexemes. 
01493 */ 01494 void cvAnalyse(ConstraintViolation cv, Vector context) 01495 { 01496 01497 Boolean inverse; 01498 int i; 01499 List l; 01500 LevelValue lv; 01501 List lexemes = NULL; 01502 LexemNode ln; 01503 01504 VarInfo var; 01505 01506 /* show conflict */ 01507 cdgPrintf(CDG_INFO, "\n\nThe conflict is:\n================\n\n"); 01508 cvPrint(CDG_INFO, cv); 01509 01510 /* show constraint */ 01511 cdgPrintf(CDG_INFO, "\n\n\nThe constraint is:\n==================\n\n"); 01512 printConstraint(CDG_INFO, cv->constraint); 01513 01514 /* is the order of LVs in the signature different from the order in the 01515 * conflict? */ 01516 inverse = 01517 listSize(cv->constraint->vars) == 2 01518 && evalBinaryConstraint(cv->constraint, NULL, context, cv->lv1, cv->lv2); 01519 01520 /* it is safe to assume that these two vector elements are not NULL because 01521 * ConstraintViolations never point to NULL slots. */ 01522 01523 /* show LVs */ 01524 cdgPrintf(CDG_INFO, "\n\nThe LVs concerned are:\n======================\n\n"); 01525 for (i = 1, l = cv->constraint->vars; l; l = listNext(l), i++) { 01526 var = (VarInfo) listElement(l); 01527 if (listSize(cv->constraint->vars) == 1) { 01528 lv = cv->lv1; 01529 } else if ((i == 1 && !inverse) || (i == 2 && inverse)) { 01530 lv = cv->lv1; 01531 } else { 01532 lv = cv->lv2; 01533 } 01534 cdgPrintf(CDG_INFO, "%s == ", var->varname); 01535 lvPrint(CDG_INFO, lv, NULL, 0); 01536 cdgPrintf(CDG_INFO, "\n"); 01537 01538 /* note down the participating lexemes */ 01539 if (lv->modifiee != NULL && lv->modifiee != NONSPEC) { 01540 lexemes = listAddUniqueElement(lexemes, lv->modifiee); 01541 } 01542 lexemes = listAddUniqueElement(lexemes, lv->modifier); 01543 } 01544 01545 /* show participating lexemes */ 01546 cdgPrintf(CDG_INFO, 01547 "\n\nThe lexemes concerned are:\n==========================\n\n"); 01548 for (l = lexemes; l; l = listNext(l)) { 01549 ln = (LexemNode) listElement(l); 01550 01551 cdgPrintf(CDG_INFO, "%s: ", ln->lexem->word); 01552 
printValue(CDG_INFO, ln->lexem->value, 0); 01553 cdgPrintf(CDG_INFO, "\n\n"); 01554 } 01555 01556 listDelete(lexemes); 01557 01558 } 01559 01560 /* ---------------------------------------------------------------------- 01561 cvDelete 01562 01563 This function deallocates a structure of type 01564 \b ConstraintViolation. Since all components of this type are 01565 shallow copies, only the structure itself is deallocated. 01566 */ 01567 void cvDelete(ConstraintViolation cv) 01568 { 01569 /* Usually, all LVs are owned by the net in which they occur. If an LV 01570 with indexWRTNet -1 appears in a constraint violation, it must be an 01571 LV that exists only temporarily (not in any net). These LVs are owned 01572 by the cv, so they must be deallocated here. */ 01573 if(cv->lv1->indexWRTNet == -1) { 01574 lvDelete(cv->lv1); 01575 } 01576 01577 if(cv->lv2 && cv->lv2->indexWRTNet == -1) { 01578 lvDelete(cv->lv2); 01579 } 01580 01581 memFree(cv); 01582 } 01583 01584 /* ---------------------------------------------------------------------- 01585 deletes a constraint-node 01586 01587 This function deallocates a single constraint node and all LVs 01588 contained in it. Note that pointers to these LVs may remain in the 01589 field \b values of the enclosing constraint net, so 01590 \b cnDeleteNode() should only be called immediately before 01591 deleting the net itself. 01592 */ 01593 void cnDeleteNode(ConstraintNode node) 01594 { 01595 int j; 01596 LevelValue lv; 01597 01598 for (j = 0; j < vectorSize(node->values); j++) { 01599 lv = (LevelValue) vectorElement(node->values, j); 01600 lvDelete(lv); 01601 } 01602 01603 vectorDelete(node->values); 01604 01605 memFree(node); 01606 } 01607 01608 /* ---------------------------------------------------------------------- 01609 deletes a constraint-edge 01610 01611 This function deallocates a constraint edge. Apart from the 01612 \b ConstraintEdgeStruct itself, only the field \b scores 01613 is deallocated by calling \b smDelete(). 
01614 */ 01615 void cnDeleteEdge(ConstraintEdge edge) 01616 { 01617 memFree(edge->isMarked); 01618 smDelete(edge->scores); 01619 01620 memFree(edge); 01621 } 01622 01623 /* ---------------------------------------------------------------------- 01624 deletes constraint net 01625 01626 This function deallocates an entire constraint net. Note that while 01627 \b cnBuild() only allocates a few fields, this function allocates 01628 everything. The following functions are used for the purpose: 01629 01630 - \b lgDelete() for the \b lexemgraph 01631 - \b cnDeleteNode() for the elements of \b nodes 01632 - \b cnDeleteEdge() for the elements of \b edges 01633 - \b agDelete() for the \b searchagenda 01634 - \b deleteParse() for the elements of \b parses 01635 - \b scDelete() for the \b cache 01636 01637 If \b net is identical to \b cnMostRecentlyCreatedNet, that 01638 variable is reset. 01639 */ 01640 void cnDelete(ConstraintNet net) 01641 { 01642 int i; 01643 ConstraintNode node; 01644 ConstraintEdge edge; 01645 01646 if (net == NULL) { 01647 cdgPrintf(CDG_WARNING, "WARNING: cnDelete: net is NULL\n"); 01648 return; 01649 } 01650 if (cnMostRecentlyCreatedNet == net) 01651 cnMostRecentlyCreatedNet = NULL; 01652 01653 cdgFreeString(net->id); 01654 if (net->lexemgraph != NULL) 01655 lgDelete(net->lexemgraph); 01656 if (net->values != NULL) 01657 vectorDelete(net->values); 01658 if (net->nodes != NULL) { 01659 for (i = 0; i < vectorSize(net->nodes); i++) { 01660 node = (ConstraintNode) vectorElement(net->nodes, i); 01661 cnDeleteNode(node); 01662 } 01663 vectorDelete(net->nodes); 01664 } 01665 if (net->edges != NULL) { 01666 for (i = 0; i < vectorSize(net->edges); i++) { 01667 edge = (ConstraintEdge) vectorElement(net->edges, i); 01668 cnDeleteEdge(edge); 01669 } 01670 vectorDelete(net->edges); 01671 } 01672 01673 if (net->searchagenda != NULL) 01674 agDelete(net->searchagenda); 01675 01676 if (net->lvTotals != NULL) 01677 vectorDelete(net->lvTotals); 01678 01679 /* Parses are 
owned by inputCurrentGrammar and are not freed here. */ 01680 listDelete(net->parses); 01681 net->parses = NULL; 01682 01683 scDelete(net->cache); 01684 memFree(net); 01685 } 01686 01687 /* ---------------------------------------------------------------------- 01688 print out some information about this net 01689 01690 This function displays some general information about \b net. 01691 Apart from the fields of the structure themselves, the minimal, 01692 average, and maximal number of LVs per node is calculated and 01693 displayed. This function is called in various places to summarize 01694 briefly the state of a constraint net. 01695 */ 01696 void cnPrintInfo(ConstraintNet net) 01697 { 01698 int i, vmin, vmax, vtotal; 01699 ConstraintNode node; 01700 long double candidates; 01701 01702 if (!(hkVerbosity & CDG_INFO)) 01703 return; 01704 01705 vmin = 99999999; 01706 vtotal = vmax = 0; 01707 candidates = 0; 01708 for (i = 0; i < vectorSize(net->nodes); i++) { 01709 node = (ConstraintNode) vectorElement(net->nodes, i); 01710 vmin = min(vmin, node->noValidValues); 01711 vmax = max(vmax, node->noValidValues); 01712 vtotal += node->noValidValues; 01713 if (node->noValidValues) { 01714 if (!candidates) { 01715 candidates = node->noValidValues; 01716 } else { 01717 candidates *= node->noValidValues; 01718 if (candidates == HUGE_VAL) { 01719 cdgPrintf(CDG_WARNING, "WARNING: very large problem\n"); 01720 } 01721 } 01722 } 01723 } 01724 01725 cdgPrintf(CDG_INFO, "INFO: net: id %s, wordgraph %s\n", 01726 net->id, net->lexemgraph->lattice->id); 01727 cdgPrintf(CDG_INFO, " #nodes %d, #edges %d\n", 01728 vectorSize(net->nodes), 01729 net->edges == NULL ? 
0 : vectorSize(net->edges)); 01730 cdgPrintf(CDG_INFO, 01731 " #evaluations: %d unary, %d statistics, %d binary\n", 01732 net->evalUnary, net->statUnary, net->evalBinary); 01733 cdgPrintf(CDG_INFO, 01734 " #values: min %d, max %d, total %d, average %5.2f\n", vmin, 01735 vmax, vtotal, vtotal / (float)vectorSize(net->nodes)); 01736 cdgPrintf(CDG_INFO, " #candidates: %.4Lg\n", candidates); 01737 if (net->cache && net->cache->size > 0) { 01738 cdgPrintf(CDG_INFO, 01739 " cache: size %d #values %d, #hits %d, %1.1f on average, usage %1.2f%%\n", 01740 net->cache->capacity, 01741 net->cache->size, 01742 net->cache->hits, 01743 (float)net->cache->hits / (float)net->cache->size, 01744 (float)net->cache->size / (float)net->cache->capacity * 100); 01745 } 01746 } 01747 01748 /* ---------------------------------------------------------------------- 01749 Allocate a new constraint violation. 01750 This function allocates a new \b ConstraintViolation. The 01751 parameters are installed in the corresponding fields of the new 01752 structure. All fields of a \b ConstraintViolation are shallow 01753 copies. 01754 01755 */ 01756 ConstraintViolation cvNew(Constraint c, LevelValue lva, LevelValue lvb) 01757 { 01758 01759 ConstraintViolation result = memMalloc(sizeof(ConstraintViolationStruct)); 01760 result->constraint = c; 01761 result->penalty = c->penalty; 01762 result->lv1 = lva; 01763 result->lv2 = lvb; 01764 /* If an LV with indexWRTNet -1 appears in a constraint violation, 01765 it must be an LV that exists only temporarily (not in any net). These 01766 LVs are deallocated when they are no longer needed, so we clone them 01767 here in case the violation is preserved longer than the structure that 01768 caused it. 
*/ 01769 if(result->lv1->indexWRTNet == -1) { 01770 result->lv1 = lvClone(result->lv1); 01771 } 01772 if(result->lv2 && result->lv2->indexWRTNet == -1) { 01773 result->lv2 = lvClone(result->lv2); 01774 } 01775 01776 01777 result->nodeBindingIndex1 = lvIndex(lva); 01778 result->nodeBindingIndex2 = lvb ? lvIndex(lvb) : -1; 01779 /* Note that the default initialization to -1 is meaningful. At several 01780 points in the program, cv structures are compared by comparing their 01781 fields nodeBindingIndex1 and nodeBindingIndex2. If nodeBindingIndex2 01782 were left undefined for unary constraints, two identical unary 01783 violations might get classified as different. */ 01784 01785 return result; 01786 01787 } 01788 01789 /* ---------------------------------------------------------------------- 01790 clone a constraint violation 01791 @returns a clone of \b cv. 01792 */ 01793 ConstraintViolation cvClone(ConstraintViolation cv) { 01794 01795 ConstraintViolation result = cvNew(cv->constraint, cv->lv1, cv->lv2); 01796 result->penalty = cv->penalty; 01797 return result; 01798 01799 } 01800 01801 01802 /* ---------------------------------------------------------------------- 01803 re-sorts the LVs in each constraint node by limit rather than by score 01804 01805 This function sorts the Vectors \b values=of all constraint nodes 01806 of \b net using \b lvCompare(). 
01807 */ 01808 void cnSortLVs(ConstraintNet net) 01809 { 01810 int i; 01811 ConstraintNode n; 01812 01813 for (i = 0; i < vectorSize(net->nodes); i++) { 01814 n = (ConstraintNode) vectorElement(net->nodes, i); 01815 vectorSort(n->values, lvCompare); 01816 } 01817 01818 } 01819 01820 /* ---------------------------------------------------------------------- 01821 compares two violations by penalty, then by domain index 01822 This function compares two structures of type 01823 \b ConstraintViolation, comparing first the penalty of the 01824 violation, after that the fields 01825 \b nodeBindingIndex1 and \b nodeBindingIndex2 and finally the 01826 names of the constraints. 01827 01828 TODO: this function is never used, see cvCompare for a similar function 01829 */ 01830 Boolean cnCompareViolation(ConstraintViolation a, ConstraintViolation b) { 01831 01832 if(a->penalty < b->penalty) { 01833 return TRUE; 01834 } else if(a->penalty > b->penalty) { 01835 return FALSE; 01836 } 01837 01838 if(a->nodeBindingIndex1 < b->nodeBindingIndex1) { 01839 return TRUE; 01840 } else if(a->nodeBindingIndex1 > b->nodeBindingIndex1) { 01841 return FALSE; 01842 } 01843 01844 if(a->nodeBindingIndex2 < b->nodeBindingIndex2) { 01845 return TRUE; 01846 } else if(a->nodeBindingIndex2 > b->nodeBindingIndex2) { 01847 return FALSE; 01848 } 01849 01850 if(strcmp(a->constraint->id, b->constraint->id) < 0) { 01851 return TRUE; 01852 } 01853 01854 return FALSE; 01855 01856 } 01857 01858 /* ---------------------------------------------------------------------- 01859 prints all Parses of a net 01860 01861 This function applies \b parsePrint() to all entries of 01862 \b net->parses. 
01863 */ 01864 void cnPrintParses(ConstraintNet net) 01865 { 01866 List l; 01867 01868 for (l = net->parses; l != NULL; l = listNext(l)) { 01869 parsePrint(listElement(l)); 01870 cdgPrintf(CDG_INFO, "\n\n"); 01871 } 01872 } 01873 01874 /* ---------------------------------------------------------------------- 01875 Find ConstraintNode of an LV. This finds LVs by comparing their IWRT 01876 numbers, so cannot give meaningful results if lv was not originally 01877 built for net. 01878 @returns the constraint node in \b net that holds 01879 \b lv (or \b NULL). 01880 */ 01881 ConstraintNode cnFindNode(ConstraintNet net, LevelValue lv) { 01882 01883 ConstraintNode cn; 01884 LevelValue lv2; 01885 int i, j; 01886 01887 for (i = 0; i < vectorSize(net->nodes); i++) { 01888 cn = (ConstraintNode) vectorElement(net->nodes, i); 01889 for (j = 0; j < vectorSize(cn->values); j++) { 01890 lv2 = (LevelValue) vectorElement(cn->values, j); 01891 if(lv->indexWRTNet == lv2->indexWRTNet) { 01892 return cn; 01893 } 01894 } 01895 } 01896 01897 cdgPrintf(CDG_WARNING, 01898 "WARNING: can't find LV #%d in net!\n", 01899 lv->indexWRTNet); 01900 01901 return NULL; 01902 01903 } 01904 01905 /* ---------------------------------------------------------------------- 01906 Print a conflict. 01907 01908 This function displays a conflict using \b cdgPrintf(). If 01909 \b width is~0, the constraint identifier is printed in full, 01910 otherwise it is truncated to \b width characters. 
01911 */ 01912 void cvPrint(unsigned long mode, ConstraintViolation cv) { 01913 01914 if (cv->nodeBindingIndex2 >= 0) { 01915 cdgPrintf(CDG_INFO, "%03d : %03d : %4.3e : %s", 01916 cv->nodeBindingIndex1, cv->nodeBindingIndex2, 01917 cv->penalty, cv->constraint->id); 01918 } else { 01919 cdgPrintf(CDG_INFO, " %03d : %4.3e : %s", 01920 cv->nodeBindingIndex1, cv->penalty, cv->constraint->id); 01921 } 01922 01923 } 01924 01925 /* ---------------------------------------------------------------------- 01926 Does a conflict occur in a list? 01927 01928 This function checks whether a conflict equivalent to \b cv= 01929 appears in the List. \b TRUE= is returned if both the constraint 01930 and the fields \b nodeBindingIndex1= and \b nodeBindingIndex2= 01931 match (the penalty need not match). 01932 */ 01933 Boolean cvContains(List conflicts, ConstraintViolation cv) { 01934 01935 List l; 01936 ConstraintViolation cv2; 01937 01938 for (l = conflicts; l; l = listNext(l)) { 01939 cv2 = listElement(l); 01940 if (cv->constraint->no == cv2->constraint->no && 01941 ((cv->nodeBindingIndex1 == cv2->nodeBindingIndex1 && 01942 cv->nodeBindingIndex2 == cv2->nodeBindingIndex2) 01943 || 01944 (cv->nodeBindingIndex2 == cv2->nodeBindingIndex1 && 01945 cv->nodeBindingIndex1 == cv2->nodeBindingIndex2))) { 01946 return TRUE; 01947 } 01948 } 01949 01950 return FALSE; 01951 } 01952 01953 /* ---------------------------------------------------------------------- 01954 compares two violations by natural order 01955 01956 This function compares two \b ConstraintViolation= structures by 01957 the position of the affected LVs. If both affect the same LVs, it 01958 compares them by the constraint name. 
01959 */ 01960 Boolean cvCompareNatural(ConstraintViolation a, ConstraintViolation b) 01961 { 01962 if (a->nodeBindingIndex1 < b->nodeBindingIndex1) { 01963 return TRUE; 01964 } else if (a->nodeBindingIndex1 > b->nodeBindingIndex1) { 01965 return FALSE; 01966 } 01967 01968 if (a->nodeBindingIndex2 < b->nodeBindingIndex2) { 01969 return TRUE; 01970 } else if (a->nodeBindingIndex2 > b->nodeBindingIndex2) { 01971 return FALSE; 01972 } 01973 01974 if (strcmp(a->constraint->id, b->constraint->id) < 0) { 01975 return TRUE; 01976 } 01977 01978 return FALSE; 01979 } 01980 01981 01982 /* -------------------------------------------------------------------- 01983 Constraint violations should be sorted first by penalty, then by arity, 01984 finally by natural order. 01985 01986 This function compares two conflicts. It returns \b TRUE 01987 - if \b a has the lower penalty 01988 - otherwise, if \b a is unary and \b b is binary 01989 - otherwise, if the first LV in \b a precedes the first lv in 01990 \b b according to natural order. 01991 */ 01992 Boolean cvCompare(ConstraintViolation a, ConstraintViolation b) 01993 { 01994 int indexa, indexb; 01995 01996 /* compare by penalty */ 01997 if (a->penalty < b->penalty) 01998 return TRUE; 01999 if (a->penalty > b->penalty) 02000 return FALSE; 02001 02002 /* compare by arity */ 02003 if (!a->lv2 && b->lv2) 02004 return TRUE; 02005 if (a->lv2 && !b->lv2) 02006 return FALSE; 02007 02008 /* compare names */ 02009 indexa = strcmp(a->constraint->id, b->constraint->id); 02010 if (indexa < 0) 02011 return FALSE; 02012 if (indexa > 0) 02013 return TRUE; 02014 02015 /* compare by natural order */ 02016 indexa = lvIndex(a->lv1); 02017 indexb = lvIndex(b->lv1); 02018 if (indexa < indexb) 02019 return TRUE; 02020 02021 return FALSE; 02022 } 02023 02024 /* ---------------------------------------------------------------------- 02025 Looks for a constraint net with id ID and return it or NULL if 02026 it can't find the net in inputCurrentGrammar. 
02027 02028 @returns the net with the specified \b id in the 02029 \b inputCurrentGrammar structure, or \b NULL. 02030 */ 02031 ConstraintNet cnFindNet(String id) 02032 { 02033 return hashGet(cdgNets, id); 02034 } 02035 /* ---------------------------------------------------------------------- 02036 prints the active levelvalues 02037 02038 This function applies \b lvPrint() to all LVs in the specified 02039 constraint net that are not deleted. 02040 */ 02041 void cnPrintActiveLVs(ConstraintNet net) 02042 { 02043 int i, k; 02044 int j = 0; 02045 02046 for (i=0; i < vectorSize(net->nodes); i++) { 02047 ConstraintNode cn = (ConstraintNode)vectorElement(net->nodes, i); 02048 for (k=0; k < vectorSize(cn->values); k++) { 02049 LevelValue lv = (LevelValue)vectorElement(cn->values, k); 02050 if (!lv->isDeleted) { 02051 lvPrint(CDG_DEBUG, lv, net->lexemgraph->isDeletedNode, 0); 02052 cdgPrintf(CDG_DEBUG, "\n"); 02053 j++; 02054 } 02055 } 02056 } 02057 cdgPrintf(CDG_INFO, "INFO: number of levelvalues: %d found; %d active\n", vectorSize(net->values), j); 02058 } 02059 02060 /* ---------------------------------------------------------------------- 02061 undelete all levelvalues in a constraint net 02062 02063 This function resets the field \b isDeleted for all LVs in the 02064 specified constraint net. 02065 */ 02066 void cnUndeleteAllLVs(ConstraintNet net) 02067 { 02068 int i; 02069 LevelValue lv; 02070 02071 for (i=0; i<vectorSize(net->values); i++) { 02072 lv = (LevelValue) vectorElement(net->values, i); 02073 lv->isDeleted = FALSE; 02074 } 02075 } 02076 02077 /* ---------------------------------------------------------------------- 02078 delete all levelvalues in a constraint net 02079 02080 This function sets the field \b isDeleted for all LVs in the 02081 specified constraint net. 
02082 */ 02083 void cnDeleteAllLVs(ConstraintNet net) 02084 { 02085 int i; 02086 LevelValue lv; 02087 02088 for (i=0; i<vectorSize(net->values); i++) { 02089 lv = (LevelValue) vectorElement(net->values, i); 02090 lv->isDeleted = TRUE; 02091 } 02092 } 02093 02094 /* ---------------------------------------------------------------------- 02095 Update NET with the arcs in LISTARCS. 02096 02097 This function is used by \b incrementalcompletion to extend an 02098 existing constraint net by the structures corresponding to the 02099 specified \b Arc structures. */ 02100 Boolean cnBuildUpdateArcs(ConstraintNet net, List listArcs) 02101 { 02102 int i; 02103 List m; 02104 02105 for (i = 0; i < vectorSize(net->nodes); i++) { 02106 ConstraintNode cn = (ConstraintNode) vectorElement(net->nodes, i); 02107 Level level = cn->level; 02108 GraphemNode modifier = cn->gn; 02109 02110 #ifdef DEBUGCNBUILDNODES 02111 cdgPrintf(CDG_DEBUG, "\nDEBUG: generating LevelValues for node "); 02112 cnPrintNode(CDG_DEBUG, cn); 02113 cdgPrintf(CDG_DEBUG, "\n"); 02114 #endif 02115 02116 for (m = listArcs; m; m = listNext(m)) { 02117 Arc arc = (Arc) listElement(m); 02118 GraphemNode modifiee = cnGetGraphemNodeFromArc(net, arc); 02119 if (modifiee == NULL) { 02120 continue; 02121 } 02122 cnBuildLevelValues(cn, level, modifier, modifiee); 02123 } 02124 cn->noValidValues = vectorSize(cn->values); 02125 cn->totalNumberOfValues = cn->noValidValues; 02126 } 02127 02128 for (m = listArcs; m; m = listNext(m)) { 02129 Arc arc = (Arc) listElement(m); 02130 GraphemNode gn = cnGetGraphemNodeFromArc(net, arc); 02131 if (gn == NULL) { 02132 continue; 02133 } 02134 if (gn->lexemes == NULL) { 02135 continue; 02136 } 02137 if (!cnBuildIter(net, gn, TRUE)) { 02138 return FALSE; 02139 } 02140 /*** add levelvalues for NONSPEC ***/ 02141 } 02142 02143 return TRUE; 02144 } 02145 02146 /* ---------------------------------------------------------------------- 02147 Return the graphem node in NET that points to ARC. 
02148 */ 02149 GraphemNode cnGetGraphemNodeFromArc(ConstraintNet net, Arc arc) 02150 { 02151 int i; 02152 GraphemNode gn; 02153 02154 for (i=0; i<vectorSize(net->lexemgraph->graphemnodes); i++) { 02155 gn = (GraphemNode) vectorElement(net->lexemgraph->graphemnodes, i); 02156 if (gn->arc == arc) 02157 return gn; 02158 } 02159 02160 return (GraphemNode) NULL; 02161 } 02162 02163 02164 /* ---------------------------------------------------------------------- 02165 Callback function for cnUnaryPruningFraction. 02166 02167 This function is used as the callback function for the CDG variable 02168 \b unaryFraction. It prints a notification of the change to the 02169 CDG shell. 02170 */ 02171 void cnCallback(String name, float *var) 02172 { 02173 if (strcmp(name, "unaryFraction") == 0) { 02174 cdgPrintf(CDG_INFO, "INFO: unary pruning fraction set to %f\n", 02175 *var); 02176 } 02177 } 02178 02179 /* ---------------------------------------------------------------------- 02180 Initialize the module constraintnet. 02181 02182 This function initializes the module \ref Constraintnet. It 02183 merely registers the module's CDG variables. 02184 */ 02185 void cnInitialize() 02186 { 02187 setRegister("showdeleted", SET_BOOL, &cnShowDeletedFlag, NULL, NULL, NULL, NULL); 02188 setRegister("edges", SET_ENUM, &cnEdgesFlag, NULL, NULL, NULL, 02189 "on", cnEdgesOn, "off", cnEdgesOff, "few", cnEdgesFew, "all", cnEdgesAll, NULL); 02190 setRegister("sortnodes", SET_ENUM, &cnSortNodesMethod, NULL, NULL, NULL, 02191 "off", 0, "on", 1, "prio", 1, "smallest", 2, NULL); 02192 setRegister("unaryFraction", SET_FLOAT, &cnUnaryPruningFraction, NULL, NULL, cnCallback, NULL); 02193 setRegister("usenonspec", SET_BOOL, &cnUseNonSpec, NULL, NULL, NULL, NULL); 02194 } 02195 02196 /* ---------------------------------------------------------------------- 02197 Needed by XCDG because it can't talk to LexemGraphs directly. 02198 02199 This function simply returns \b cn->lexemgraph->lattice. 
It exists 02200 because XCDG needs that field, and XCDG cannot talk directly to lexeme 02201 graphs (they contain arrays of long long ints, which SWIG doesn't 02202 handle correctly). 02203 */ 02204 Lattice cnGetLattice(ConstraintNet cn) { 02205 return cn->lexemgraph->lattice; 02206 } 02207 02208 /* ---------------------------------------------------------------------- */ 02209 /** @} */ 02210 /* -- ENDOFFILE --------------------------------------------------------- */

CDG 0.95 (20 Oct 2004)