Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Related Pages

eval.c

00001 /* 00002 * Copyright (C) 1997-2004 The CDG Team <cdg@nats.informatik.uni-hamburg.de> 00003 * 00004 * This file is free software; as a special exception the author gives 00005 * unlimited permission to copy and/or distribute it, with or without 00006 * modifications, as long as this notice is preserved. 00007 * 00008 * This program is distributed in the hope that it will be useful, but 00009 * WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 00010 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00011 * 00012 * $Id: eval.c,v 1.112 2004/09/27 17:07:03 micha Exp $ 00013 */ 00014 00015 /* ------------------------------------------------------------------------- 00016 @addtogroup Eval Eval - routines to evaluate constraint formulas 00017 @author Ingo Schroeder 00018 @date 7/3/97 00019 00020 This module provides functions to evaluate constraints (structures of 00021 type Constraint) on LVs (structures of type LevelValue). 00022 The result of evaluating a constraint is always a Boolean, but 00023 since constraint formulas are defined compositionally, there are also 00024 functions for evaluating terms of other types. 00025 00026 All evaluation functions use output parameters of type Value to 00027 compute and return their results. ErrorValue is returned if any 00028 error occurred during evaluation, such as an access to a feature of the 00029 root node. This typically does not cause a run-time error, but simply 00030 makes the containing formula evaluate to FALSE. Note that 00031 unpredictable results can be produced if an error occurs in a nested 00032 term. Therefore an evaluation error always produces a CDG_WARNING 00033 message. 
00034 @{ 00035 */ 00036 00037 #include <config.h> 00038 00039 #include <stdio.h> 00040 #include <stdarg.h> 00041 #include <string.h> 00042 #include <limits.h> 00043 #include "cdg.h" 00044 #include "input.h" 00045 #include "lexemgraph.h" 00046 #include "constraintnet.h" 00047 #include "levelvalue.h" 00048 #include "functions.h" 00049 #include "predicates.h" 00050 #include "eval.h" 00051 #include "increment.h" 00052 #include "hook.h" 00053 #ifndef DARWIN 00054 #include "compile.h" 00055 #endif 00056 #include "timer.h" 00057 #include "set.h" 00058 #include "chunker.h" 00059 00060 /* ---------------------------------------------------------------------- 00061 * @{ 00062 * @name Static Strings 00063 * These strings are used so often that we want to pre-compute 00064 * their addresses. This is the only location where it's really 00065 * important to have full speed. 00066 * They are initialized in evalInitialize() 00067 */ 00068 String static_string_id; /**< static string @a "id" */ 00069 String static_string_word; /**< static string @a "word" */ 00070 String static_string_from; /**< static string @a "from" */ 00071 String static_string_to; /**< static string @a "to" */ 00072 String static_string_info; /**< static string @a "info" */ 00073 String static_string_chunk_start; /**< static string @a "chunk_start" */ 00074 String static_string_chunk_end; /**< static string @a "chunk_end" */ 00075 String static_string_chunk_type; /**< static string @a "chunk_type" */ 00076 /* @} */ 00077 00078 /** This variable is set to the current Constraint during an evaluation, 00079 NULL otherwise. */ 00080 Constraint evalCurrentConstraint = NULL; 00081 00082 /** Set to the current Formula during an evaluation, 00083 NULL otherwise. 00084 00085 Actually this is not strictly true. 
During evaluation, it is set to the 00086 formula whose evaluation was most recently *begun*, which is not at all 00087 the same; if we evaluate the formula 00088 @code 00089 X@cat = N & X^cat != V 00090 @endcode 00091 then the first formula (&) will be evaluated first, then the second 00092 formula (=) and then the third one (!=), but after that, 00093 evalCurrentFormula is not reset to the first formula. To trace the 00094 progress of evaluation exactly, we would need either LISP-style dynamic 00095 binding, or an extra Formula argument across the entire chain of 00096 evaluation. But since evalCurrentFormula is only used within predHas() 00097 to distinguish different has() invocations from each other, and those do 00098 not nest, the current behaviour is sufficient. 00099 */ 00100 Formula evalCurrentFormula = NULL; 00101 00102 /** Implements subsumesWarnings. 00103 00104 If a constraint evaluation tries to access lexical information of the 00105 root node, the corresponding subexpression unconditionally evaluates to 00106 FALSE, and a warning is printed. This flag can be used to turn off the 00107 warnings; however the interpretation of the erroneous expression as 00108 FALSE remains. It is therefore recommended that you write all 00109 constraints so that they will not access features of the root node in 00110 the first place. 00111 00112 */ 00113 Boolean evalSloppySubsumesWarnings = FALSE; 00114 00115 /** Implements evalmethod. */ 00116 EvalMethodType evalEvaluationMethod = EMTInterpreted; 00117 00118 /** Implements peekvaluemethod. */ 00119 EvalMethodType evalPeekValueMethod = EMTCompiled; 00120 00121 /** Notes whether evaluation is locked. 00122 00123 Since locks can be nested, this counter can rise above 1; any value 00124 higher than 0 means that cached results are still valid. 
00125 */ 00126 int lock_counter = 0; 00127 00128 /** The number of time points in the lexeme graph that will 00129 be used in the evaluations 00130 */ 00131 int lock_width = 0; 00132 00133 /** At the moment, only the results of identical has() applications are 00134 cached because those are expected to be particularly expensive. In 00135 principle, this method could be extended to arbitrary identical 00136 subformulas. 00137 00138 The result of the application of has() number X at time point T 00139 is stored in cell X * lock_width + T. 2 means TRUE, 1 means FALSE, and 0 00140 `unknown'. 00141 */ 00142 int *has_cache = NULL; 00143 00144 /* ---------------------------------------------------------------------- 00145 Declare evaluation lock. 00146 00147 When many constraints are evaluated on the same dependency tree, it can 00148 be worthwhile to cache the result of common subformulas. Therefore we 00149 offer the caller the possibility to declare that the dependency tree will 00150 not change during the next evaluations. The module eval is then free to 00151 reuse the results of previous evaluations until the tree is unlocked 00152 again. 00153 00154 WIDTH specifies the number of time points in the lexeme graph that will 00155 be used in the evaluations. 00156 */ 00157 void lock_tree(int width) { 00158 00159 /* do not even try to cache things if there are no has() formulas */ 00160 if(!inputCurrentGrammar->is_context_sensitive) { 00161 return; 00162 } 00163 00164 if(!lock_counter) { 00165 lock_width = width; 00166 has_cache = calloc(width*inputCurrentGrammar->no_has, sizeof(int)); 00167 } 00168 00169 lock_counter++; 00170 } 00171 00172 /* ---------------------------------------------------------------------- 00173 Release evaluation lock. 00174 00175 If lock_tree() was called multiple times, evaluation remains locked onto 00176 the current tree until each call been cancelled individually. 
00177 */ 00178 void unlock_tree(void) { 00179 00180 if(!inputCurrentGrammar->is_context_sensitive) { 00181 return; 00182 } 00183 00184 if(lock_counter <= 0) { 00185 cdgPrintf(CDG_ERROR, "ERROR: invalid tree unlock request!\n"); 00186 } 00187 lock_counter--; 00188 if(!lock_counter) { 00189 lock_width = 0; 00190 free(has_cache); 00191 has_cache = NULL; 00192 } 00193 } 00194 00195 00196 /* ---------------------------------------------------------------------- 00197 Compare Numbers with some margin for rounding error. 00198 00199 Upon removing conflicts and introducing new ones, the score of a 00200 structure must frequently be multiplied and divided with constants. 00201 Repeated use of this practice may introduce rounding errors. 00202 Therefore we use a special function for comparison with some margin 00203 for error. The problem of distinguishing rounding errors from very 00204 subtle penalties remains unsolved. 00205 */ 00206 Boolean significantlyGreater(Number a, Number b) 00207 { 00208 00209 if (b == 0.0) { 00210 return a > 0.0; 00211 } 00212 00213 return (a / b > 1.00001); 00214 } 00215 00216 /* ---------------------------------------------------------------------- 00217 Allocate a Badness. 00218 */ 00219 Badness bNew(int no, int hard, Number soft) 00220 { 00221 00222 Badness result = memMalloc(sizeof (BadnessStruct)); 00223 00224 result->no = no; 00225 result->hard = hard; 00226 result->soft = soft; 00227 00228 return result; 00229 } 00230 00231 /* ---------------------------------------------------------------------- 00232 Copy a Badness. 00233 00234 src must have been allocated by the caller. 00235 */ 00236 void bCopy(Badness dest, Badness src) 00237 { 00238 *dest = *src; 00239 } 00240 00241 /* ---------------------------------------------------------------------- 00242 Create deep copy of a Badness. 00243 00244 The result is a new Badness owned by the caller. 
00245 */ 00246 Badness bClone(Badness b) 00247 { 00248 return bNew(b->no, b->hard, b->soft); 00249 } 00250 00251 /* ---------------------------------------------------------------------- 00252 Add score to a Badness. 00253 00254 This function adds a classical score to a generalized score, i.e. it 00255 either increments b->hard or multiplies b->soft by score. 00256 00257 */ 00258 Badness bAdd(Badness b, Number score) 00259 { 00260 00261 b->no++; 00262 if (score == 0.0) { 00263 b->hard++; 00264 } else { 00265 b->soft *= score; 00266 } 00267 00268 return b; 00269 } 00270 00271 /* ---------------------------------------------------------------------- 00272 Subtract score from a Badness. 00273 */ 00274 Badness bSubtract(Badness b, Number score) 00275 { 00276 00277 b->no--; 00278 if (score == 0.0) { 00279 b->hard--; 00280 } else { 00281 b->soft /= score; 00282 } 00283 00284 return b; 00285 } 00286 00287 /* ---------------------------------------------------------------------- 00288 Add Badness b to a. 00289 */ 00290 Badness bAddBadness(Badness a, Badness b) 00291 { 00292 a->no += b->no; 00293 a->hard += b->hard; 00294 a->soft *= b->soft; 00295 00296 return a; 00297 } 00298 00299 /* ---------------------------------------------------------------------- 00300 Subtract Badness b from a. 00301 */ 00302 Badness bSubtractBadness(Badness a, Badness b) 00303 { 00304 a->no -= b->no; 00305 a->hard -= b->hard; 00306 if (b->soft != 0.0) 00307 a->soft /= b->soft; 00308 else 00309 a->soft = 1.0; 00310 00311 return a; 00312 } 00313 00314 /* ---------------------------------------------------------------------- 00315 De-allocate a Badness. 00316 */ 00317 void bDelete(Badness b) 00318 { 00319 memFree(b); 00320 } 00321 00322 /* ---------------------------------------------------------------------- 00323 Print a badness in canonical form. 00324 */ 00325 void bPrint(unsigned long mode, Badness b) 00326 { 00327 if (b->no == 0) { 00328 cdgPrintf(mode, "*** no conflicts! 
***"); 00329 return; 00330 } 00331 00332 cdgPrintf(mode, "(%d/%4.3e)", b->hard, b->soft); 00333 } 00334 00335 /* ---------------------------------------------------------------------- 00336 Test if two badnesses are equal. 00337 */ 00338 Boolean bEqual(Badness a, Badness b) 00339 { 00340 return (a->hard == b->hard && a->soft == b->soft); 00341 } 00342 00343 /* ---------------------------------------------------------------------- 00344 Compare two Badnesses. 00345 00346 Returns TRUE if a is properly better than b, FALSE if they are equal. 00347 */ 00348 Boolean bCompare(Badness a, Badness b) 00349 { 00350 /* fewer hard conflicts? */ 00351 if (a->hard < b->hard) 00352 return TRUE; 00353 00354 /* more hard conflicts? */ 00355 if (a->hard > b->hard) 00356 return FALSE; 00357 00358 /* better score? */ 00359 if (significantlyGreater(a->soft, b->soft)) 00360 return TRUE; 00361 00362 return FALSE; 00363 } 00364 00365 /* ---------------------------------------------------------------------- 00366 Return new Badness better than any other. 00367 00368 The result is owned by the caller. 00369 */ 00370 Badness bestBadness(void) 00371 { 00372 return bNew(0, 0, 1.0); 00373 } 00374 00375 /* ---------------------------------------------------------------------- 00376 Return new Badness worse than any other. 00377 00378 Strictly speaking, this is not the worst Badness that any structure 00379 could ever have, but I guess it will do. 00380 00381 The result is owned by the caller. 00382 */ 00383 Badness worstBadness(void) 00384 { 00385 return bNew(INT_MAX, INT_MAX, 0.0); 00386 } 00387 00388 /* ---------------------------------------------------------------------- 00389 Evaluate a formula. 00390 00391 This function interprets the Boolean formulas defined in the cdg input 00392 language. It is basically a comprehensive listing of actions for each 00393 possible value of f->type. 00394 00395 Each of the different types of formula is evaluated in the expected way. 
00396 In particular, conjunctions, implications and disjunction are guaranteed 00397 to short-circuit in the same way as the corresponding C operators. 00398 00399 Binary relations (FTEqual etc.) are evaluated by calling evalTerm() on 00400 both halves of the formula. If the two resulting types are not 00401 compatible, FALSE is returned with a warning. In general, every type is 00402 only compatible with itself. The ErrorValue is not compatible with any 00403 type, not even with itself. 00404 00405 The various relations are defined as follows: 00406 00407 - Two strings are equal if they are identical (since no strings 00408 are duplicated in cdg, this is equivalent to checking whether they 00409 have the same reading). 00410 - Two numbers are equal if == holds between them. 00411 - Two lexeme nodes are equal if they were created from the same 00412 grapheme node or they are both NULL. Note that this may judge two 00413 simultaneous lexeme nodes as equal even though they are acoustically 00414 incompatible. This case should not arise since no constraint should 00415 ever be evaluated on two incompatible LVs; however, evalFormula() does 00416 not check this. 00417 - Two underspecified values or an underspecified and a specified 00418 value are never equal. 00419 - FTGreater holds between two numbers that satisfy C's > operator. 00420 - FTLess holds between two numbers that satisfy C's < operator. 00421 00422 If f->type is FTConnexion, subsumesConnexion() is called to check 00423 whether the specification f->data.connexion.c subsumes the actual 00424 configuration of the LVs found in the fields 00425 f->data.connexion.var1->levelvalue and 00426 f->data.connexion.var2->levelvalue. 00427 00428 If f=>type is FTDirection, subsumesDirection() is called to check 00429 whether the specification f->data.direction.d subsumes the Direction of 00430 the LevelValue structure found in the field 00431 f->data.direction.var->levelvalue. 
00432 00433 */ 00434 Boolean evalFormula(Formula f, LexemGraph lg, Vector context) 00435 { 00436 ValueStruct vas, vbs; 00437 Value a, b; 00438 00439 evalCurrentFormula = f; 00440 00441 switch(f->type) { 00442 00443 case FTConstant: 00444 return (f->data.constant); 00445 /* -------------------------------------------------- */ 00446 case FTNegation: 00447 return (!evalFormula(f->data.formula, lg, context)); 00448 /* -------------------------------------------------- */ 00449 case FTPredicate: 00450 return (f->data.predicate.function(f->data.predicate.args, lg, context)); 00451 /* -------------------------------------------------- */ 00452 case FTConjunction: 00453 return (evalFormula(f->data.junction.arg1, lg, context) && 00454 evalFormula(f->data.junction.arg2, lg, context)); 00455 /* -------------------------------------------------- */ 00456 case FTDisjunction: 00457 return (evalFormula(f->data.junction.arg1, lg, context) || 00458 evalFormula(f->data.junction.arg2, lg, context)); 00459 /* -------------------------------------------------- */ 00460 case FTImplication: 00461 return (!evalFormula(f->data.junction.arg1, lg, context) || 00462 evalFormula(f->data.junction.arg2, lg, context)); 00463 /* -------------------------------------------------- */ 00464 case FTBiimplication: 00465 { 00466 Boolean b1 = evalFormula(f->data.junction.arg1, lg, context); 00467 Boolean b2 = evalFormula(f->data.junction.arg2, lg, context); 00468 00469 return b1 == b2; 00470 } 00471 /* -------------------------------------------------- */ 00472 case FTEqual: 00473 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00474 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00475 00476 if (a->type == VTError || b->type == VTError) 00477 return (FALSE); 00478 00479 if (a->type != b->type) { 00480 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00481 printFormula(CDG_WARNING, f, FTConstant, 0); 00482 cdgPrintf(CDG_WARNING, "\n"); 00483 return (FALSE); 00484 } 00485 
00486 switch (a->type) { 00487 case VTString: 00488 return (a->data.string == b->data.string); 00489 case VTNumber: 00490 return (a->data.number == b->data.number); 00491 case VTLexemNode:{ 00492 if (a->data.lexemnode != NULL && b->data.lexemnode != NULL && 00493 a->data.lexemnode != NONSPEC && b->data.lexemnode != NONSPEC) { 00494 return 00495 (((LexemNode) a->data.lexemnode)->grapheme == 00496 ((LexemNode) b->data.lexemnode)->grapheme); 00497 } else if (a->data.lexemnode == NULL && b->data.lexemnode == NULL) 00498 return (TRUE); 00499 else 00500 return (FALSE); 00501 } 00502 default: 00503 return (FALSE); 00504 } 00505 break; 00506 /* -------------------------------------------------- */ 00507 case FTGreater: 00508 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00509 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00510 00511 if (a->type == VTError || b->type == VTError) 00512 return (FALSE); 00513 00514 if (a->type == VTNumber && b->type == VTNumber) 00515 return (a->data.number > b->data.number); 00516 00517 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00518 printFormula(CDG_WARNING, f, FTConstant, 0); 00519 cdgPrintf(CDG_WARNING, "\n"); 00520 break; 00521 /* -------------------------------------------------- */ 00522 case FTLess: 00523 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00524 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00525 00526 if (a->type == VTError || b->type == VTError) 00527 return (FALSE); 00528 00529 if (a->type == VTNumber && b->type == VTNumber) 00530 return (a->data.number < b->data.number); 00531 00532 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00533 printFormula(CDG_WARNING, f, FTConstant, 0); 00534 cdgPrintf(CDG_WARNING, "\n"); 00535 break; 00536 /* -------------------------------------------------- */ 00537 case FTGreaterEqual: 00538 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00539 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00540 00541 
if (a->type == VTError || b->type == VTError) 00542 return (FALSE); 00543 00544 if (a->type == VTNumber && b->type == VTNumber) 00545 return (a->data.number >= b->data.number); 00546 00547 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00548 printFormula(CDG_WARNING, f, FTConstant, 0); 00549 cdgPrintf(CDG_WARNING, "\n"); 00550 break; 00551 /* -------------------------------------------------- */ 00552 case FTLessEqual: 00553 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00554 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00555 00556 if (a->type == VTError || b->type == VTError) 00557 return (FALSE); 00558 00559 if (a->type == VTNumber && b->type == VTNumber) 00560 return (a->data.number <= b->data.number); 00561 00562 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00563 printFormula(CDG_WARNING, f, FTConstant, 0); 00564 cdgPrintf(CDG_WARNING, "\n"); 00565 break; 00566 /* -------------------------------------------------- */ 00567 case FTNotEqual: 00568 a = evalTerm(f->data.relation.op1, &vas, lg, context); 00569 b = evalTerm(f->data.relation.op2, &vbs, lg, context); 00570 00571 if (a->type == VTError || b->type == VTError) 00572 return (FALSE); 00573 00574 if (a->type == VTNumber && b->type == VTNumber) 00575 return (a->data.number != b->data.number); 00576 else if (a->type == VTString && b->type == VTString) 00577 return (a->data.string != b->data.string); 00578 else if (a->type == VTLexemNode && b->type == VTLexemNode) { 00579 if (a->data.lexemnode == NULL && b->data.lexemnode != NULL) 00580 return (TRUE); 00581 if (a->data.lexemnode != NULL && b->data.lexemnode == NULL) 00582 return (TRUE); 00583 if (a->data.lexemnode == NULL && b->data.lexemnode == NULL) 00584 return (FALSE); 00585 if (a->data.lexemnode == NONSPEC || b->data.lexemnode == NONSPEC) 00586 return (TRUE); 00587 return (((LexemNode) a->data.lexemnode)->grapheme != 00588 ((LexemNode) b->data.lexemnode)->grapheme); 00589 } 00590 00591 
cdgPrintf(CDG_WARNING, "WARNING: type mismatch in formula:\n"); 00592 printFormula(CDG_WARNING, f, FTConstant, 0); 00593 cdgPrintf(CDG_WARNING, "\n"); 00594 break; 00595 /* -------------------------------------------------- */ 00596 case FTConnexion: 00597 return (subsumesConnexion(f->data.connexion.c, 00598 getCon(f->data.connexion.var1->levelvalue, 00599 f->data.connexion.var2->levelvalue))); 00600 /* -------------------------------------------------- */ 00601 case FTDirection: 00602 return (subsumesDirection(f->data.direction.d, 00603 getDir(f->data.direction.var->levelvalue))); 00604 } 00605 return (FALSE); 00606 } 00607 00608 /* ---------------------------------------------------------------------- 00609 * returns value for a path in a feature matrix 00610 */ 00611 Value peekValue(Value val, List path) 00612 { 00613 00614 Value v; 00615 00616 /* anchor */ 00617 if (path == NULL) { 00618 return val; 00619 } else { 00620 00621 /* recursion */ 00622 if (val->type == VTConjunction) { 00623 String att = (String) listElement(path); 00624 List l; 00625 00626 for (l = val->data.list; l != NULL; l = listNext(l)) { 00627 v = (Value) listElement(l); 00628 if (v->type != VTAVNode) { 00629 cdgPrintf(CDG_WARNING, "WARNING: value "); 00630 printValue(CDG_WARNING, v, 0); 00631 cdgPrintf(CDG_WARNING, " in value "); 00632 printValue(CDG_WARNING, val, 0); 00633 cdgPrintf(CDG_WARNING, 00634 " should be attribute/value pair, but isn't!\n\n"); 00635 return ErrorValue; 00636 } 00637 00638 if (((AVNode) v->data.avnode)->attribute == att) { 00639 return peekValue(((AVNode) v->data.avnode)->value, listNext(path)); 00640 } 00641 } 00642 return ErrorValue; 00643 } else { 00644 cdgPrintf(CDG_WARNING, 00645 "WARNING: can't find any path in something not a conjunction\n"); 00646 return ErrorValue; 00647 } 00648 } 00649 } 00650 00651 /* ---------------------------------------------------------------------------- 00652 evaluate a CDG Term. 
00653 00654 @param t the term to be evaluated 00655 @param val temporary storage where the result is stored in 00656 @param lg the current lexemgraph 00657 @param context the current parse tree (or NULL) 00658 @returns a pointer to @c val or the ErrorValue 00659 00660 This function evaluates a term on a value of the cdg formula language, 00661 computing both its type and its value. Both are returned as the result of 00662 evalTerm(). The function performs an exhaustive case distinction over the 00663 possible values of @c t->type: 00664 00665 - TTTopPeek: The field @c t->data.peek.varinfo->levelvalue->modifiee is 00666 checked. If the modifiee is underspecified, ErrorValue is returned, 00667 otherwise peekValue() is applied to the path @c t->data.peek.path in the 00668 value of the modifiee.\n 00669 \n 00670 Exceptions occur if @c t->data.peek.path is @c id, @c word, @c from or @c to. 00671 These special attributes are notated like features of a lexeme node, but 00672 are actually attributes of the lexeme node, and not the lexeme itself. 00673 These attributes are computed as follows: 00674 - the attribute id is computed by setting @c val->lexemnode 00675 to the modifiee itself. If the modifiee is underspecified, 00676 ErrorValue is returned, VTLexemNode otherwise. 00677 - the attribute word is computed by setting 00678 @c val->data.string to the field word of the lexical entry of the 00679 modifier. (Strictly speaking, this *is* an attribute of the lexeme 00680 rather than the lexeme node; but as it does not appear in the 00681 feature matrix, it cannot be computed by peekValue().) If the 00682 modifiee is underspecified, ErrorValue is returned, @c val otherwise. 00683 - the attribute from is computed by setting @c val->data.number 00684 to the field from of the modifiee. If the modifiee is underspecified, 00685 @c lg->max+1 is assigned instead. In any case, @c val is returned. 
00686 - the attribute to is computed by setting 00687 @c val->data.number to the field to of the modifiee. If the 00688 modifiee is underspecified, the number @c lg->max+2 is assigned 00689 instead. In any case, @c val is returned. \n 00690 \n 00691 - TTBottomPeek: The same algorithm is applied to the 00692 modifier of the LevelValue -- simplified by the fact that modifiers cannot 00693 be underspecified. 00694 - TTLabel: @c val->data.string is set to the field label of the LV and @c val is returned. 00695 - TTLevel: @c val->data.string is set to the identifier of the Level of LV, 00696 and @c val is returned. 00697 - TTAdd, TTSubtract, TTMultiply, TTDivide: The two fields 00698 @c t->data.operation.op1 and @c t->data.operation.op1 are evaluated by 00699 evalTerm(). ErrorValue is returned unless both results are numbers. 00700 Dividing by @c 0 also returns ErrorValue. Otherwise the results are 00701 conjoined by the corresponding C operator (+, -, * oder /), 00702 @c val->data.number is set to the result of this computation, and VTNumber 00703 is returned. 00704 applied to t->data.function.args (and lg) and both the resulting type 00705 and the computed are passed to the caller of evalTerm(). 00706 - TTString: @c val->data.string is set to @c t->data.string, and @c val is returned. 00707 - TTNumber: @c val->data.number is set to @c t->data.number, and @c val is returned. 00708 00709 */ 00710 Value evalTerm(Term t, Value val, LexemGraph lg, Vector context) 00711 { 00712 /* Variables for add, sub, ... 
*/ 00713 ValueStruct a, b; 00714 Value va, vb; 00715 00716 /* Variables for TTTopPeek, BottomPeek */ 00717 LexemNode ln; 00718 LevelValue lv; 00719 List l; 00720 00721 switch (t->type) { 00722 case TTTopPeek:{ 00723 lv = (LevelValue) t->data.peek.varinfo->levelvalue; 00724 l = t->data.peek.path; 00725 ln = (LexemNode) lv->modifiee; 00726 00727 if (l) { 00728 /* special handling of special attribute */ 00729 String s = (String) listElement(l); 00730 00731 if (!listNext(l)) { 00732 00733 /* special handling of `id' attribute */ 00734 if (s == static_string_id) { 00735 val->data.lexemnode = (struct LexemNodeStruct *)ln; 00736 val->type = VTLexemNode; 00737 return val; 00738 } 00739 00740 if (ln == NONSPEC) { 00741 /* Modifiee not specified */ 00742 if (s == static_string_from) { 00743 val->data.number = lg->max + 1; 00744 val->type = VTNumber; 00745 return val; 00746 } 00747 if (s == static_string_to) { 00748 val->data.number = lg->max + 2; 00749 val->type = VTNumber; 00750 return val; 00751 } 00752 return ErrorValue; 00753 } 00754 00755 if (!ln) { 00756 return ErrorValue; 00757 } 00758 /* special handling of `word' attribute */ 00759 if (s == static_string_word) { 00760 val->data.string = ln->lexem->word; 00761 val->type = VTString; 00762 return val; 00763 } 00764 /* special handling of `from' attribute */ 00765 if (s == static_string_from) { 00766 val->data.number = (Number) ln->arc->from; 00767 val->type = VTNumber; 00768 return val; 00769 } 00770 /* special handling of `to' attribute */ 00771 if (s == static_string_to) { 00772 val->data.number = (Number) ln->arc->to; 00773 val->type = VTNumber; 00774 return val; 00775 } 00776 /* special handling of `chunk_start' attribute */ 00777 if (s == static_string_chunk_start) { 00778 Chunk chunk = ln->grapheme->chunk; 00779 if (!chunk) 00780 return ErrorValue; 00781 val->data.number = chunk->from->arc->from; 00782 val->type = VTNumber; 00783 return val; 00784 } 00785 /* special handling of `chunk_end' attribute */ 00786 if 
(s == static_string_chunk_end) { 00787 Chunk chunk = ln->grapheme->chunk; 00788 if (!chunk) 00789 return ErrorValue; 00790 val->data.number = chunk->to->arc->to; 00791 val->type = VTNumber; 00792 return val; 00793 } 00794 /* special handling of `chunk_type' attribute */ 00795 if (s == static_string_chunk_type) { 00796 Chunk chunk = ln->grapheme->chunk; 00797 if (!chunk) 00798 return ErrorValue; 00799 val->data.string = chunkerStringOfChunkType(chunk); 00800 val->type = VTString; 00801 return val; 00802 } 00803 } else { 00804 /* `info' attribute must have additional attributes */ 00805 if (s == static_string_info) { 00806 /* peek value from infostring */ 00807 if (!ln || !ln->arc->info) { 00808 return ErrorValue; 00809 } 00810 return peekValue(ln->arc->info, listNext(l)); 00811 } 00812 } 00813 } 00814 00815 if (!ln || ln == NONSPEC) { 00816 return ErrorValue; 00817 } 00818 00819 switch (evalPeekValueMethod) { 00820 case EMTCompiled: 00821 return (ln->lexem->values[t->data.peek.pathindex]); 00822 case EMTInterpreted: 00823 return peekValue(ln->lexem->value, l); 00824 default: 00825 cdgPrintf(CDG_WARNING, "WARNING: invalid evalpeekvalue method\n"); 00826 return ErrorValue; 00827 } 00828 } 00829 case TTBottomPeek:{ 00830 lv = (LevelValue) t->data.peek.varinfo->levelvalue; 00831 l = t->data.peek.path; 00832 ln = lv->modifier; 00833 00834 /* special handling of special attribute */ 00835 if (l) { 00836 String s = (String) listElement(l); 00837 00838 if (!listNext(l)) { 00839 /* special handling of `id' attribute */ 00840 if (s == static_string_id) { 00841 val->data.lexemnode = (struct LexemNodeStruct *)ln; 00842 val->type = VTLexemNode; 00843 return val; 00844 } 00845 00846 /* special handling of `word' attribute */ 00847 if (s == static_string_word) { 00848 val->data.string = ln->lexem->word; 00849 val->type = VTString; 00850 return val; 00851 } 00852 /* special handling of `from' attribute */ 00853 if (s == static_string_from) { 00854 val->data.number = (Number) 
ln->arc->from; 00855 val->type = VTNumber; 00856 return val; 00857 } 00858 /* special handling of `to' attribute */ 00859 if (s == static_string_to) { 00860 val->data.number = (Number) ln->arc->to; 00861 val->type = VTNumber; 00862 return val; 00863 } 00864 /* special handling of `chunk_start' attribute */ 00865 if (s == static_string_chunk_start) { 00866 Chunk chunk = ln->grapheme->chunk; 00867 if (!chunk) { 00868 return ErrorValue; 00869 } else { 00870 val->data.number = (Number) chunk->from->arc->from; 00871 val->type = VTNumber; 00872 return val; 00873 } 00874 } 00875 /* special handling of `chunk_end' attribute */ 00876 if (s == static_string_chunk_end) { 00877 Chunk chunk = ln->grapheme->chunk; 00878 if (!chunk) { 00879 return ErrorValue; 00880 } else { 00881 val->data.number = (Number) chunk->to->arc->to; 00882 val->type = VTNumber; 00883 return val; 00884 } 00885 } 00886 /* special handling of `chunk_type' attribute */ 00887 if (s == static_string_chunk_type) { 00888 Chunk chunk = ln->grapheme->chunk; 00889 if (!chunk) 00890 return ErrorValue; 00891 val->data.string = chunkerStringOfChunkType(chunk); 00892 val->type = VTString; 00893 return val; 00894 } 00895 } else { 00896 /* `info' attribute must have additional attributes */ 00897 if (s == static_string_info) { 00898 /* peek value from infostring */ 00899 if (!ln->arc->info) { 00900 return ErrorValue; 00901 } 00902 return peekValue(ln->arc->info, listNext(l)); 00903 } 00904 } 00905 } 00906 00907 switch (evalPeekValueMethod) { 00908 case EMTCompiled: 00909 return (ln->lexem->values[t->data.peek.pathindex]); 00910 case EMTInterpreted: 00911 return peekValue(ln->lexem->value, l); 00912 default: 00913 cdgPrintf(CDG_WARNING, "WARNING: invalid evalpeekvalue method\n"); 00914 return ErrorValue; 00915 } 00916 } 00917 /* -------------------------------------------------- */ 00918 case TTLabel: 00919 val->data.string = ((LevelValue) t->data.label->levelvalue)->label; 00920 val->type = VTString; 00921 return val; 
00922 /* -------------------------------------------------- */ 00923 case TTLevel: 00924 val->data.string = ((LevelValue) t->data.level->levelvalue)->level->id; 00925 val->type = VTString; 00926 return val; 00927 /* -------------------------------------------------- */ 00928 case TTAdd: 00929 if ((va = evalTerm(t->data.operation.op1, &a, lg, context))->type != VTNumber || 00930 (vb = evalTerm(t->data.operation.op2, &b, lg, context))->type != VTNumber) { 00931 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in term:\n"); 00932 printTerm(CDG_WARNING, t, FALSE); 00933 return ErrorValue; 00934 } else { 00935 val->data.number = va->data.number + vb->data.number; 00936 val->type = VTNumber; 00937 } 00938 return val; 00939 /* -------------------------------------------------- */ 00940 case TTSubtract: 00941 if ((va = evalTerm(t->data.operation.op1, &a, lg, context))->type != VTNumber || 00942 (vb = evalTerm(t->data.operation.op2, &b, lg, context))->type != VTNumber) { 00943 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in term:\n"); 00944 printTerm(CDG_WARNING, t, FALSE); 00945 return ErrorValue; 00946 } else { 00947 val->data.number = va->data.number - vb->data.number; 00948 val->type = VTNumber; 00949 } 00950 return val; 00951 /* -------------------------------------------------- */ 00952 case TTMultiply: 00953 if ((va = evalTerm(t->data.operation.op1, &a, lg, context))->type != VTNumber || 00954 (vb = evalTerm(t->data.operation.op2, &b, lg, context))->type != VTNumber) { 00955 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in term:\n"); 00956 printTerm(CDG_WARNING, t, FALSE); 00957 return ErrorValue; 00958 } else { 00959 val->data.number = va->data.number * vb->data.number; 00960 val->type = VTNumber; 00961 } 00962 return val; 00963 /* -------------------------------------------------- */ 00964 case TTDivide: 00965 if ((va = evalTerm(t->data.operation.op1, &a, lg, context))->type != VTNumber || 00966 (vb = evalTerm(t->data.operation.op2, &b, lg, context))->type != 
VTNumber) { 00967 cdgPrintf(CDG_WARNING, "WARNING: type mismatch in term:\n"); 00968 printTerm(CDG_WARNING, t, FALSE); 00969 return ErrorValue; 00970 } else if (vb->data.number == 0.0) { 00971 cdgPrintf(CDG_WARNING, "WARNING: division by zero in term:\n"); 00972 printTerm(CDG_WARNING, t, FALSE); 00973 return ErrorValue; 00974 } else { 00975 val->data.number = va->data.number / vb->data.number; 00976 val->type = VTNumber; 00977 } 00978 return val; 00979 /* -------------------------------------------------- */ 00980 case TTFunction: 00981 return (Value) t->data.function.function(t->data.function.args, val, lg, context); 00982 /* -------------------------------------------------- */ 00983 case TTString: 00984 val->data.string = t->data.string; 00985 val->type = VTString; 00986 return val; 00987 /* -------------------------------------------------- */ 00988 case TTNumber: 00989 val->data.number = t->data.number; 00990 val->type = VTNumber; 00991 return val; 00992 default: 00993 return ErrorValue; 00994 } 00995 } 00996 00997 /* ---------------------------------------------------------------------- 00998 Evaluate a constraint w.r.t. the specified LVs. 00999 01000 Here's the detailed algorithm: 01001 01002 - check the number of arguments and returns TRUE (with a warning) if 01003 it does not match the arity of c 01004 - instantiate the variables in c->vars with the LVs passed 01005 as arguments 01006 - do *not* check whether c is applicable to these LVs. Applying 01007 constraints to arbitrary LVs can produce spurious constraint 01008 violations without warning! Hence evalConstraint() should only be 01009 called on constraints whose applicability was previously checked by 01010 match{Unary,Binary}Signature(). 
01011 - set evalCurrentConstraint and evaluates c->formula using evalFormula() 01012 - reset evalCurrentConstraint 01013 - if the result was FALSE and c has a variable penalty 01014 (if c->penaltyTerm is non-NULL), set c->penalty to evalTerm(c->penaltyTerm) 01015 - execute the hook HOOK_EVAL 01016 - de-instantiate c->vars 01017 - increase either evalUnary or evalBinary 01018 - return the result of evalFormula() 01019 01020 The net is actually only used to record statistics (the fields evalUnary 01021 and evalBinary) and can be NULL. 01022 01023 The context may be NULL. If it is not NULL it should be a Vector of LVs, 01024 each of which must be located at its index as calculated by lvIndex(). 01025 This is then passed on to predicates and functions that need to examine 01026 the context of an LV to operate. 01027 01028 If the constraint c uses such a function or predicate (that is, if it has 01029 its is_context_sensitive flag set) and there is no context, then 01030 evalConstraint() returns TRUE. This is obviously not always correct, so 01031 if you do not provide context to your evaluations and still use 01032 context-sensitive constraints, you can miss some conflicts. 01033 01034 This function can evaluate constraints of arbitrary arity. The number of 01035 LVs passed must equal the arity. Unary and binary constraints can also be 01036 evaluated by evalUnaryConstraint() and evalBinaryConstraint(). Those 01037 functions are slightly faster than using the general vararg mechanism. 01038 01039 */ 01040 Boolean evalConstraint(Constraint c, ConstraintNet net, Vector context, ...) 01041 { 01042 va_list ap; 01043 LevelValue lv; 01044 VarInfo vi; 01045 List l; 01046 Boolean result = TRUE; 01047 01048 Value val; 01049 ValueStruct vas; 01050 01051 LexemGraph lg; 01052 int counter = 0; 01053 LevelValue lva = NULL, lvb = NULL; 01054 01055 /* if we have no context, don't even try to find a conflict. 
*/ 01056 if(c->is_context_sensitive && !context) { 01057 return TRUE; 01058 } 01059 01060 /* instantiate constraint variables */ 01061 l = c->vars; 01062 va_start(ap, context); 01063 while ((lv = va_arg(ap, LevelValue)) != NULL && l != NULL) { 01064 vi = (VarInfo) listElement(l); 01065 vi->levelvalue = lv; 01066 01067 counter++; 01068 if (counter == 1) 01069 lva = lv; 01070 if (counter == 2) 01071 lvb = lv; 01072 l = listNext(l); 01073 } 01074 va_end(ap); 01075 01076 lg = lva->modifier->lexemgraph; 01077 result = evalFormula(c->formula, lg, context); 01078 01079 if (result == FALSE && c->penaltyTerm != NULL) { 01080 /* determine actual penalty */ 01081 val = evalTerm(c->penaltyTerm, &vas, lg, context); 01082 if (val->type != VTNumber) { 01083 cdgPrintf(CDG_WARNING, 01084 "WARNING: penalty term of `%s' doesn't evaluate to a number\n", 01085 c->id); 01086 c->penalty = 0.0; 01087 } else { 01088 c->penalty = val->data.number; 01089 } 01090 } 01091 evalCurrentConstraint = NULL; 01092 01093 if ((hkVerbosity & CDG_HOOK) && !result) 01094 cdgExecHook(HOOK_EVAL, c); 01095 01096 #if 0 01097 /* de-instantiate constraint variables */ 01098 for (l = c->vars; l != NULL; l = listNext(l)) 01099 ((VarInfo) listElement(l))->levelvalue = NULL; 01100 #endif 01101 01102 /* count */ 01103 if (net) { 01104 if (counter == 1) 01105 net->evalUnary++; 01106 else 01107 net->evalBinary++; 01108 } 01109 01110 return (result); 01111 } 01112 01113 /* ---------------------------------------------------------------------- 01114 * Evaluate a unary constraint on LV. 01115 01116 This function evaluates a unary constraint. It has the same effect as 01117 evalConstraint() used with four arguments, but saves the overhead of 01118 accessing the variable argument list. Also, it will call a compiled 01119 constraint function rather than evalConstraint() if possible. 
01120 01121 */ 01122 Boolean evalUnaryConstraint(Constraint c, ConstraintNet net, 01123 Vector context, LevelValue lv) 01124 { 01125 VarInfo vi; 01126 Boolean result; 01127 01128 Value val; 01129 ValueStruct vas; 01130 01131 LexemGraph lg; 01132 01133 /* if we have no context, don't even try to find a conflict. */ 01134 if(c->is_context_sensitive && !context) { 01135 return TRUE; 01136 } 01137 01138 /* instantiate constraint variable */ 01139 vi = (VarInfo) listElement(c->vars); 01140 vi->levelvalue = lv; 01141 01142 lg = lv->modifier->lexemgraph; 01143 evalCurrentConstraint = c; 01144 01145 /* eval compiled */ 01146 if (evalEvaluationMethod == EMTCompiled) { 01147 result = (*c->cfunction) (lg, context, lv); 01148 } 01149 01150 /* eval interpreted */ 01151 else { 01152 01153 result = evalFormula(c->formula, lg, context); 01154 if (c->penaltyTerm && !result) { 01155 /* determine actual penalty */ 01156 val = evalTerm(c->penaltyTerm, &vas, lg, context); 01157 if (val->type != VTNumber) { 01158 cdgPrintf(CDG_WARNING, 01159 "WARNING: penalty term of `%s' doesn't evaluate to a number\n", 01160 c->id); 01161 c->penalty = 0.0; 01162 } else { 01163 c->penalty = val->data.number; 01164 } 01165 } 01166 } 01167 evalCurrentConstraint = NULL; 01168 01169 if ((hkVerbosity & CDG_HOOK) && !result) { 01170 cdgExecHook(HOOK_EVAL, c); 01171 } 01172 01173 if (net) { 01174 net->evalUnary++; 01175 } 01176 01177 return (result); 01178 } 01179 01180 /* ------------------------------------------------------------ 01181 Evaluate one binary constraint on two LVs. 01182 01183 This function evaluates a binary constraint. It has the same effect as 01184 evalConstraint() used with five arguments, but saves the overhead of 01185 accessing the variable argument list. Also, it will call a compiled 01186 constraint function rather than evalConstraint() if possible. 
01187 */ 01188 Boolean evalBinaryConstraint(Constraint c, ConstraintNet net, 01189 Vector context, 01190 LevelValue lva, LevelValue lvb) 01191 { 01192 VarInfo vi; 01193 Boolean result; 01194 01195 Value val; 01196 ValueStruct vas; 01197 01198 LexemGraph lg; 01199 01200 /* if we have no context, don't even try to find a conflict. */ 01201 if(c->is_context_sensitive && !context) { 01202 return TRUE; 01203 } 01204 01205 /* instantiate constraint variables */ 01206 vi = (VarInfo) listElement(c->vars); 01207 vi->levelvalue = lva; 01208 01209 vi = (VarInfo) listElement(listNext(c->vars)); 01210 vi->levelvalue = lvb; 01211 01212 evalCurrentConstraint = c; 01213 lg = lva->modifier->lexemgraph; 01214 01215 /* eval compiled */ 01216 if (evalEvaluationMethod == EMTCompiled) { 01217 result = (*c->cfunction) (lg, context, lva, lvb); 01218 } 01219 01220 /* eval interpreted */ 01221 else { 01222 result = evalFormula(c->formula, lg, context); 01223 if (c->penaltyTerm && !result) { 01224 /* determine actual penalty */ 01225 val = evalTerm(c->penaltyTerm, &vas, lg, context); 01226 if (val->type != VTNumber) { 01227 cdgPrintf(CDG_WARNING, 01228 "WARNING: penalty term of `%s' doesn't evaluate to a number\n", 01229 c->id); 01230 c->penalty = 0.0; 01231 } else { 01232 c->penalty = val->data.number; 01233 } 01234 } 01235 } 01236 evalCurrentConstraint = NULL; 01237 01238 if ((hkVerbosity & CDG_HOOK) && !result) { 01239 cdgExecHook(HOOK_EVAL, c); 01240 } 01241 01242 if (net) { 01243 net->evalBinary++; 01244 } 01245 01246 return (result); 01247 } 01248 01249 /* ------------------------------------------------------------ 01250 Eval unary constraints on LV. 01251 01252 This function applies all unary constraints to lv. It uses the Vector 01253 cdgConstraintVector in much the same way as evalBinary() uses 01254 cdgConstraintMatrix. The same exceptions for deactivated constraints 01255 apply as in that function. 
In addition, evalUnary() stores all violated 01256 local constraints in the List lv->constraints. 01257 01258 */ 01259 Number evalUnary(LevelValue lv, ConstraintNet net, 01260 Vector context, Boolean use_cs_only, 01261 Badness b, List *conflicts) 01262 { 01263 List l; 01264 Number score = 1.0; 01265 Constraint c; 01266 int index; 01267 01268 /* evalUnary() can operate in fast mode, calculating only the cumulative 01269 score, or in thorough mode, calculating the score, the Badness and the 01270 List of all conflicts found. The thorough mode is requested by passing 01271 a valid pointer as the third argument. */ 01272 Boolean thorough = (b != NULL); 01273 01274 /* grant a cleared violations-list */ 01275 if (!context) { 01276 /* the lv->constraints only contain local unary constraints, 01277 no context-sensitive ones */ 01278 listDelete(lv->constraints); 01279 lv->constraints = NULL; 01280 } 01281 01282 index = inputUCIndex(lv->level->no, getDir(lv)); 01283 for (l = inputCurrentGrammar->hashedUCs[index]; l != NULL; l = listNext(l)) { 01284 c = (Constraint) listElement(l); 01285 01286 /* correct arity and activated */ 01287 if (listSize(c->vars) != 1 || !c->section->active || !c->active) 01288 continue; 01289 01290 if(use_cs_only && !c->is_context_sensitive) { 01291 continue; 01292 } 01293 if (!evalUnaryConstraint(c, net, context, lv)) { 01294 #ifdef DEBUGEVALUNARY 01295 cdgPrintf(CDG_DEBUG, "\nDEBUG: constraint `%s' failed.\n", c->id); 01296 #endif 01297 if (!context) { 01298 /* add a local unary violation to the list of violated constraints */ 01299 lv->constraints = listPrependElement(lv->constraints, c); 01300 } 01301 if (!thorough && c->penalty == 0.0) { 01302 return (0.0); 01303 } 01304 score *= c->penalty; 01305 if (thorough) { 01306 bAdd(b, c->penalty); 01307 if (conflicts) { 01308 *conflicts = 01309 listPrependElement(*conflicts, cvNew(c, lv, NULL)); 01310 } 01311 } 01312 } 01313 } 01314 #ifdef DEBUGEVALUNARY 01315 cdgPrintf(CDG_DEBUG, "OK.\n"); 01316 #endif 
01317 return (score); 01318 } 01319 01320 01321 /* ---------------------------------------------------------------------- 01322 Evaluate all constraints on two LVs. 01323 01324 This function performs a conceptually simple task: it computes the 01325 combined score of two LVs in a constraint net and returns the computed 01326 score. This is done by applying evalConstraint() to all known 01327 constraints and tallying the scores of all violated constraints. 01328 01329 For efficiency reasons, the actual implementation of this function is 01330 much more complicated: 01331 01332 - The entire function can be run in fast mode or in thorough mode. In 01333 fast mode, the function will stop applying constraints if a constraint 01334 with score 0 is violated, and return 0 immediately. In thorough mode, 01335 it will keep applying all possible constraints and bAdd() the results 01336 to the Badness b. Also, ConstraintViolation structures will be 01337 allocated and inserted into the List conflicts. Thorough mode is 01338 requested by passing a non-null value for b. For fast mode, both b and 01339 conflicts should be NULL. 01340 - In fast mode, if the LVs lva and lvb have already been 01341 evaluated by this function and scUseCache was set during both 01342 invocations of evalBinary(), the result is read from net->cache and 01343 not computed again. 01344 - To make this case more probable, the LVs lva and lvb are always sorted by 01345 the number of their respective levels. (That is, only half of the 01346 entire cache is ever used.) 01347 - Even if no precomputed result exists, not all constraints may be 01348 evaluated: 01349 01350 Unary constraints are ignored completely. 01351 01352 Inactive constraints and constraints from inactive sections are 01353 skipped likewise. 01354 01355 Of the remaining constraints, only those that may actually fail are 01356 evaluated. 
Hence not all constraints are evaluated, but only those 01357 whose signature matches the actual configuration existing between 01358 lva and lvb. The List of these constraints is found in the 01359 matrix cdgConstraintMatrix[]. 01360 01361 In fast mode, if a hard constraint is violated, the result 0.0 is 01362 returned immediately. 01363 01364 If the parameter context is NULL, context-sensitive constraints will 01365 not be evaluated. 01366 01367 If the parameter use_cs_only is set, ONLY context-sensitive constraints 01368 will be evaluated. See evalInContext() for why this is occasionally 01369 necessary. 01370 01371 - Theoretically, a binary constraint may be violated twice by two 01372 LVs. Think of a constraint that forbids the label X to appear more 01373 than once in an analysis. If two LVs with label X are evaluated 01374 jointly, both (a,b) and (b,a) violate this condition. 01375 But since the two violations do not really indicate different errors, 01376 this kind of double fault is usually not desired. 01377 01378 A double fault can only occur if the LVs match the signature of a 01379 constraint in both variants of instantiation. Since every binary 01380 constraint signature must explicitly state the two levels it refers 01381 to, this is only possible if both LVs belong to the same level and 01382 either their configuration is symmetrical, or it is asymmetrical, 01383 but the signature of the constraint subsumes both configurations. 01384 01385 To eliminate all double faults, evalBinary() checks for all 01386 of these possibilities explicitly. 01387 01388 - If a value was computed rather than looked up, and 01389 scUseCache is set, it is registered in net->cache. 01390 01391 The parameter net can be NULL, so that LVs can be evaluated even in the 01392 absence of a constraint net. In this case no caching is possible. 
01393 01394 */ 01395 Number evalBinary(LevelValue lva, LevelValue lvb, 01396 ConstraintNet net, Vector context, 01397 Boolean use_cs_only, 01398 Badness b, List *conflicts) 01399 { 01400 List l; 01401 Number score = 1.0; 01402 Constraint c; 01403 int index; 01404 int connexion; 01405 Direction dir1, dir2; 01406 List violated = NULL; 01407 Boolean thorough = (b != NULL); 01408 Boolean foundContextSensitive = FALSE; 01409 01410 /* force lv order */ 01411 if (lva->indexWRTNet > lvb->indexWRTNet) { 01412 LevelValue lv = lvb; 01413 01414 lvb = lva; 01415 lva = lv; 01416 } 01417 01418 /* lookup scorecache */ 01419 if (!thorough && scUseCache && net) { 01420 score = scGetScore(net->cache, lva, lvb); 01421 if (score != -1.0) 01422 return (score); 01423 else 01424 score = 1.0; 01425 } 01426 01427 dir1 = getDir(lva); 01428 connexion = getCon(lva, lvb); 01429 dir2 = getDir(lvb); 01430 01431 index = inputBCIndex(lva->level->no, lvb->level->no, dir1, connexion, dir2); 01432 #ifdef DEBUG_EVALBINARY 01433 cdgPrintf(CDG_DEBUG, "DEBUG: looking up cell %d {X", index); 01434 printDirection(CDG_DEBUG, dir1); 01435 cdgPrintf(CDG_DEBUG, "%s", lva->level->id); 01436 printConnexion(CDG_DEBUG, connexion); 01437 cdgPrintf(CDG_DEBUG, "Y"); 01438 printDirection(CDG_DEBUG, dir2); 01439 cdgPrintf(CDG_DEBUG, "%s}", lvb->level->id); 01440 cdgPrintf(CDG_DEBUG, " (%d constraints)\n", 01441 listSize(inputCurrentGrammar->hashedBCs[index])); 01442 #endif 01443 01444 lock_tree(lva->modifier->lexemgraph->max); 01445 01446 /* simplest case: lva and lvb belong to the same level, 01447 and their configuration is symmetrical. That means 01448 that there is only one class of constraints that they can violate. 
*/ 01449 01450 if (lva->level->no == lvb->level->no && 01451 connexion == inverseConnexion(connexion) && dir1 == dir2) { 01452 01453 /* give each constraint in the list two chances to fail */ 01454 for (l = inputCurrentGrammar->hashedBCs[index]; l != NULL; l = listNext(l)) { 01455 c = (Constraint) listElement(l); 01456 /* activated? */ 01457 if (!c->section->active || !c->active) { 01458 #ifdef DEBUG_EVALBINARY 01459 cdgPrintf(CDG_DEBUG, "DEBUG: skipping constraint %s\n", c->id); 01460 #endif 01461 continue; 01462 } 01463 if (c->is_context_sensitive) 01464 foundContextSensitive = TRUE; 01465 else if(use_cs_only) 01466 continue; 01467 if (!evalBinaryConstraint(c, net, context, lva, lvb) 01468 || !evalBinaryConstraint(c, net, context, lvb, lva)) { 01469 if (!thorough && c->penalty == 0.0) { 01470 #ifdef DEBUG_EVALBINARY 01471 cdgPrintf(CDG_DEBUG, "DEBUG: bailing out!\n"); 01472 #endif 01473 score = 0.0; 01474 goto end; 01475 } 01476 score *= c->penalty; 01477 if (thorough) { 01478 bAdd(b, c->penalty); 01479 if (conflicts) { 01480 *conflicts = 01481 listPrependElement(*conflicts, cvNew(c, lva, lvb)); 01482 } 01483 } 01484 } 01485 } 01486 } 01487 01488 /* general case: there are two classes of constraints that lva and lvb 01489 may violate, e.g. {X\SYN/Y\SEM} and {X\SEM\Y\SYN}. */ 01490 else { 01491 01492 /* evaluate each constraint from either of two lists, 01493 but without double faults */ 01494 violated = NULL; 01495 01496 /* first list */ 01497 01498 for (l = inputCurrentGrammar->hashedBCs[index]; l != NULL; l = listNext(l)) { 01499 c = (Constraint) listElement(l); 01500 /* activated? 
*/ 01501 if (!c->section->active || !c->active) { 01502 #ifdef DEBUG_EVALBINARY 01503 cdgPrintf(CDG_DEBUG, "DEBUG: skipping constraint %s\n", c->id); 01504 #endif 01505 continue; 01506 } 01507 if (c->is_context_sensitive) 01508 foundContextSensitive = TRUE; 01509 else if(use_cs_only) 01510 continue; 01511 if (!evalBinaryConstraint(c, net, context, lva, lvb)) { 01512 violated = listPrependElement(violated, c); 01513 c->active = FALSE; 01514 if (!thorough && c->penalty == 0.0) { 01515 #ifdef DEBUG_EVALBINARY 01516 cdgPrintf(CDG_DEBUG, "DEBUG: bailing out!\n"); 01517 #endif 01518 score = 0.0; 01519 goto end; 01520 } 01521 score *= c->penalty; 01522 if (thorough) { 01523 bAdd(b, c->penalty); 01524 if (conflicts) { 01525 *conflicts = 01526 listPrependElement(*conflicts, cvNew(c, lva, lvb)); 01527 } 01528 } 01529 } 01530 } 01531 01532 index = inputBCIndex(lvb->level->no, 01533 lva->level->no, 01534 dir2, inverseConnexion(connexion), dir1); 01535 #ifdef DEBUG_EVALBINARY 01536 cdgPrintf(CDG_DEBUG, "DEBUG: looking up cell %d {X", index); 01537 printDirection(CDG_DEBUG, dir2); 01538 cdgPrintf(CDG_DEBUG, "%s", lvb->level->id); 01539 printConnexion(CDG_DEBUG, inverseConnexion(connexion)); 01540 cdgPrintf(CDG_DEBUG, "Y"); 01541 printDirection(CDG_DEBUG, dir1); 01542 cdgPrintf(CDG_DEBUG, "%s}", lva->level->id); 01543 cdgPrintf(CDG_DEBUG, " (%d constraints)\n", 01544 listSize(inputCurrentGrammar->hashedBCs[index])); 01545 #endif 01546 01547 /* second list */ 01548 for (l = inputCurrentGrammar->hashedBCs[index]; l != NULL; l = listNext(l)) { 01549 c = (Constraint) listElement(l); 01550 01551 /* activated? 
*/ 01552 if (!c->section->active || !c->active) { 01553 #ifdef DEBUG_EVALBINARY 01554 cdgPrintf(CDG_DEBUG, "DEBUG: skipping constraint %s\n", c->id); 01555 #endif 01556 continue; 01557 } 01558 if (c->is_context_sensitive) 01559 foundContextSensitive = TRUE; 01560 else if(use_cs_only) 01561 continue; 01562 if (!evalBinaryConstraint(c, net, context, lvb, lva)) { 01563 if (!thorough && c->penalty == 0.0) { 01564 #ifdef DEBUG_EVALBINARY 01565 cdgPrintf(CDG_DEBUG, "DEBUG: bailing out!\n"); 01566 #endif 01567 score = 0.0; 01568 goto end; 01569 } 01570 score *= c->penalty; 01571 if (thorough) { 01572 bAdd(b, c->penalty); 01573 if (conflicts) { 01574 *conflicts = 01575 listPrependElement(*conflicts, cvNew(c, lva, lvb)); 01576 } 01577 } 01578 } 01579 } 01580 } 01581 01582 end: 01583 unlock_tree(); 01584 for (l = violated; l; l = listNext(l)) { 01585 c = listElement(l); 01586 c->active = TRUE; 01587 } 01588 listDelete(violated); 01589 01590 /* store computed score in scorecache */ 01591 if (scUseCache && net && !foundContextSensitive) { 01592 scSetScore(net->cache, lva, lvb, score); 01593 } 01594 01595 return (score); 01596 } 01597 01598 01599 /* ---------------------------------------------------------------------- 01600 Eval context-sensitive constraints only. 01601 01602 This function evaluates all LVs in the Vector LVs against each other, 01603 using only context-sensitive constraints, and assuming that the context 01604 is context. 01605 01606 This is useful for methods that cannot normally provide context, e.g. 01607 because they operate incrementally. Such methods should use normal 01608 evalUnary and evalBinary calls with no context while they are building a 01609 structure, and then this function once the structure is complete. 01610 01611 Note that LVs and context can be different; one frobbing method actually 01612 needs to eval a subset of a structure, but in the global context. 
01613 */ 01614 Number evalInContext(Vector LVs, Vector context, Badness b, List* conflicts) { 01615 int i,j; 01616 LevelValue lva, lvb; 01617 Number result = 1.0; 01618 01619 /* Don't pay for what you don't use */ 01620 if(!inputCurrentGrammar->is_context_sensitive) { 01621 return 1.0; 01622 } 01623 01624 for(i = 0; i < vectorSize(LVs); i++) { 01625 lva = vectorElement(LVs, i); 01626 if(!lva) continue; 01627 result *= evalUnary(lva, NULL, context, TRUE, b, conflicts); 01628 for(j = i+1; j < vectorSize(LVs); j++) { 01629 lvb = vectorElement(LVs, j); 01630 if(!lvb) continue; 01631 result *= evalBinary(lva, lvb, NULL, context, TRUE, b, conflicts); 01632 } 01633 } 01634 01635 return result; 01636 } 01637 01638 01639 /* ---------------------------------------------------------------------- 01640 * Initialize the eval module 01641 */ 01642 void evalInitialize(void) 01643 { 01644 static_string_id = strRegister("id"); 01645 static_string_word = strRegister("word"); 01646 static_string_from = strRegister("from"); 01647 static_string_to = strRegister("to"); 01648 static_string_info = strRegister("info"); 01649 static_string_chunk_start = strRegister("chunk_start"); 01650 static_string_chunk_end = strRegister("chunk_end"); 01651 static_string_chunk_type = strRegister("chunk_type"); 01652 01653 setRegister("subsumesWarnings", SET_BOOL, &evalSloppySubsumesWarnings, NULL, 01654 NULL, NULL, "sloppy", TRUE, "full", FALSE, NULL); 01655 setRegister("evalmethod", SET_ENUM, &evalEvaluationMethod, NULL, 01656 (BooleanFunction *) evalValidateEvalMethod, NULL, "compiled", 01657 EMTCompiled, "interpreted", EMTInterpreted, NULL); 01658 setRegister("peekvaluemethod", SET_ENUM, &evalPeekValueMethod, NULL, NULL, 01659 NULL, "compiled", EMTCompiled, "interpreted", EMTInterpreted, 01660 NULL); 01661 } 01662 01663 /* ---------------------------------------------------------------------- 01664 * finalize the eval module 01665 */ 01666 void evalFinalize(void) 01667 { 01668 
cdgFreeString(static_string_id); 01669 cdgFreeString(static_string_word); 01670 cdgFreeString(static_string_from); 01671 cdgFreeString(static_string_to); 01672 cdgFreeString(static_string_info); 01673 cdgFreeString(static_string_chunk_start); 01674 cdgFreeString(static_string_chunk_end); 01675 cdgFreeString(static_string_chunk_type); 01676 } 01677 01678 /* ---------------------------------------------------------------------- 01679 This is the validation function for the CDG variable evalmethod. 01680 */ 01681 Boolean evalValidateEvalMethod(String name, String value, int *var) 01682 { 01683 Constraint c; 01684 HashIterator hi; 01685 if (strcmp(name, "evalmethod") == 0) { 01686 if (strcmp(value, "compiled") == 0) { 01687 01688 if (*var != EMTCompiled) { 01689 return FALSE; 01690 } 01691 01692 /* Check whether there are constraints at all. */ 01693 if (!inputCurrentGrammar->constraints) { 01694 cdgPrintf(CDG_WARNING, "WARNING: no constraints loaded, " 01695 "refusing to switch to compiled evaluation\n"); 01696 return FALSE; 01697 } 01698 /* Check whether all constraints have a valid cfunction. */ 01699 hi = hashIteratorNew(inputCurrentGrammar->constraints); 01700 while((c = hashIteratorNextValue(hi))) { 01701 if (!c->cfunction) { 01702 cdgPrintf(CDG_WARNING, "WARNING: invalid function for `%s', " 01703 "refusing to switch to compiled evaluation\n", c->id); 01704 return FALSE; 01705 } 01706 } 01707 hashIteratorDelete(hi); 01708 return TRUE; 01709 } else if (strcmp(value, "interpreted") == 0) { 01710 return *var == EMTInterpreted; 01711 } 01712 } 01713 return FALSE; 01714 } 01715 01716 /* ---------------------------------------------------------------------- */ 01717 /* ---------------------------------------------------------------------- */ 01718 /*@}*/

CDG 0.95 (20 Oct 2004)