00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
#include <config.h>
00024
00025
#include <stdio.h>
00026
#include <blah.h>
00027
#include <math.h>
00028
#include <string.h>
00029
#include "cdg.h"
00030
#include "input.h"
00031
#include "eval.h"
00032
#include "statistics.h"
00033
#include "levelvalue.h"
00034
#include "constraintnet.h"
00035
#include "lexemgraph.h"
00036
#include "hook.h"
00037
#include "timer.h"
00038
#include "increment.h"
00039
#include "parse.h"
00040
#include "set.h"
00041
#include "tagger.h"
00042
#include "chunker.h"
00043
00044
00045
00046
00047
00048
00049
00050 int cnCounter = 0;
00051
00052
00053
00054
00055 ConstraintNet cnMostRecentlyCreatedNet =
NULL;
00056
00057
00058 CnEdgesType cnEdgesFlag = cnEdgesOff;
00059
00060
00061
00062 Boolean
cnShowDeletedFlag =
FALSE;
00063
00064
00065
00066 Boolean
cnUseNonSpec =
FALSE;
00067
00068
00069
00070
00071
00072
00073 Number
cnUnaryPruningFraction = 1.0;
00074
00075
00076 int cnSortNodesMethod = 0;
00077
00078
00079
00080
00081
00082
00083
00084
00085 ConstraintNet cnBuildInit()
00086 {
00087
ConstraintNet net;
00088
00089 net = (
ConstraintNet) memMalloc(
sizeof (
ConstraintNetStruct));
00090
00091 net->
id = strPrintf(
"net%d",
cnCounter++);
00092 net->
isBuilt =
NULL;
00093 net->
totalNumberOfValues = 0;
00094 net->
values = net->
nodes = net->
edges =
NULL;
00095 net->
parses = (List)
NULL;
00096 net->
searchagenda = (Agenda)
NULL;
00097 net->
evalUnary = 0;
00098 net->
evalBinary = 0;
00099 net->
statUnary = 0;
00100 net->
cache =
NULL;
00101 net->
lvTotals =
NULL;
00102
00103
return net;
00104 }
00105
00106
00107
00108
00109 Boolean
cnTag(
ConstraintNet net, Lattice lat) {
00110
Chunker chunker;
00111 List chunks;
00112
00113 net->
lexemgraph =
lgNew(lat);
00114
00115
00116
if (net->
lexemgraph ==
NULL) {
00117
cnDelete(net);
00118
return FALSE;
00119 }
00120
00121
00122
if(taggerUp()) {
00123 taggerTag(net->
lexemgraph);
00124 }
00125 chunker =
chunkerNew(
DefaultChunker, net->
lexemgraph);
00126 chunks =
chunkerChunk(chunker);
00127
if (
hkVerbosity &
CDG_DEBUG) {
00128
chunkerPrintChunks(
CDG_DEBUG, chunks);
00129 }
00130
chunkerDelete(chunker);
00131
00132
cdgPrintf(
CDG_INFO,
"INFO: grapheme graph: #nodes %d, min %d, max %d\n",
00133 vectorSize(net->
lexemgraph->
graphemnodes),
00134 net->
lexemgraph->
min, net->
lexemgraph->
max);
00135
cdgPrintf(
CDG_INFO,
00136
"INFO: lexem graph: #nodes %d, min %d, max %d, #paths %lld\n",
00137 vectorSize(net->
lexemgraph->
nodes),
00138 net->
lexemgraph->
min,
00139 net->
lexemgraph->
max,
00140 net->
lexemgraph->
noOfPaths);
00141
00142
return TRUE;
00143 }
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163 ConstraintNet cnBuildFinal(
ConstraintNet net, Boolean buildLVs)
00164 {
00165
int i, j, k;
00166
int max = net->
lexemgraph +
cnUseNonSpec ? 2 : 1;
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
for(i = net->
lexemgraph->
min; i < net->
lexemgraph->
max; i++) {
00179
for(j = 0; j <
max+1; j++) {
00180
for(k = 0; k < inputCurrentGrammar->noOfLevels; k++) {
00181 arraySetElement(net->
isBuilt, (Pointer)(
int)buildLVs, i, j, k);
00182 }
00183 }
00184 }
00185
00186
00187
00188
00189
00190
if (buildLVs &&
cnOptimizeNet(net) < 0) {
00191 net =
NULL;
00192
return net;
00193 }
00194
00195
00196
00197
00198
if (
cnSortNodesMethod != 0) {
00199
cnSortNodes(net);
00200 }
00201
00202
00203
00204
00205
for (i = 0; i < vectorSize(net->
nodes); i++) {
00206
ConstraintNode node = (
ConstraintNode) vectorElement(net->
nodes, i);
00207
00208
cnUnaryPruning(node);
00209 }
00210
00211
00212
cnSortLVs(net);
00213
00214
00215
if (
scUseCache) {
00216 net->
cache =
scNew(vectorSize(net->
values));
00217 }
else {
00218 net->
cache =
NULL;
00219 }
00220
00221
00222
if (
cnEdgesFlag != cnEdgesOff) {
00223
cnBuildEdges(net);
00224
if (net->
edges ==
NULL || vectorSize(net->
edges) == 0) {
00225
cdgPrintf(
CDG_WARNING,
"WARNING: invalid net, net contains no edges\n");
00226
cnDelete(net);
00227
cdgCtrlCTrapped =
cdgCtrlCAllowed =
FALSE;
00228 net =
NULL;
00229
return net;
00230 }
00231 }
00232
00233
return net;
00234 }
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250 ConstraintNet cnBuild(Lattice lat, Boolean buildLVs)
00251 {
00252
ConstraintNet net;
00253
00254 net =
cnBuildInit();
00255
00256
if(!
cnTag(net,lat)) {
00257
return NULL;
00258 }
00259
00260
00261
if (!
cnBuildNodes(net, buildLVs)) {
00262
cdgPrintf(
CDG_WARNING,
"WARNING: invalid net, net contains no nodes\n");
00263
cnDelete(net);
00264
return NULL;
00265 }
00266
00267
if (
cdgCtrlCTrapped) {
00268
cdgPrintf(
CDG_WARNING,
"WARNING: interrupt while building a constraintnet\n");
00269
cnDelete(net);
00270
return NULL;
00271 }
00272
00273 net =
cnBuildFinal(net, buildLVs);
00274
if (net)
00275
cnMostRecentlyCreatedNet = net;
00276
else
00277 net =
NULL;
00278
00279
return net;
00280 }
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291 void cnPrint(
long unsigned int mode,
ConstraintNet net)
00292 {
00293
int i, j;
00294
ConstraintNode n;
00295
int noOfNotDeletedNodes = 0;
00296
int vmin = 99999999;
00297
int vmax = 0;
00298
int vtotal = 0;
00299
int vnum;
00300
00301
cdgPrintf(mode,
"------------------------------------------------------------\n");
00302
cdgPrintf(mode,
" id: %s\n", net->
id);
00303
cdgPrintf(mode,
" nodes:\n");
00304
for (i = 0; i < vectorSize(net->
nodes) && !
cdgCtrlCTrapped; i++) {
00305 n = (
ConstraintNode) vectorElement(net->
nodes, i);
00306
00307
if (!n->
level->showflag)
00308
continue;
00309
00310
if (!
lgAreDeletedNodes(net->
lexemgraph, n->
gn->
lexemes))
00311 noOfNotDeletedNodes++;
00312
00313
cdgPrintf(mode,
"%2d ", i);
00314
cnPrintNode(mode, n);
00315
00316
cdgPrintf(mode,
" %d:\n", vectorSize(n->
values));
00317 vnum = n->
noValidValues;
00318
for (j = 0; j < vectorSize(n->
values) && !
cdgCtrlCTrapped; j++) {
00319 LevelValue lv = (LevelValue) vectorElement(n->
values, j);
00320
00321
cdgPrintf(mode,
" ");
00322
00323
if (!lv->isDeleted ||
cnShowDeletedFlag) {
00324
cdgPrintf(mode, lv->isDeleted ?
"[" :
" ");
00325 lvPrint(mode, lv, net->
lexemgraph->
isDeletedNode, 1);
00326
cdgPrintf(mode, lv->isDeleted ?
"] " :
" ");
00327
cdgPrintf(mode,
"\n");
00328 }
00329 }
00330 vmin =
min(vmin, vnum == 0 ? vmin : vnum);
00331 vmax =
max(vmax, vnum);
00332 vtotal += vnum;
00333
cdgPrintf(mode,
"\n");
00334 }
00335
cdgPrintf(mode,
"#solutions(s): %d\n", listSize(net->
parses));
00336
cdgPrintf(mode,
"#nodes: %d/%d\n", noOfNotDeletedNodes, vectorSize(net->
nodes));
00337
cdgPrintf(mode,
"#paths: %lld\n", net->
lexemgraph->
noOfPaths);
00338
cdgPrintf(mode,
"values: #min %d, #max %d, #total %d, average %5.2f\n",
00339 vmin, vmax, vtotal, vtotal / (
float)noOfNotDeletedNodes);
00340
00341
cdgPrintf(mode,
"#edges: %d\n", net->
edges ==
NULL ? 0 : vectorSize(net->
edges));
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
cdgPrintf(mode,
"------------------------------------------------------------\n");
00369
00370
00371
if (net->
lexemgraph->
chunks) {
00372
cdgPrintf(mode,
"chunks:\n");
00373
chunkerPrintChunks(mode, net->
lexemgraph->
chunks);
00374 }
00375 }
00376
00377
00378
00379
00380
00381
00382
00383
00384 void cnPrintEdge(
long unsigned int mode,
ConstraintEdge e)
00385 {
00386
int i, j;
00387 LevelValue lv;
00388 String pseudoLexeme;
00389
00390
00391
cdgPrintf(mode,
"%s(%d,%d)-%s ---> %s(%d,%d)-%s\n",
00392 e->
start->
gn->
arc->word,
00393 e->
start->
gn->
arc->from,
00394 e->
start->
gn->
arc->to,
00395 e->
start->
level->id,
00396 e->
stop->
gn->
arc->word,
00397 e->
stop->
gn->
arc->from,
00398 e->
stop->
gn->
arc->to,
00399 e->
stop->
level->id);
00400
00401
00402
00403
00404
cdgPrintf(mode,
"start v stop >| ");
00405
for (j = 0; j < vectorSize(e->
stop->
values); j++) {
00406 lv = (LevelValue) vectorElement(e->
stop->
values, j);
00407
00408
if (lv->modifiee ==
NULL) {
00409 pseudoLexeme =
"NIL";
00410 }
else if (lv->modifiee == NONSPEC) {
00411 pseudoLexeme =
"NONSPEC";
00412 }
else {
00413 pseudoLexeme = lv->modifiee->lexem->description;
00414 }
00415
00416
cdgPrintf(mode,
"%5.5s/%-10.10s ", lv->label, pseudoLexeme);
00417
00418
00419 }
00420
cdgPrintf(mode,
"\n--------------------------------------------------");
00421
cdgPrintf(mode,
"--------------------------------------------------\n");
00422
00423
00424
for (i = 0; i < vectorSize(e->
start->
values); i++) {
00425 lv = (LevelValue) vectorElement(e->
start->
values, i);
00426
00427
if (lv->modifiee ==
NULL) {
00428 pseudoLexeme =
"NIL";
00429 }
else if (lv->modifiee == NONSPEC) {
00430 pseudoLexeme =
"NONSPEC";
00431 }
else {
00432 pseudoLexeme = lv->modifiee->lexem->description;
00433 }
00434
00435
00436
cdgPrintf(mode,
"%5.5s/%-10.10s | ", lv->label, pseudoLexeme);
00437
00438
00439
for (j = 0; j < vectorSize(e->
stop->
values); j++) {
00440
cdgPrintf(mode,
" %.6e ",
smGetScore(e->
scores, i, j));
00441 }
00442
cdgPrintf(mode,
"\n");
00443 }
00444
00445
cdgPrintf(mode,
"\n");
00446
00447 }
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458 void cnPrintNode(
long unsigned int mode,
ConstraintNode cn)
00459 {
00460 List l;
00461
00462
for (l = cn->
gn->
lexemes; l !=
NULL; l = listNext(l)) {
00463
cdgPrintf(mode,
"%s", ((
LexemNode) listElement(l))->lexem->description);
00464
if (listNext(l)) {
00465
cdgPrintf(mode,
"/");
00466 }
00467 }
00468
00469
cdgPrintf(mode,
"(%d-%d)",
00470 cn->
gn->
arc->from,
00471 cn->
gn->
arc->to);
00472
00473
cdgPrintf(mode,
"/%s", cn->
level->id);
00474 }
00475
00476
00477
00478
00479
00480
00481
00482 Boolean
cnUnaryPruningCompare(LevelValue a, LevelValue b)
00483 {
00484
return (a->score < b->score);
00485 }
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500 void cnUnaryPruning(
ConstraintNode node)
00501 {
00502 Vector v;
00503
int i, j;
00504 Number limit = 0.0;
00505 LevelValue lv;
00506
00507
if (node->
noValidValues == 0)
00508
return;
00509
00510 v = vectorClone(node->
values);
00511 vectorSort(v,
cnUnaryPruningCompare);
00512
00513 i = (
int)((node->
noValidValues - 1) * (1.0 -
cnUnaryPruningFraction));
00514
00515
00516
for (j = 0; j < vectorSize(v); j++) {
00517 lv = (LevelValue) vectorElement(v, j);
00518
if (!lv->isDeleted)
00519 i--;
00520
00521
if (i == 0) {
00522 limit = lv->score;
00523
break;
00524 }
00525 }
00526
00527 vectorDelete(v);
00528
00529
00530
for (i = 0; i < vectorSize(node->
values); i++) {
00531 lv = (LevelValue) vectorElement(node->
values, i);
00532
if (lv->isDeleted)
00533
continue;
00534
00535
if (lv->score < limit) {
00536
cdgPrintf(
CDG_INFO,
"INFO: deleting ");
00537 lvPrint(
CDG_INFO, lv,
NULL, 2);
00538
cdgPrintf(
CDG_INFO,
"\n");
00539
00540 lv->isDeleted =
TRUE;
00541 node->
noValidValues--;
00542 }
00543 }
00544 }
00545
00546
00547
00548
00549 Boolean
cnNodeComparePrio(
ConstraintNode a,
ConstraintNode b)
00550 {
00551
return a->
level->no > b->
level->no;
00552 }
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562 Boolean
cnNodeCompareSmallest(
ConstraintNode a,
ConstraintNode b)
00563 {
00564
return ( a->
noValidValues <= b->
noValidValues);
00565 }
00566
00567
00568
00569
00570
00571
00572
00573 void cnSortNodes(
ConstraintNet net)
00574 {
00575
ConstraintNode node;
00576
int i;
00577
00578
if (
hkVerbosity &
CDG_DEBUG) {
00579
cdgPrintf(
CDG_DEBUG,
"DEBUG: old sorting:\n");
00580
for (i = 0; i < vectorSize(net->
nodes); i++) {
00581 node = (
ConstraintNode) vectorElement(net->
nodes, i);
00582
cdgPrintf(
CDG_DEBUG,
" %04d: [%3d] ", i, node->
noValidValues);
00583
cnPrintNode(
CDG_DEBUG, node);
00584
cdgPrintf(
CDG_DEBUG,
"\n");
00585 }
00586 }
00587
00588
if (
cnSortNodesMethod==1) {
00589 vectorSort(net->
nodes,
cnNodeComparePrio);
00590 }
else if (
cnSortNodesMethod==2) {
00591 vectorSort(net->
nodes,
cnNodeCompareSmallest);
00592 }
00593
00594
if (
hkVerbosity &
CDG_DEBUG) {
00595
cdgPrintf(
CDG_DEBUG,
"DEBUG: new sorting:\n");
00596
for (i = 0; i < vectorSize(net->
nodes); i++) {
00597 node = (
ConstraintNode) vectorElement(net->
nodes, i);
00598
cdgPrintf(
CDG_DEBUG,
" %04d: [%3d] ", i, node->
noValidValues);
00599
cnPrintNode(
CDG_DEBUG, node);
00600
cdgPrintf(
CDG_DEBUG,
"\n");
00601 }
00602 }
00603
00604 }
00605
00606
00607
00608
00609
00610
00611
00612
00613 void cnBuildLv(
ConstraintNode node, List modifiers, Level level, String label, List modifiees)
00614 {
00615 LevelValue newLv = lvNew(modifiers, level, label, modifiees);
00616
ConstraintNet net = node->
net;
00617
00618 newLv->score =
evalUnary(newLv, net,
NULL,
FALSE,
NULL,
NULL);
00619
if (statUseStatisticsFlag) {
00620 newLv->score *= statUnary(newLv, net);
00621 }
00622
00623
#ifdef DEBUGCNBUILDLV
00624
cdgPrintf(
CDG_DEBUG,
"DEBUG: Built LV ");
00625 lvPrint(
CDG_DEBUG, newLv,
NULL);
00626
#endif
00627
00628
if (newLv->score == 0.0) {
00629
#ifdef DEBUGCNBUILDLV
00630
cdgPrintf(
CDG_DEBUG,
"... invalid.\n");
00631
#endif
00632
lvDelete(newLv);
00633 }
else {
00634
#ifdef DEBUGCNBUILDLV
00635
cdgPrintf(
CDG_DEBUG,
"... OK.\n");
00636
#endif
00637
newLv->limit = newLv->score;
00638 vectorAddElement(node->
values, newLv);
00639 newLv->indexWRTNet = vectorAddElement(net->
values, newLv);
00640 net->
totalNumberOfValues++;
00641 node->
totalNumberOfValues++;
00642 }
00643 }
00644
00645
00646
00647
00648
00649
00650
00651
00652 void cnBuildLevelValues(
ConstraintNode node, Level level,
00653
GraphemNode modifier,
GraphemNode modifiee)
00654 {
00655
ConstraintNet net = node->
net;
00656 Direction dir;
00657 List l;
00658
00659
00660
if(!
lgMayModify(net->
lexemgraph, modifier, modifiee)) {
00661
return;
00662 }
00663
00664
00665
if (modifiee == NONSPEC) {
00666 dir = Right;
00667 }
else if(!modifiee) {
00668 dir = Nil;
00669 }
else if(modifier->
arc->from < modifiee->
arc->from) {
00670 dir = Right;
00671 }
else {
00672 dir = Left;
00673 }
00674
00675
00676
for(l = level->labels; l !=
NULL; l = listNext(l)) {
00677
00678
00679 String label = listElement(l);
00680 EdgeType et = etEncode(level,label,dir);
00681 BitString downRelevant = vectorElement(inputCurrentGrammar->downFeatures, et);
00682 BitString upRelevant = vectorElement(inputCurrentGrammar->upFeatures, et);
00683 List downClasses =
lgPartitions(modifier, downRelevant);
00684 List upClasses =
lgPartitions(modifiee, upRelevant);
00685
00686 List m, n;
00687
for (m = downClasses; m !=
NULL; m = listNext(m)) {
00688 List modifiers = listElement(m);
00689
for (n = upClasses; n !=
NULL; n = listNext(n)) {
00690 List modifiees = listElement(n);
00691
cnBuildLv(node, modifiers, level, label, modifiees);
00692 }
00693 }
00694
if (spec(modifiee)) {
00695 listForEachDelete(upClasses, listDelete);
00696 }
else {
00697 listDelete(upClasses);
00698 }
00699 listForEachDelete(downClasses, listDelete);
00700 }
00701 }
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718 Boolean
cnBuildTriple(
ConstraintNet net,
int a,
int b,
int levelno) {
00719
LexemGraph lg = net->
lexemgraph;
00720
int i,j;
00721
int bb = b;
00722 List l;
00723
00724
if(b < 0) {
00725 bb = lg->
max - b;
00726 }
00727
00728
00729
if(arrayElement(net->
isBuilt, a, bb, levelno)) {
00730
return FALSE;
00731 }
00732
00733
00734
for (l = inputCurrentGrammar->levels; l !=
NULL; l = listNext(l)) {
00735 Level level = (Level) listElement(l);
00736
if (!level->useflag)
continue;
00737
if(level->no != levelno)
continue;
00738
00739
for(i = 0; i < vectorSize(lg->
graphemnodes); i++) {
00740
GraphemNode gn = vectorElement(lg->
graphemnodes, i);
00741
ConstraintNode node =
NULL;
00742
if(a != gn->
arc->from)
continue;
00743
00744
00745
for(j = 0; j < vectorSize(net->
nodes); j++) {
00746 node = vectorElement(net->
nodes, j);
00747
if(node->
level != level)
continue;
00748
if(node->
gn != gn)
continue;
00749
break;
00750 }
00751
00752
if(-1 == b) {
00753
cnBuildLevelValues(node, level, gn,
NULL);
00754 }
00755
else if(-2 == b) {
00756
cnBuildLevelValues(node, level, gn, NONSPEC);
00757 }
00758
else for(j = 0; j < vectorSize(lg->
graphemnodes); j++) {
00759
GraphemNode modifiee = vectorElement(lg->
graphemnodes, j);
00760
if(modifiee->
arc->from != b)
continue;
00761
cnBuildLevelValues(node, level, gn, modifiee);
00762 }
00763 node->
noValidValues = vectorSize(node->
values);
00764 node->
totalNumberOfValues = node->
noValidValues;
00765 }
00766 }
00767
00768
00769 arraySetElement(net->
isBuilt, (Pointer)
TRUE, a, bb, levelno);
00770
return TRUE;
00771
00772 }
00773
00774
00775
00776
00777
00778
00779
00780
00781 Boolean
cnBuildIter(
ConstraintNet net,
GraphemNode gn, Boolean buildLVs)
00782 {
00783 Level level;
00784
GraphemNode modifiee;
00785
ConstraintNode node;
00786 List l;
00787
int k, z;
00788
int ggs = vectorSize(net->
lexemgraph->
graphemnodes);
00789
int lgs = vectorSize(net->
lexemgraph->
nodes);
00790
int lvs = listSize(inputCurrentGrammar->levels);
00791
00792
if (gn->
lexemes ==
NULL)
00793
return TRUE;
00794
00795
cdgFlush();
00796
00797
00798
for (l = inputCurrentGrammar->levels; l !=
NULL; l = listNext(l)) {
00799 level = (Level) listElement(l);
00800
if (!level->useflag)
00801
continue;
00802
00803
00804
if (
cdgCtrlCTrapped) {
00805
00806
00807
for (z = 0; z < vectorSize(net->
nodes); z++) {
00808 node = (
ConstraintNode) vectorElement(net->
nodes, z);
00809
cnDeleteNode(node);
00810 }
00811 vectorDelete(net->
nodes);
00812 net->
nodes =
NULL;
00813
return FALSE;
00814 }
00815
cdgFlush();
00816
00817
00818
00819
if (
lgAreDeletedNodes(net->
lexemgraph, gn->
lexemes)) {
00820
continue;
00821 }
00822
00823
00824 node = (
ConstraintNode) memMalloc(
sizeof (
ConstraintNodeStruct));
00825 node->
net = net;
00826 node->
level = level;
00827 node->
gn = gn;
00828 node->
values = vectorNew(lgs * lvs);
00829 node->
totalNumberOfValues = 0;
00830
00831
00832
if(buildLVs) {
00833
for (k = 0; k <= ggs; k++) {
00834
if(k == ggs) {
00835 modifiee =
NULL;
00836 }
else {
00837 modifiee = vectorElement(net->
lexemgraph->
graphemnodes, k);
00838 }
00839
cnBuildLevelValues(node, level, gn, modifiee);
00840 }
00841
if (
cnUseNonSpec) {
00842
cnBuildLevelValues(node, level, gn, NONSPEC);
00843 }
00844 }
00845
00846
00847 node->
noValidValues = vectorSize(node->
values);
00848 node->
totalNumberOfValues = node->
noValidValues;
00849 vectorAddElement(net->
nodes, node);
00850 }
00851
return TRUE;
00852 }
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895 Boolean
cnBuildNodes(
ConstraintNet net, Boolean buildLVs)
00896 {
00897
Timer profileTime;
00898
GraphemNode gn;
00899
int i;
00900
int ggs = vectorSize(net->
lexemgraph->
graphemnodes);
00901
int lgs = vectorSize(net->
lexemgraph->
nodes);
00902
int lvs = listSize(inputCurrentGrammar->levels);
00903
LexemGraph lg = net->
lexemgraph;
00904
00905 profileTime =
timerNew();
00906 net->
nodes = vectorNew(lgs * lvs);
00907 net->
values = vectorNew(lgs * lvs);
00908 net->
isBuilt = arrayNew(lg->
max, (2+lg->
max), lvs, 0);
00909
00910
for (i = 0; i < ggs; i++) {
00911 gn = (
GraphemNode) vectorElement(net->
lexemgraph->
graphemnodes, i);
00912
if (gn->
lexemes ==
NULL)
00913
continue;
00914
00915
cdgPrintf(
CDG_DEBUG,
"%s ",
00916 ((
LexemNode) listElement(gn->
lexemes))->lexem->word);
00917
00918
if (!
cnBuildIter(net, gn, buildLVs))
00919
break;
00920 }
00921
00922
cdgPrintf(
CDG_DEBUG,
"\n");
00923
cdgPrintf(
CDG_PROFILE,
00924
"PROFILE: building nodes took %ldms\n",
timerElapsed(profileTime));
00925
if (
hkVerbosity &
CDG_HOOK)
00926
cdgExecHook(
HOOK_CNBUILDNODES, net);
00927
00928
timerFree(profileTime);
00929
return TRUE;
00930 }
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943 int cnOptimizeNode(
ConstraintNet net,
ConstraintNode node)
00944 {
00945
int k;
00946 LevelValue lv;
00947
int noChanges = 0;
00948
LexemGraph lg = net->
lexemgraph;
00949
GraphemNode gn;
00950 List l;
00951 Boolean pruneModifiees =
TRUE;
00952
GraphemNode theModifiee =
NULL;
00953 BitString down = bitNew(vectorSize(lg->
nodes));
00954 BitString up = bitNew(vectorSize(lg->
nodes));
00955 bitClearAll(down);
00956 bitClearAll(up);
00957
00958
#ifdef DEBUGCNOPTIMIZENODE
00959
{
00960
int i;
00961
cdgPrintf(
CDG_DEBUG,
00962
"DEBUG: optimizing constraint node %s/%s(%d,%d)\n",
00963 node->
gn->
arc->word,
00964 node->
level->id,
00965 node->
gn->
arc->from,
00966 node->
gn->
arc->to);
00967
for(i = 0; i < vectorSize(node->
values); i++) {
00968 LevelValue lv = vectorElement(node->
values, i);
00969
if(lv->isDeleted) {
00970
cdgPrintf(
CDG_DEBUG,
"[");
00971 }
00972 lvPrint(
CDG_DEBUG, lv,
NULL, 0);
00973
if(lv->isDeleted) {
00974
cdgPrintf(
CDG_DEBUG,
"]");
00975 }
00976
cdgPrintf(
CDG_DEBUG,
"\n");
00977 }
00978 }
00979
#endif
00980
00981
if(node->
noValidValues > 0) {
00982
00983
00984
for (k = 0; k < vectorSize(node->
values); k++) {
00985 lv = (LevelValue) vectorElement(node->
values, k);
00986
if (lv->isDeleted) {
00987
continue;
00988 }
00989
00990
00991
if (
lgAreDeletedNodes(lg, lv->modifiers)) {
00992
#ifdef DEBUGCNOPTIMIZENODE
00993
cdgPrintf(
CDG_DEBUG,
"DEBUG: Deleting LV ");
00994 lvPrint(
CDG_DEBUG, lv,
NULL, 0);
00995
cdgPrintf(
CDG_DEBUG,
" because of invalid modifiers.\n");
00996
#endif
00997
lv->isDeleted =
TRUE;
00998 node->
noValidValues--;
00999 noChanges++;
01000 }
else if ((spec(lv->modifiees) &&
01001
lgAreDeletedNodes(lg, lv->modifiees)) ||
01002 (spec(lv->modifiee) &&
01003 !
lgCompatibleNodes(lg, lv->modifier, lv->modifiee))) {
01004
#ifdef DEBUGCNOPTIMIZENODE
01005
cdgPrintf(
CDG_DEBUG,
"DEBUG: Deleting LV ");
01006 lvPrint(
CDG_DEBUG, lv,
NULL, 0);
01007
cdgPrintf(
CDG_DEBUG,
" because of invalid modifiees.\n");
01008
#endif
01009
lv->isDeleted =
TRUE;
01010 node->
noValidValues--;
01011 noChanges++;
01012 }
01013
01014
if(lv->isDeleted) {
01015
continue;
01016 }
01017
01018
01019
for(l = lv->modifiers; l !=
NULL; l = listNext(l)) {
01020
LexemNode ln = listElement(l);
01021 bitSet(down, ln->
no);
01022 }
01023
01024
01025
01026
01027
01028
01029
01030
if(spec(lv->modifiee)) {
01031
if(pruneModifiees) {
01032
if(!theModifiee) {
01033 theModifiee = lv->modifiee->grapheme;
01034
#ifdef DEBUGCNOPTIMIZENODE
01035
cdgPrintf(
CDG_DEBUG,
01036
"DEBUG: no modifiee seen yet, taking `%s'(%d-%d)\n",
01037 theModifiee->
arc->word,
01038 theModifiee->
arc->from,
01039 theModifiee->
arc->to);
01040
#endif
01041
}
else if(theModifiee != lv->modifiee->grapheme) {
01042
#ifdef DEBUGCNOPTIMIZENODE
01043
cdgPrintf(
CDG_DEBUG,
"DEBUG: That's a different modifiee -- no modifiee pruning.\n");
01044
#endif
01045
pruneModifiees =
FALSE;
01046 }
01047 }
01048
if(pruneModifiees) {
01049
for(l = lv->modifiees; l !=
NULL; l = listNext(l)) {
01050
LexemNode ln = listElement(l);
01051 bitSet(up, ln->
no);
01052 }
01053 }
01054 }
else {
01055
#ifdef DEBUGCNOPTIMIZENODE
01056
if(pruneModifiees) {
01057
cdgPrintf(
CDG_DEBUG,
"DEBUG: NIL binding seen -- no modifiee pruning.\n");
01058 }
01059
#endif
01060
pruneModifiees =
FALSE;
01061 }
01062 }
01063 }
01064
01065
if (node->
noValidValues == 0) {
01066
if (!
lgAreDeletableNodes(lg, node->
gn->
lexemes)) {
01067
cdgPrintf(
CDG_WARNING,
01068
"WARNING: can't find a value for %s/%s(%d,%d), invalid net\n",
01069 node->
gn->
arc->word,
01070 node->
level->id,
01071 node->
gn->
arc->from,
01072 node->
gn->
arc->to);
01073
01074
return -1;
01075 }
01076 }
01077
01078
01079
for(l = node->
gn->
lexemes; l !=
NULL; l = listNext(l)) {
01080
LexemNode ln = listElement(l);
01081
if(!
lgIsDeletedNode(lg, ln) && !bitGet(down, ln->
no)) {
01082
#ifdef DEBUGCNOPTIMIZENODE
01083
cdgPrintf(
CDG_DEBUG,
"DEBUG: deleting ");
01084
lgPrintNode(
CDG_DEBUG, ln);
01085
cdgPrintf(
CDG_DEBUG,
" because of the modifier criterion.\n");
01086
#endif
01087
lgDeleteNode(lg, ln);
01088 noChanges++;
01089 }
01090 }
01091
if(pruneModifiees && theModifiee) {
01092
for(l = theModifiee->
lexemes; l !=
NULL; l = listNext(l)) {
01093
LexemNode ln = listElement(l);
01094
if(!
lgIsDeletedNode(lg, ln) && !bitGet(up, ln->
no)) {
01095
#ifdef DEBUGCNOPTIMIZENODE
01096
cdgPrintf(
CDG_DEBUG,
"DEBUG: deleting ");
01097
lgPrintNode(
CDG_DEBUG, ln);
01098
cdgPrintf(
CDG_DEBUG,
" because of the modifiee criterion.\n");
01099
#endif
01100
lgDeleteNode(lg, ln);
01101 noChanges++;
01102 }
01103 }
01104 }
01105 bitDelete(up);
01106 bitDelete(down);
01107
01108
01109 gn = node->
gn;
01110
if(lg->
noOfPathsFromStart[gn->
no] * lg->
noOfPathsToEnd[gn->
no] == 0 &&
01111 !
lgAreDeletedNodes(lg, gn->
lexemes)) {
01112
#ifdef DEBUGCNOPTIMIZENODE
01113
cdgPrintf(
CDG_DEBUG,
"DEBUG: Deleting entire grapheme node because of the path criterion.\n");
01114
#endif
01115
lgDeleteNodes(lg, gn->
lexemes);
01116 noChanges++;
01117 }
01118
01119
return noChanges;
01120 }
01121
01122
01123
01124
01125
01126 int countValidValues(
ConstraintNet net)
01127 {
01128
int i;
01129 LevelValue lv;
01130
int result = 0;
01131
01132
for (i = 0; i < vectorSize(net->
values); i++) {
01133 lv = (LevelValue)vectorElement(net->
values, i);
01134
if (!lv->isDeleted)
01135 result++;
01136 }
01137
01138
return result;
01139 }
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168 int cnOptimizeNet(
ConstraintNet net)
01169 {
01170
ConstraintNode node;
01171
int i, n, result = 0;
01172 Boolean progress =
TRUE;
01173
#ifdef DEBUG_CNOPTIMIZENET
01174
int noDeletedValues;
01175
int oldNoValues;
01176 oldNoValues =
countValidValues(net);
01177
#endif
01178
01179
01180
while (progress) {
01181 progress =
FALSE;
01182
01183
01184
for (i = 0; i < vectorSize(net->
nodes); i++) {
01185 node = (
ConstraintNode)vectorElement(net->
nodes, i);
01186
01187 n =
cnOptimizeNode(net, node);
01188
if (n < 0) {
01189
return -1;
01190 }
01191
else if (n > 0) {
01192 progress =
TRUE;
01193 result += n;
01194 }
01195 }
01196 }
01197
01198
#ifdef DEBUG_CNOPTIMIZENET
01199
noDeletedValues = oldNoValues -
countValidValues(net);
01200
if (noDeletedValues) {
01201
cdgPrintf(
CDG_DEBUG,
"DEBUG: deleted %d levelvalues\n",
01202 noDeletedValues);
01203 }
01204
#endif
01205
01206
return result;
01207 }
01208
01209
01210
01211
01212
01213
01214
01215
01216 Boolean
cnConnectedByArc(
ConstraintNode a,
ConstraintNode b)
01217 {
01218
01219
01220
01221
01222
return (a->
gn != b->
gn || a->
level != b->
level);
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235 }
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247 Boolean
cnRenew(
ConstraintNet net)
01248 {
01249
int i, j;
01250 LevelValue lv;
01251
LexemNode ln;
01252
ConstraintNode node;
01253
01254
01255
for (i = 0; i < vectorSize(net->
nodes); i++) {
01256 node = (
ConstraintNode) vectorElement(net->
nodes, i);
01257
01258 node->
noValidValues = 0;
01259
for (j = 0; j < vectorSize(node->
values); j++) {
01260 lv = vectorElement(node->
values, j);
01261
01262
if (!
cnUseNonSpec && lv->modifiee == NONSPEC) {
01263 lv->isDeleted =
TRUE;
01264 }
else {
01265 lv->isDeleted =
FALSE;
01266 node->
noValidValues++;
01267 }
01268 lv->limit = lv->score;
01269 lv->no = -1;
01270 }
01271 }
01272
01273
01274
for (i = 0; i < vectorSize(net->
lexemgraph->
nodes); i++) {
01275 ln = vectorElement(net->
lexemgraph->
nodes, i);
01276 ln->
limit = 1.0;
01277 }
01278
01279
01280 bvSetAllElements(net->
lexemgraph->
isDeletedNode,
FALSE);
01281
lgComputeNoOfPaths(net->
lexemgraph);
01282
lgComputeDistances(net->
lexemgraph);
01283
01284
01285
scDelete(net->
cache);
01286
if (
scUseCache) {
01287 net->
cache =
scNew(vectorSize(net->
values));
01288 }
else {
01289 net->
cache =
NULL;
01290 }
01291
01292
01293
01294 listDelete(net->
parses);
01295 net->
parses =
NULL;
01296
01297
01298
cnSortLVs(net);
01299
01300
01301
if(
cnOptimizeNet(net) < 0) {
01302
return FALSE;
01303 };
01304
01305
return TRUE;
01306 }
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328 void cnBuildEdges(
ConstraintNet net)
01329 {
01330
LexemGraph lg = net->
lexemgraph;
01331
int lgs = vectorSize(lg->
nodes);
01332
int lvs = listSize(inputCurrentGrammar->levels);
01333
int i, j, k, l;
01334
int total, done = 0;
01335
ConstraintNode in, jn;
01336
ConstraintEdge edge, reverse;
01337
Timer profileTime;
01338 Vector reverseTable;
01339
01340 profileTime =
timerNew();
01341
01342 reverseTable = vectorNew(vectorSize(net->
nodes) * vectorSize(net->
nodes));
01343 vectorSetAllElements(reverseTable,
NULL);
01344
01345
01346 total = vectorSize(net->
nodes) * vectorSize(net->
nodes);
01347
if (!
cdgXCDG) {
01348
cdgPrintf(
CDG_DEBUG,
"DEBUG: building edges... 0%%");
01349 }
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359 net->
edges = vectorNew(lgs * lvs);
01360
for (i = 0; i < vectorSize(net->
nodes); i++) {
01361 in = (
ConstraintNode) vectorElement(net->
nodes, i);
01362
for (j = 0; j < vectorSize(net->
nodes); j++) {
01363 jn = (
ConstraintNode) vectorElement(net->
nodes, j);
01364
01365
01366
if (!
cdgXCDG) {
01367
cdgPrintf(
CDG_DEBUG,
"%3.0f%%", 100 * (++done / (
float)total));
01368 }
01369
01370
01371
if (
cdgCtrlCTrapped) {
01372
01373
for (i = 0; i < vectorSize(net->
edges); i++) {
01374 edge = (
ConstraintEdge) vectorElement(net->
edges, i);
01375 memFree(edge->
isMarked);
01376
smDelete(edge->
scores);
01377 memFree(edge);
01378 }
01379 vectorDelete(net->
edges);
01380
01381 net->
edges =
NULL;
01382
return;
01383 }
01384
01385
01386
if (in->
level->no <= jn->
level->no)
01387 k = in->
level->no * lvs + jn->
level->no;
01388
else
01389 k = jn->
level->no * lvs + in->
level->no;
01390
01391
if (
cnEdgesFlag == cnEdgesFew && inputCurrentGrammar->levelMatrixCounter !=
NULL
01392 && (
int)vectorElement(inputCurrentGrammar->levelMatrixCounter,
01393 k) == 0)
continue;
01394
01395
01396
01397
if (
cnConnectedByArc(in, jn)) {
01398 edge = (
ConstraintEdge) memMalloc(
sizeof (
ConstraintEdgeStruct));
01399 edge->
start = in;
01400 edge->
stop = jn;
01401 edge->
reverse = (
ConstraintEdge)
NULL;
01402
01403
01404 edge->
isMarked =
01405 (Boolean *) memMalloc(
sizeof (Boolean) * vectorSize(in->
values));
01406 edge->
scores =
smNew(vectorSize(in->
values), vectorSize(jn->
values));
01407
for (k = 0; k < vectorSize(in->
values); k++) {
01408 LevelValue ik = (LevelValue) vectorElement(in->
values, k);
01409
01410 edge->
isMarked[k] =
FALSE;
01411
for (l = 0; l < vectorSize(jn->
values); l++) {
01412 LevelValue jl = (LevelValue) vectorElement(jn->
values, l);
01413
double d;
01414
01415
01416
if (lvCompatible(lg, ik, jl)) {
01417 d = ik->score * jl->score *
evalBinary(ik, jl, net,
NULL,
FALSE,
NULL,
NULL);
01418 }
else {
01419 d = 0.0;
01420 }
01421
smSetScore(edge->
scores, d, k, l);
01422
01423
smSetFlag(edge->
scores,
TRUE, k, l);
01424 }
01425 }
01426 vectorAddElement(net->
edges, edge);
01427
01428 k = i * vectorSize(net->
nodes) + j;
01429 vectorSetElement(reverseTable, edge, k);
01430 k = i + vectorSize(net->
nodes) * j;
01431 reverse = vectorElement(reverseTable, k);
01432
if (reverse !=
NULL) {
01433 edge->
reverse = reverse;
01434 reverse->
reverse = edge;
01435 }
01436 }
01437 }
01438
cdgFlush();
01439 }
01440
01441 vectorDelete(reverseTable);
01442
01443
if (!
cdgXCDG) {
01444
cdgPrintf(
CDG_DEBUG,
"done.\n");
01445 }
01446
01447
cdgPrintf(
CDG_PROFILE,
"PROFILE: building edges took %ldms\n",
01448
timerElapsed(profileTime));
01449
01450
timerFree(profileTime);
01451 }
01452
01453
01454
01455
01456
01457
01458
01459 Boolean
cnIsStartNode(
ConstraintNode n)
01460 {
01461
return (n->
level == listElement(inputCurrentGrammar->levels)
01462 &&
01463
lgIsStartNode(n->
gn));
01464 }
01465
01466
01467
01468
01469
01470
01471 Boolean
cnIsEndNode(
ConstraintNode n)
01472 {
01473
return (n->
level == listLastElement(inputCurrentGrammar->levels)
01474 &&
01475
lgIsEndNode(n->
gn));
01476 }
01477
01478
01479
01480
01481
01482
01483
01484 void cnDeleteBinding(
NodeBinding nb)
01485 {
01486 memFree(nb);
01487 }
01488
01489
01490
01491
01492
01493
01494 void cvAnalyse(
ConstraintViolation cv, Vector context)
01495 {
01496
01497 Boolean inverse;
01498
int i;
01499 List l;
01500 LevelValue lv;
01501 List lexemes =
NULL;
01502
LexemNode ln;
01503
01504 VarInfo var;
01505
01506
01507
cdgPrintf(
CDG_INFO,
"\n\nThe conflict is:\n================\n\n");
01508
cvPrint(
CDG_INFO, cv);
01509
01510
01511
cdgPrintf(
CDG_INFO,
"\n\n\nThe constraint is:\n==================\n\n");
01512 printConstraint(
CDG_INFO, cv->
constraint);
01513
01514
01515
01516 inverse =
01517 listSize(cv->
constraint->vars) == 2
01518 &&
evalBinaryConstraint(cv->
constraint,
NULL, context, cv->
lv1, cv->
lv2);
01519
01520
01521
01522
01523
01524
cdgPrintf(
CDG_INFO,
"\n\nThe LVs concerned are:\n======================\n\n");
01525
for (i = 1, l = cv->
constraint->vars; l; l = listNext(l), i++) {
01526 var = (VarInfo) listElement(l);
01527
if (listSize(cv->
constraint->vars) == 1) {
01528 lv = cv->
lv1;
01529 }
else if ((i == 1 && !inverse) || (i == 2 && inverse)) {
01530 lv = cv->
lv1;
01531 }
else {
01532 lv = cv->
lv2;
01533 }
01534
cdgPrintf(
CDG_INFO,
"%s == ", var->varname);
01535 lvPrint(
CDG_INFO, lv,
NULL, 0);
01536
cdgPrintf(
CDG_INFO,
"\n");
01537
01538
01539
if (lv->modifiee !=
NULL && lv->modifiee != NONSPEC) {
01540 lexemes = listAddUniqueElement(lexemes, lv->modifiee);
01541 }
01542 lexemes = listAddUniqueElement(lexemes, lv->modifier);
01543 }
01544
01545
01546
cdgPrintf(
CDG_INFO,
01547
"\n\nThe lexemes concerned are:\n==========================\n\n");
01548
for (l = lexemes; l; l = listNext(l)) {
01549 ln = (
LexemNode) listElement(l);
01550
01551
cdgPrintf(
CDG_INFO,
"%s: ", ln->
lexem->word);
01552 printValue(
CDG_INFO, ln->
lexem->value, 0);
01553
cdgPrintf(
CDG_INFO,
"\n\n");
01554 }
01555
01556 listDelete(lexemes);
01557
01558 }
01559
01560
01561
01562
01563
01564
01565
01566
01567 void cvDelete(
ConstraintViolation cv)
01568 {
01569
01570
01571
01572
01573
if(cv->
lv1->indexWRTNet == -1) {
01574 lvDelete(cv->
lv1);
01575 }
01576
01577
if(cv->
lv2 && cv->
lv2->indexWRTNet == -1) {
01578 lvDelete(cv->
lv2);
01579 }
01580
01581 memFree(cv);
01582 }
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593 void cnDeleteNode(
ConstraintNode node)
01594 {
01595
int j;
01596 LevelValue lv;
01597
01598
for (j = 0; j < vectorSize(node->
values); j++) {
01599 lv = (LevelValue) vectorElement(node->
values, j);
01600 lvDelete(lv);
01601 }
01602
01603 vectorDelete(node->
values);
01604
01605 memFree(node);
01606 }
01607
01608
01609
01610
01611
01612
01613
01614
01615 void cnDeleteEdge(
ConstraintEdge edge)
01616 {
01617 memFree(edge->
isMarked);
01618
smDelete(edge->
scores);
01619
01620 memFree(edge);
01621 }
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635
01636
01637
01638
01639
01640 void cnDelete(
ConstraintNet net)
01641 {
01642
int i;
01643
ConstraintNode node;
01644
ConstraintEdge edge;
01645
01646
if (net ==
NULL) {
01647
cdgPrintf(
CDG_WARNING,
"WARNING: cnDelete: net is NULL\n");
01648
return;
01649 }
01650
if (
cnMostRecentlyCreatedNet == net)
01651
cnMostRecentlyCreatedNet =
NULL;
01652
01653
cdgFreeString(net->
id);
01654
if (net->
lexemgraph !=
NULL)
01655
lgDelete(net->
lexemgraph);
01656
if (net->
values !=
NULL)
01657 vectorDelete(net->
values);
01658
if (net->
nodes !=
NULL) {
01659
for (i = 0; i < vectorSize(net->
nodes); i++) {
01660 node = (
ConstraintNode) vectorElement(net->
nodes, i);
01661
cnDeleteNode(node);
01662 }
01663 vectorDelete(net->
nodes);
01664 }
01665
if (net->
edges !=
NULL) {
01666
for (i = 0; i < vectorSize(net->
edges); i++) {
01667 edge = (
ConstraintEdge) vectorElement(net->
edges, i);
01668
cnDeleteEdge(edge);
01669 }
01670 vectorDelete(net->
edges);
01671 }
01672
01673
if (net->
searchagenda !=
NULL)
01674 agDelete(net->
searchagenda);
01675
01676
if (net->
lvTotals !=
NULL)
01677 vectorDelete(net->
lvTotals);
01678
01679
01680 listDelete(net->
parses);
01681 net->
parses =
NULL;
01682
01683
scDelete(net->
cache);
01684 memFree(net);
01685 }
01686
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696 void cnPrintInfo(
ConstraintNet net)
01697 {
01698
int i, vmin, vmax, vtotal;
01699
ConstraintNode node;
01700
long double candidates;
01701
01702
if (!(
hkVerbosity &
CDG_INFO))
01703
return;
01704
01705 vmin = 99999999;
01706 vtotal = vmax = 0;
01707 candidates = 0;
01708
for (i = 0; i < vectorSize(net->
nodes); i++) {
01709 node = (
ConstraintNode) vectorElement(net->
nodes, i);
01710 vmin =
min(vmin, node->
noValidValues);
01711 vmax =
max(vmax, node->
noValidValues);
01712 vtotal += node->
noValidValues;
01713
if (node->
noValidValues) {
01714
if (!candidates) {
01715 candidates = node->
noValidValues;
01716 }
else {
01717 candidates *= node->
noValidValues;
01718
if (candidates == HUGE_VAL) {
01719
cdgPrintf(
CDG_WARNING,
"WARNING: very large problem\n");
01720 }
01721 }
01722 }
01723 }
01724
01725
cdgPrintf(
CDG_INFO,
"INFO: net: id %s, wordgraph %s\n",
01726 net->
id, net->
lexemgraph->
lattice->id);
01727
cdgPrintf(
CDG_INFO,
" #nodes %d, #edges %d\n",
01728 vectorSize(net->
nodes),
01729 net->
edges ==
NULL ? 0 : vectorSize(net->
edges));
01730
cdgPrintf(
CDG_INFO,
01731
" #evaluations: %d unary, %d statistics, %d binary\n",
01732 net->
evalUnary, net->
statUnary, net->
evalBinary);
01733
cdgPrintf(
CDG_INFO,
01734
" #values: min %d, max %d, total %d, average %5.2f\n", vmin,
01735 vmax, vtotal, vtotal / (
float)vectorSize(net->
nodes));
01736
cdgPrintf(
CDG_INFO,
" #candidates: %.4Lg\n", candidates);
01737
if (net->
cache && net->
cache->
size > 0) {
01738
cdgPrintf(
CDG_INFO,
01739
" cache: size %d #values %d, #hits %d, %1.1f on average, usage %1.2f%%\n",
01740 net->
cache->
capacity,
01741 net->
cache->
size,
01742 net->
cache->
hits,
01743 (
float)net->
cache->
hits / (
float)net->
cache->
size,
01744 (
float)net->
cache->
size / (
float)net->
cache->
capacity * 100);
01745 }
01746 }
01747
01748
01749
01750
01751
01752
01753
01754
01755
01756 ConstraintViolation cvNew(Constraint c, LevelValue lva, LevelValue lvb)
01757 {
01758
01759
ConstraintViolation result = memMalloc(
sizeof(
ConstraintViolationStruct));
01760 result->
constraint = c;
01761 result->
penalty = c->penalty;
01762 result->
lv1 = lva;
01763 result->
lv2 = lvb;
01764
01765
01766
01767
01768
01769
if(result->
lv1->indexWRTNet == -1) {
01770 result->
lv1 = lvClone(result->
lv1);
01771 }
01772
if(result->
lv2 && result->
lv2->indexWRTNet == -1) {
01773 result->
lv2 = lvClone(result->
lv2);
01774 }
01775
01776
01777 result->
nodeBindingIndex1 = lvIndex(lva);
01778 result->
nodeBindingIndex2 = lvb ? lvIndex(lvb) : -1;
01779
01780
01781
01782
01783
01784
01785
return result;
01786
01787 }
01788
01789
01790
01791
01792
01793 ConstraintViolation cvClone(
ConstraintViolation cv) {
01794
01795
ConstraintViolation result =
cvNew(cv->
constraint, cv->
lv1, cv->
lv2);
01796 result->
penalty = cv->
penalty;
01797
return result;
01798
01799 }
01800
01801
01802
01803
01804
01805
01806
01807
01808 void cnSortLVs(
ConstraintNet net)
01809 {
01810
int i;
01811
ConstraintNode n;
01812
01813
for (i = 0; i < vectorSize(net->
nodes); i++) {
01814 n = (
ConstraintNode) vectorElement(net->
nodes, i);
01815 vectorSort(n->
values, lvCompare);
01816 }
01817
01818 }
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830 Boolean
cnCompareViolation(
ConstraintViolation a,
ConstraintViolation b) {
01831
01832
if(a->
penalty < b->
penalty) {
01833
return TRUE;
01834 }
else if(a->
penalty > b->
penalty) {
01835
return FALSE;
01836 }
01837
01838
if(a->
nodeBindingIndex1 < b->
nodeBindingIndex1) {
01839
return TRUE;
01840 }
else if(a->
nodeBindingIndex1 > b->
nodeBindingIndex1) {
01841
return FALSE;
01842 }
01843
01844
if(a->
nodeBindingIndex2 < b->
nodeBindingIndex2) {
01845
return TRUE;
01846 }
else if(a->
nodeBindingIndex2 > b->
nodeBindingIndex2) {
01847
return FALSE;
01848 }
01849
01850
if(strcmp(a->
constraint->id, b->
constraint->id) < 0) {
01851
return TRUE;
01852 }
01853
01854
return FALSE;
01855
01856 }
01857
01858
01859
01860
01861
01862
01863
01864 void cnPrintParses(
ConstraintNet net)
01865 {
01866 List l;
01867
01868
for (l = net->
parses; l !=
NULL; l = listNext(l)) {
01869 parsePrint(listElement(l));
01870
cdgPrintf(
CDG_INFO,
"\n\n");
01871 }
01872 }
01873
01874
01875
01876
01877
01878
01879
01880
01881 ConstraintNode cnFindNode(
ConstraintNet net, LevelValue lv) {
01882
01883
ConstraintNode cn;
01884 LevelValue lv2;
01885
int i, j;
01886
01887
for (i = 0; i < vectorSize(net->
nodes); i++) {
01888 cn = (
ConstraintNode) vectorElement(net->
nodes, i);
01889
for (j = 0; j < vectorSize(cn->
values); j++) {
01890 lv2 = (LevelValue) vectorElement(cn->
values, j);
01891
if(lv->indexWRTNet == lv2->indexWRTNet) {
01892
return cn;
01893 }
01894 }
01895 }
01896
01897
cdgPrintf(
CDG_WARNING,
01898
"WARNING: can't find LV #%d in net!\n",
01899 lv->indexWRTNet);
01900
01901
return NULL;
01902
01903 }
01904
01905
01906
01907
01908
01909
01910
01911
01912 void cvPrint(
unsigned long mode,
ConstraintViolation cv) {
01913
01914
if (cv->
nodeBindingIndex2 >= 0) {
01915
cdgPrintf(
CDG_INFO,
"%03d : %03d : %4.3e : %s",
01916 cv->
nodeBindingIndex1, cv->
nodeBindingIndex2,
01917 cv->
penalty, cv->
constraint->id);
01918 }
else {
01919
cdgPrintf(
CDG_INFO,
" %03d : %4.3e : %s",
01920 cv->
nodeBindingIndex1, cv->
penalty, cv->
constraint->id);
01921 }
01922
01923 }
01924
01925
01926
01927
01928
01929
01930
01931
01932
01933 Boolean
cvContains(List conflicts,
ConstraintViolation cv) {
01934
01935 List l;
01936
ConstraintViolation cv2;
01937
01938
for (l = conflicts; l; l = listNext(l)) {
01939 cv2 = listElement(l);
01940
if (cv->
constraint->no == cv2->
constraint->no &&
01941 ((cv->
nodeBindingIndex1 == cv2->
nodeBindingIndex1 &&
01942 cv->
nodeBindingIndex2 == cv2->
nodeBindingIndex2)
01943 ||
01944 (cv->
nodeBindingIndex2 == cv2->
nodeBindingIndex1 &&
01945 cv->
nodeBindingIndex1 == cv2->
nodeBindingIndex2))) {
01946
return TRUE;
01947 }
01948 }
01949
01950
return FALSE;
01951 }
01952
01953
01954
01955
01956
01957
01958
01959
01960 Boolean
cvCompareNatural(
ConstraintViolation a,
ConstraintViolation b)
01961 {
01962
if (a->
nodeBindingIndex1 < b->
nodeBindingIndex1) {
01963
return TRUE;
01964 }
else if (a->
nodeBindingIndex1 > b->
nodeBindingIndex1) {
01965
return FALSE;
01966 }
01967
01968
if (a->
nodeBindingIndex2 < b->
nodeBindingIndex2) {
01969
return TRUE;
01970 }
else if (a->
nodeBindingIndex2 > b->
nodeBindingIndex2) {
01971
return FALSE;
01972 }
01973
01974
if (strcmp(a->
constraint->id, b->
constraint->id) < 0) {
01975
return TRUE;
01976 }
01977
01978
return FALSE;
01979 }
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992 Boolean
cvCompare(
ConstraintViolation a,
ConstraintViolation b)
01993 {
01994
int indexa, indexb;
01995
01996
01997
if (a->
penalty < b->
penalty)
01998
return TRUE;
01999
if (a->
penalty > b->
penalty)
02000
return FALSE;
02001
02002
02003
if (!a->
lv2 && b->
lv2)
02004
return TRUE;
02005
if (a->
lv2 && !b->
lv2)
02006
return FALSE;
02007
02008
02009 indexa = strcmp(a->
constraint->id, b->
constraint->id);
02010
if (indexa < 0)
02011
return FALSE;
02012
if (indexa > 0)
02013
return TRUE;
02014
02015
02016 indexa = lvIndex(a->
lv1);
02017 indexb = lvIndex(b->
lv1);
02018
if (indexa < indexb)
02019
return TRUE;
02020
02021
return FALSE;
02022 }
02023
02024
02025
02026
02027
02028
02029
02030
02031 ConstraintNet cnFindNet(String
id)
02032 {
02033
return hashGet(
cdgNets,
id);
02034 }
02035
02036
02037
02038
02039
02040
02041 void cnPrintActiveLVs(
ConstraintNet net)
02042 {
02043
int i, k;
02044
int j = 0;
02045
02046
for (i=0; i < vectorSize(net->
nodes); i++) {
02047
ConstraintNode cn = (
ConstraintNode)vectorElement(net->
nodes, i);
02048
for (k=0; k < vectorSize(cn->
values); k++) {
02049 LevelValue lv = (LevelValue)vectorElement(cn->
values, k);
02050
if (!lv->isDeleted) {
02051 lvPrint(
CDG_DEBUG, lv, net->
lexemgraph->
isDeletedNode, 0);
02052
cdgPrintf(
CDG_DEBUG,
"\n");
02053 j++;
02054 }
02055 }
02056 }
02057
cdgPrintf(
CDG_INFO,
"INFO: number of levelvalues: %d found; %d active\n", vectorSize(net->
values), j);
02058 }
02059
02060
02061
02062
02063
02064
02065
02066 void cnUndeleteAllLVs(
ConstraintNet net)
02067 {
02068
int i;
02069 LevelValue lv;
02070
02071
for (i=0; i<vectorSize(net->
values); i++) {
02072 lv = (LevelValue) vectorElement(net->
values, i);
02073 lv->isDeleted =
FALSE;
02074 }
02075 }
02076
02077
02078
02079
02080
02081
02082
02083 void cnDeleteAllLVs(
ConstraintNet net)
02084 {
02085
int i;
02086 LevelValue lv;
02087
02088
for (i=0; i<vectorSize(net->
values); i++) {
02089 lv = (LevelValue) vectorElement(net->
values, i);
02090 lv->isDeleted =
TRUE;
02091 }
02092 }
02093
02094
02095
02096
02097
02098
02099
02100 Boolean
cnBuildUpdateArcs(
ConstraintNet net, List listArcs)
02101 {
02102
int i;
02103 List m;
02104
02105
for (i = 0; i < vectorSize(net->
nodes); i++) {
02106
ConstraintNode cn = (
ConstraintNode) vectorElement(net->
nodes, i);
02107 Level level = cn->
level;
02108
GraphemNode modifier = cn->
gn;
02109
02110
#ifdef DEBUGCNBUILDNODES
02111
cdgPrintf(
CDG_DEBUG,
"\nDEBUG: generating LevelValues for node ");
02112
cnPrintNode(
CDG_DEBUG, cn);
02113
cdgPrintf(
CDG_DEBUG,
"\n");
02114
#endif
02115
02116
for (m = listArcs; m; m = listNext(m)) {
02117 Arc arc = (Arc) listElement(m);
02118
GraphemNode modifiee =
cnGetGraphemNodeFromArc(net, arc);
02119
if (modifiee ==
NULL) {
02120
continue;
02121 }
02122
cnBuildLevelValues(cn, level, modifier, modifiee);
02123 }
02124 cn->
noValidValues = vectorSize(cn->
values);
02125 cn->
totalNumberOfValues = cn->
noValidValues;
02126 }
02127
02128
for (m = listArcs; m; m = listNext(m)) {
02129 Arc arc = (Arc) listElement(m);
02130
GraphemNode gn =
cnGetGraphemNodeFromArc(net, arc);
02131
if (gn ==
NULL) {
02132
continue;
02133 }
02134
if (gn->
lexemes ==
NULL) {
02135
continue;
02136 }
02137
if (!
cnBuildIter(net, gn,
TRUE)) {
02138
return FALSE;
02139 }
02140
02141 }
02142
02143
return TRUE;
02144 }
02145
02146
02147
02148
02149 GraphemNode cnGetGraphemNodeFromArc(
ConstraintNet net, Arc arc)
02150 {
02151
int i;
02152
GraphemNode gn;
02153
02154
for (i=0; i<vectorSize(net->
lexemgraph->
graphemnodes); i++) {
02155 gn = (
GraphemNode) vectorElement(net->
lexemgraph->
graphemnodes, i);
02156
if (gn->
arc == arc)
02157
return gn;
02158 }
02159
02160
return (
GraphemNode)
NULL;
02161 }
02162
02163
02164
02165
02166
02167
02168
02169
02170
02171 void cnCallback(String name,
float *var)
02172 {
02173
if (strcmp(name,
"unaryFraction") == 0) {
02174
cdgPrintf(
CDG_INFO,
"INFO: unary pruning fraction set to %f\n",
02175 *var);
02176 }
02177 }
02178
02179
02180
02181
02182
02183
02184
02185 void cnInitialize()
02186 {
02187 setRegister(
"showdeleted", SET_BOOL, &
cnShowDeletedFlag,
NULL,
NULL,
NULL,
NULL);
02188 setRegister(
"edges", SET_ENUM, &
cnEdgesFlag,
NULL,
NULL,
NULL,
02189
"on", cnEdgesOn,
"off", cnEdgesOff,
"few", cnEdgesFew,
"all", cnEdgesAll,
NULL);
02190 setRegister(
"sortnodes", SET_ENUM, &
cnSortNodesMethod,
NULL,
NULL,
NULL,
02191
"off", 0,
"on", 1,
"prio", 1,
"smallest", 2,
NULL);
02192 setRegister(
"unaryFraction", SET_FLOAT, &
cnUnaryPruningFraction,
NULL,
NULL,
cnCallback,
NULL);
02193 setRegister(
"usenonspec", SET_BOOL, &
cnUseNonSpec,
NULL,
NULL,
NULL,
NULL);
02194 }
02195
02196
02197
02198
02199
02200
02201
02202
02203
02204 Lattice
cnGetLattice(
ConstraintNet cn) {
02205
return cn->
lexemgraph->
lattice;
02206 }
02207
02208
02209
02210