COOPY » Guide
version 0.6.5
|
00001 #include <coopy/Merger.h> 00002 #include <coopy/Dbg.h> 00003 #include <coopy/Mover.h> 00004 #include <coopy/NameSniffer.h> 00005 #include <coopy/IndexSniffer.h> 00006 00007 #include <stdlib.h> 00008 #include <ctype.h> 00009 00010 #include <algorithm> 00011 00012 #define WANT_MAP2STRING 00013 #define WANT_VECTOR2STRING 00014 #include <coopy/Stringer.h> 00015 00016 using namespace std; 00017 using namespace coopy::store; 00018 using namespace coopy::cmp; 00019 00020 static string normalize_string(string low, const CompareFlags& flags) { 00021 if (flags.ignore_case) { 00022 for (size_t c=0; c<low.length(); c++) { 00023 low[c] = tolower(low[c]); 00024 } 00025 } 00026 return low; 00027 } 00028 00029 static bool compare_string(const SheetCell& a, const SheetCell& b, 00030 const CompareFlags& flags) { 00031 if (a.escaped&&b.escaped) { 00032 string blank = "__NOT_SET__CSVCOMPARE_SSFOSSIL"; 00033 if (a.text!="NULL"&&a.text!=blank) { 00034 printf("sorry, coopy produced an inconsistent null type (a) '%s'\n", a.text.c_str()); 00035 exit(1); 00036 } 00037 if (b.text!="NULL"&&b.text!=blank) { 00038 printf("sorry, coopy produced an inconsistent null type (b) '%s'\n", b.text.c_str()); 00039 exit(1); 00040 } 00041 } 00042 if (!flags.ignore_case) { 00043 return a==b; 00044 } 00045 if (a.escaped!=b.escaped) return false; 00046 return normalize_string(a.text,flags)==normalize_string(b.text,flags); 00047 } 00048 00049 bool Merger::mergeRow(coopy::store::DataSheet& pivot, 00050 coopy::store::DataSheet& local, 00051 coopy::store::DataSheet& remote, 00052 MatchUnit& row_unit, 00053 Patcher& output, 00054 const CompareFlags& flags, 00055 std::vector<coopy::cmp::RowChange>& rc) { 00056 bool fixedColumns = flags.fixed_columns; 00057 bool diff = output.wantDiff(); 00058 bool link = output.wantLinks(); 00059 int pRow = row_unit.pivotUnit; 00060 int lRow = row_unit.localUnit; 00061 int rRow = row_unit.remoteUnit; 00062 bool delRow = row_unit.deleted; 00063 string blank = "__NOT_SET__CSVCOMPARE_SSFOSSIL"; 00064 SheetCell blankCell; 00065 blankCell.text = blank; 00066 blankCell.escaped = true; 00067 vector<SheetCell> expandLocal, expandRemote, expandPivot, expandMerge; 00068 vector<SheetCell> saveLocal; 00069 vector<int> expandDel; 00070 vector<int> existsLocally; 00071 map<string,SheetCell> cond, value, value0, conflicted_value, 00072 conflicted_parent_value; 00073 vector<string> address; 00074 vector<string> action; 00075 int lastCol = -1; 00076 int addCol = 0; 00077 address.push_back("0"); 00078 action.push_back("select"); 00079 int at = 0; 00080 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00081 it!=col_merge.accum.end(); 00082 it++) { 00083 MatchUnit& unit = *it; 00084 int pCol = unit.pivotUnit; 00085 int lCol = unit.localUnit; 00086 int rCol = unit.remoteUnit; 00087 bool deleted = unit.deleted; 00088 string mval = ""; 00089 if (diff||!deleted) { 00090 expandDel.push_back(deleted); 00091 existsLocally.push_back(lCol!=-1); 00092 if (lRow>=0 && lCol>=0) { 00093 //printf("access local %d %d (size %d %d)\n", lCol, lRow, 00094 //local.width(), local.height()); 00095 expandLocal.push_back(local.cellSummary(lCol,lRow)); 00096 } else { 00097 expandLocal.push_back(blankCell); 00098 } 00099 if (rRow>=0 && rCol>=0) { 00100 //printf("access remote %d %d\n", rCol, rRow); 00101 expandRemote.push_back(remote.cellSummary(rCol,rRow)); 00102 } else { 00103 expandRemote.push_back(blankCell); 00104 } 00105 if (pRow>=0 && pCol>=0) { 00106 //printf("access pivot %d %d\n", pCol, pRow); 00107 expandPivot.push_back(pivot.cellSummary(pCol,pRow)); 00108 } else { 00109 expandPivot.push_back(blankCell); 00110 } 00111 } 00112 if (lRow>=0 && lCol>=0 && !deleted) { 00113 string n; 00114 if (names.size()>at) n = names[at]; 00115 if (diff || include_column.find(n)!=include_column.end()) { 00116 if (exclude_column.find(n)==exclude_column.end()) { 00117 //printf("I think that %s has name %s\n", 00118 //local.cellSummary(lCol,lRow).toString().c_str(), 00119 //names[at].c_str()); 00120 //cond[names[at]] = pivot.cellSummary(pCol,pRow); 00121 cond[n] = local.cellSummary(lCol,lRow); 00122 /* 00123 printf("LOCAL %s IS\n%s\n", 00124 local.desc().c_str(), 00125 local.toString().c_str()); 00126 printf("CONDITION %s %d %d\n", cond[names[at]].toString().c_str(), 00127 lCol, lRow); 00128 */ 00129 } 00130 } 00131 } 00132 if (!deleted) { 00133 at++; 00134 } 00135 if (diff) { 00136 if (lCol!=-1) { 00137 lastCol = lCol; 00138 addCol = 0; 00139 } else { 00140 addCol++; 00141 } 00142 char buf[256]; 00143 if (addCol>0) { 00144 snprintf(buf,sizeof(buf),"%d+%d", lastCol+1, addCol); 00145 } else { 00146 snprintf(buf,sizeof(buf),"%d", lastCol+1); 00147 } 00148 address.push_back(buf); 00149 if (deleted) { 00150 action.push_back("delete"); 00151 } else if (lCol==-1) { 00152 action.push_back("add"); 00153 } else { 00154 action.push_back(""); 00155 } 00156 } 00157 } 00158 //printf("Onwards\n"); 00159 bool conflict = false; 00160 bool change = false; 00161 expandMerge = expandLocal; 00162 at = 0; 00163 for (size_t i=0; i<expandLocal.size(); i++) { 00164 if (fixedColumns) { 00165 if (!existsLocally[i]) continue; 00166 } 00167 SheetCell& _l = expandMerge[i]; 00168 SheetCell& _r = expandRemote[i]; 00169 SheetCell& _p = expandPivot[i]; 00170 bool novel = false; 00171 bool conflicted1 = false; 00172 bool deleted = (bool)expandDel[i]; 00173 bool ignored = false; 00174 if (!deleted) { 00175 if (filtered_names.size()>0) { 00176 if (filtered_names.find(names[at])==filtered_names.end()) { 00177 ignored = true; 00178 } 00179 } 00180 } 00181 if (!ignored) { 00182 if (!compare_string(_l,_r,flags)) { 00183 if (_l==blankCell) { 00184 if (!_r.escaped) { 00185 _l = _r; 00186 novel = true; 00187 } 00188 } else { 00189 if (_r!=blankCell) { 00190 // two assertions, do they conflict? 00191 // if pivot is the same as either, then no. 00192 if (compare_string(_p,_l,flags)||compare_string(_p,_r,flags)) { 00193 if (compare_string(_p,_l,flags)) { 00194 _l = _r; 00195 change = true; 00196 novel = true; 00197 } 00198 } else { 00199 string resolve = flags.resolve; 00200 if (resolve=="") { 00201 fprintf(stderr,"# conflict: {{%s}} vs {{%s}} from {{%s}}\n", 00202 _l.toString().c_str(), 00203 _r.toString().c_str(), 00204 _p.toString().c_str()); 00205 conflict = true; 00206 conflicted1 = true; 00207 change = true; 00208 novel = true; 00209 output.setConflicted(); 00210 } else { 00211 fprintf(stderr,"# auto-resolving conflict: ours:{{%s}} vs theirs:{{%s}} from neither:{{%s}} -- picking %s\n", 00212 _l.toString().c_str(), 00213 _r.toString().c_str(), 00214 _p.toString().c_str(), 00215 resolve.c_str()); 00216 if (resolve=="ours") { 00217 // do nothing 00218 } else if (resolve=="theirs") { 00219 _l = _r; 00220 change = true; 00221 novel = true; 00222 } else if (resolve=="neither") { 00223 _l = _p; 00224 change = true; 00225 novel = true; 00226 } 00227 } 00228 //break; 00229 } 00230 } 00231 } 00232 } 00233 } 00234 00235 if (diff) { 00236 if (!deleted) { 00237 if (novel) { 00238 string n; 00239 if (names.size()>at) n = names[at]; 00240 if (exclude_column.find(n)==exclude_column.end()) { 00241 value[n] = _l; 00242 if (conflicted1) { 00243 //printf("SETTING conflicted value\n"); 00244 conflicted_value[n] = _r; 00245 conflicted_parent_value[n] = _p; 00246 } 00247 } 00248 } 00249 at++; 00250 } 00251 } 00252 if (diff&&!novel) { 00253 if (!delRow) { 00254 _l = blankCell; 00255 _r = blankCell; 00256 _p = blankCell; 00257 expandLocal[i] = blankCell; 00258 } 00259 } 00260 } 00261 00262 /* 00263 dbg_printf("row lens merge local remote %d %d %d\n", 00264 (int)expandMerge.size(), 00265 (int)expandLocal.size(), 00266 (int)expandRemote.size()); 00267 */ 00268 00269 /* 00270 if (link) { 00271 LinkDeclare decl; 00272 decl.mode = LINK_DECLARE_MERGE; 00273 decl.column = false; 00274 decl.rc_id_pivot = pRow; 00275 decl.rc_id_local = lRow; 00276 decl.rc_id_remote = rRow; 00277 decl.rc_deleted = delRow; 00278 output.declareLink(decl); 00279 } 00280 */ 00281 00282 if (!diff) { 00283 if (conflict) { 00284 conflicts++; 00285 //output.addRow("[local]",expandLocal,blank); 00286 //output.addRow("[conflicting]",expandRemote,blank); 00287 /* 00288 } else { 00289 if (lRow!=-1 && rRow!=-1) { 00290 output.addRow("",expandMerge,blank); 00291 } else if (lRow!=-1) { 00292 output.addRow("",expandMerge,blank); // local add 00293 } else if (rRow!=-1) { 00294 output.addRow("[add]",expandMerge,blank); // remote add 00295 } 00296 */ 00297 } 00298 } else { 00299 00300 /* 00301 if (conflict) { 00302 //printf("Cannot produce a diff when there are data conflicts\n"); 00303 //fprintf(stderr,"Conflict Alert!\n"); 00304 //return false; 00305 } 00306 */ 00307 if (address!=lastAddress) { 00308 //output.addRow("[for]",address,blank); 00309 lastAddress = address; 00310 } 00311 if (address!=lastAddress || action!=lastAction) { 00312 //output.addRow("[do]",action,blank); 00313 lastAction = action; 00314 } 00315 00316 bool activity = true; 00317 00318 /* 00319 if (lRow!=-1) { 00320 if ((int)expandMerge.size()==local.width()) { 00321 if (current_row<local.height()) { 00322 size_t i; 00323 for (i=0; i<expandMerge.size(); i++) { 00324 SheetCell data = expandMerge[i]; 00325 SheetCell was = local.cellSummary(i,current_row); 00326 if (was!=data && data!=blankCell) { 00327 break; 00328 } 00329 } 00330 if (i==expandMerge.size()) { 00331 activity = false; 00332 } 00333 } 00334 } 00335 } 00336 */ 00337 00338 /* 00339 dbg_printf("Row: (index p/l/r %d %d %d) act %d del %d / sz %d %d %d %d\n", 00340 pRow, lRow, rRow, 00341 activity, 00342 delRow, 00343 (int)expandMerge.size(), local.width(), current_row, local.height()); 00344 */ 00345 00346 RowChange rowChange; 00347 RowChange rowChangeMove; 00348 bool haveMove = false; 00349 rowChange.cond = cond; 00350 rowChange.val = value; 00351 rowChange.conflictingVal = conflicted_value; 00352 rowChange.conflictingParentVal = conflicted_parent_value; 00353 rowChange.names = names; 00354 rowChange.conflicted = conflict; 00355 rowChange.pRow = pRow; 00356 rowChange.lRow = lRow; 00357 rowChange.rRow = rRow; 00358 bool prev_had_row = had_row; 00359 if (last_local_row!=-1) { 00360 had_row = true; 00361 had_foreign_row = false; 00362 } 00363 if (!delRow) { 00364 //dbg_printf("Cursor? lRow %d last_local_row %d last_local_row_marked %d had_row %d\n", 00365 //lRow, last_local_row, last_local_row_marked, had_row); 00366 if (lRow!=-1) { 00367 if (had_row||had_foreign_row) { //last_local_row!=-1) { 00368 //if (lRow!=last_local_row+1||last_local_row==-1) { 00369 if (lRow<bottom_local_row||had_foreign_row) { 00370 //if (fixed_row.find(lRow)!=fixed_row.end()) { 00371 if (last_local_row>=0) { 00372 if (last_local_row_marked!=last_local_row) { 00373 RowChange alt = lastRowChange; 00374 alt.mode = ROW_CHANGE_CONTEXT; 00375 if (flags.use_order) { 00376 if (pivot.height()>0) { 00377 rc.push_back(alt); 00378 } 00379 } 00380 } 00381 } 00382 if (prev_had_row||lRow!=0) { 00383 dbg_printf("MOVE! lRow %d last_local_row %d last_local_row_marked %d\n", 00384 lRow, last_local_row, last_local_row_marked); 00385 RowChange alt = rowChange; 00386 alt.mode = ROW_CHANGE_MOVE; 00387 if (flags.use_order) { 00388 haveMove = true; 00389 rowChangeMove = alt; 00390 //rc.push_back(alt); 00391 } 00392 } 00393 last_local_row_marked = lRow; 00394 } 00395 } 00396 } 00397 } 00398 if (activity||delRow) { 00399 char buf[256]; 00400 if (lRow==-1) { 00401 addition++; 00402 snprintf(buf,sizeof(buf),"%d+%d",last_row+1,addition); 00403 } else { 00404 snprintf(buf,sizeof(buf),"%d",lRow+1); 00405 addition = 0; 00406 } 00407 SheetCell cbuf(buf,false); 00408 expandMerge.insert(expandMerge.begin(),cbuf); 00409 expandLocal.insert(expandLocal.begin(),cbuf); 00410 expandRemote.insert(expandRemote.begin(),cbuf); 00411 //if (change) { 00412 //output.addRow("[-]",expandLocal,blank); 00413 //} 00414 if (lRow==-1) { 00415 if (flags.canInsert()) { 00416 if (haveMove) { 00417 rc.push_back(rowChangeMove); 00418 haveMove = false; 00419 } 00420 //output.addRow("[+++]",expandMerge,blank); 00421 rowChange.mode = ROW_CHANGE_INSERT; 00422 //output.changeRow(rowChange); 00423 //printf("last_local_row_marked %d last_local_row %d lRow %d\n", 00424 //last_local_row_marked, last_local_row, lRow); 00425 if (last_local_row>=0) { 00426 if (last_local_row_marked!=last_local_row) { 00427 RowChange alt = lastRowChange; 00428 alt.mode = ROW_CHANGE_CONTEXT; 00429 if (flags.use_order) { 00430 if (pivot.height()>0) { 00431 rc.push_back(alt); 00432 } 00433 } 00434 } 00435 } else { 00436 if (!(prev_had_row||had_foreign_row||allGone)) { 00437 RowChange alt; 00438 alt.mode = ROW_CHANGE_CONTEXT; 00439 if (flags.use_order) { 00440 if (pivot.height()>0) { 00441 rc.push_back(alt); 00442 } 00443 } 00444 } 00445 } 00446 rc.push_back(rowChange); 00447 if (last_local_row<0) { 00448 had_foreign_row = true; 00449 } 00450 last_local_row_marked = lRow; 00451 } 00452 } else { 00453 if (rRow==-1) { 00454 if (flags.canDelete()) { 00455 if (pRow!=-1) { 00456 if (haveMove) { 00457 rc.push_back(rowChangeMove); 00458 haveMove = false; 00459 } 00460 //output.addRow("[---]",expandLocal,blank); 00461 rowChange.mode = ROW_CHANGE_DELETE; 00462 //output.changeRow(rowChange); 00463 rc.push_back(rowChange); 00464 last_local_row_marked = lRow; 00465 } 00466 } 00467 } else { 00468 if (flags.canUpdate()) { 00469 if (value.size()!=0) { 00470 //output.addRow("[+]",expandMerge,blank); 00471 rowChange.mode = haveMove?ROW_CHANGE_MOVE:ROW_CHANGE_UPDATE; 00472 haveMove = false; 00473 //output.changeRow(rowChange); 00474 rc.push_back(rowChange); 00475 last_local_row_marked = lRow; 00476 } 00477 } 00478 } 00479 } 00480 } 00481 if (haveMove) { 00482 rc.push_back(rowChangeMove); 00483 haveMove = false; 00484 } 00485 if (lRow!=-1 && !delRow) { 00486 current_row = lRow; 00487 last_row = lRow; 00488 current_row++; 00489 } 00490 last_local_row = lRow; 00491 lastRowChange = rowChange; 00492 if (last_local_row>=bottom_local_row) { 00493 bottom_local_row = last_local_row; 00494 } 00495 } 00496 return true; 00497 } 00498 00499 00500 bool Merger::merge(MergerState& state) { 00501 last_local_row = -1; 00502 bottom_local_row = -1; 00503 last_local_row_marked = -1; 00504 had_row = false; 00505 had_foreign_row = false; 00506 allGone = false; 00507 00508 coopy::store::DataSheet& pivot = state.pivot; 00509 coopy::store::DataSheet& local = state.local; 00510 coopy::store::DataSheet& remote = state.remote; 00511 const OrderResult& row_local = state.nrow_local; 00512 const OrderResult& row_remote = state.nrow_remote; 00513 const OrderResult& col_local = state.ncol_local; 00514 const OrderResult& col_remote = state.ncol_remote; 00515 Patcher& output = state.output; 00516 const CompareFlags& flags = state.flags; 00517 NameSniffer& local_names = state.local_names; 00518 NameSniffer& remote_names = state.remote_names; 00519 00520 bool diff = output.wantDiff(); 00521 bool link = output.wantLinks(); 00522 00523 if (state.allIdentical && !link) { 00524 return true; 00525 } 00526 00527 for (int i=0; i<(int)flags.include_columns.size(); i++) { 00528 include_column[flags.include_columns[i]] = 1; 00529 } 00530 for (int i=0; i<(int)flags.exclude_columns.size(); i++) { 00531 exclude_column[flags.exclude_columns[i]] = 1; 00532 } 00533 00534 dbg_printf("Merging column order...\n"); 00535 CompareFlags cflags = flags; 00536 cflags.head_trimmed = false; 00537 cflags.tail_trimmed = false; 00538 col_merge.merge(col_local,col_remote,cflags,true); 00539 00540 dbg_printf("Merging row order...\n"); 00541 00542 if (false) { //trust_ids) { 00543 row_merge.merge_by_id(row_local,row_remote,flags); 00544 } else { 00545 if (col_merge.overlap==0 && diff) { 00546 dbg_printf("No overlap, just use remote...\n"); 00547 row_merge.accum.clear(); 00548 for (int i=0; i<remote.height(); i++) { 00549 MatchUnit unit; 00550 unit.pivotUnit = -1; 00551 unit.localUnit = -1; 00552 unit.remoteUnit = i; 00553 unit.deleted = false; 00554 row_merge.accum.push_back(unit); 00555 } 00556 } else { 00557 row_merge.merge(row_local,row_remote,flags,false); 00558 } 00559 } 00560 00561 conflicts = 0; 00562 dbg_printf("Order merges are done...\n"); 00563 00564 allGone = false; 00565 if (diff) { 00566 current_row = 0; 00567 last_row = -1; 00568 addition = 0; 00569 lastAddress.clear(); 00570 lastAction.clear(); 00571 00572 local_names.sniff(); 00573 remote_names.sniff(); 00574 //NameSniffer localName(local); 00575 //NameSniffer remoteName(remote); 00576 00577 // for now, we will only use filtered index if column manipulations 00578 // are non-existent or trivial 00579 IndexSniffer localIndex(local,state.flags,local_names); 00580 bool constantColumns = true; 00581 bool constantIndex = true; 00582 00583 vector<int> local_cols; 00584 vector<string> local_col_names; 00585 vector<string> original_col_names; 00586 for (int i=0; i<local.width(); i++) { 00587 local_cols.push_back(i); 00588 00589 string name = local_names.suggestColumnName(i); 00590 /* 00591 if (name[0]>='0'&&name[0]<='9') { 00592 name = string("[") + name + "]"; 00593 } 00594 */ 00595 local_col_names.push_back(name); 00596 } 00597 original_col_names = local_col_names; 00598 00599 vector<int> index_flags = localIndex.suggestIndexes(); 00600 RowChange::txt2bool indexes; 00601 bool atLeastOne = false; 00602 for (int i=0; i<(int)original_col_names.size(); i++) { 00603 string name = original_col_names[i]; 00604 indexes[name] = (index_flags[i]>0); 00605 if (include_column.find(name)!=include_column.end()) indexes[name] = true; 00606 if (exclude_column.find(name)!=exclude_column.end()) indexes[name] =false; 00607 atLeastOne = atLeastOne||indexes[name]; 00608 } 00609 if (!atLeastOne) { 00610 indexes.clear(); 00611 } 00612 00613 vector<OrderChange> cc; 00614 00615 bool fixedColumns = flags.fixed_columns; 00616 00617 dbg_printf("Column order pre-deletions is %s\n", 00618 vector2string(local_col_names).c_str()); 00619 00620 // Pass 1: signal any column deletions 00621 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00622 it!=col_merge.accum.end(); 00623 it++) { 00624 MatchUnit& unit = *it; 00625 //int pCol = unit.localUnit; 00626 int lCol = unit.pivotUnit; 00627 //int rCol = unit.remoteUnit; 00628 bool deleted = unit.deleted; 00629 if (lCol!=-1 && deleted) { 00630 OrderChange change; 00631 change.indicesBefore = local_cols; 00632 change.namesBefore = local_col_names; 00633 vector<int>::iterator it = std::find(local_cols.begin(), 00634 local_cols.end(), 00635 lCol); 00636 if (it==local_cols.end()) { 00637 fprintf(stderr,"Merge logic failure\n"); 00638 exit(1); 00639 } 00640 int idx = it-local_cols.begin(); 00641 if (indexes.find(change.namesBefore[idx])!=indexes.end()) { 00642 if (indexes[change.namesBefore[idx]]) { 00643 constantIndex = false; 00644 } 00645 } 00646 change.mode = ORDER_CHANGE_DELETE; 00647 change.subject = lCol; 00648 local_cols.erase(it); 00649 local_col_names.erase(local_col_names.begin()+idx); 00650 change.indicesAfter = local_cols; 00651 change.namesAfter = local_col_names; 00652 //output.changeColumn(change); 00653 cc.push_back(change); 00654 if (local_cols.size()==0) { 00655 allGone = true; 00656 } 00657 } 00658 } 00659 00660 00661 dbg_printf("Column order pre-shuffle is %s\n", 00662 vector2string(local_col_names).c_str()); 00663 00664 // Pass 2: check order 00665 vector<int> shuffled_cols; 00666 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00667 it!=col_merge.accum.end(); 00668 it++) { 00669 MatchUnit& unit = *it; 00670 //int pCol = unit.localUnit; 00671 int lCol = unit.localUnit; 00672 //int rCol = unit.remoteUnit; 00673 bool deleted = unit.deleted; 00674 if (lCol!=-1 && !deleted) { 00675 shuffled_cols.push_back(lCol); 00676 } 00677 //printf("[%d:%d:%d %d] ", lCol, pCol, rCol, deleted); 00678 } 00679 //printf("\n"); 00680 00681 dbg_printf("Column order is now %s\n", 00682 vector2string(local_col_names).c_str()); 00683 00684 // Pass 2: signal any column shuffling 00685 // 1 2 3 4 00686 // 2 3 4 1 00687 Mover move; 00688 vector<int> move_order; 00689 00690 if (local_cols.size()!=shuffled_cols.size()) { 00691 dbg_printf("Match failed %s:%d (%d vs %d)\n", 00692 __FILE__, __LINE__, 00693 local_cols.size(), shuffled_cols.size() 00694 ); 00695 fprintf(stderr,"Match failed, please report %s:%d\n", 00696 __FILE__, __LINE__); 00697 exit(1); 00698 } 00699 00700 move.move(local_cols,shuffled_cols,move_order); 00701 dbg_printf("* move complete\n"); 00702 00703 if (move_order.size()>0) { 00704 // Should send messages for this case, but we're not ready 00705 // yet to exercise it. 00706 // For now, local order will remain unchanged. 00707 00708 dbg_printf("MOVE order %d\n", (int)local_cols.size()); 00709 dbg_printf(" [%s]\n", vector2string(local_cols).c_str()); 00710 dbg_printf(" [%s]\n", vector2string(shuffled_cols).c_str()); 00711 dbg_printf(" [%s]\n\n", vector2string(move_order).c_str()); 00712 00713 //vector<int> local_cols_save = local_cols; 00714 for (int m=0; m<(int)move_order.size(); m++) { 00715 int a = move_order[m]; 00716 //int a = local_cols_save[p] 00717 dbg_printf("Move %d\n", a); 00718 00719 OrderChange change; 00720 change.indicesBefore = local_cols; 00721 change.namesBefore = local_col_names; 00722 vector<int>::iterator it = std::find(local_cols.begin(), 00723 local_cols.end(), 00724 a); 00725 if (it==local_cols.end()) { 00726 fprintf(stderr,"Merge logic failure\n"); 00727 exit(1); 00728 } 00729 vector<int>::iterator it2 = std::find(shuffled_cols.begin(), 00730 shuffled_cols.end(), 00731 a); 00732 if (it2==shuffled_cols.end()) { 00733 fprintf(stderr,"Merge logic failure\n"); 00734 exit(1); 00735 } 00736 change.subject = *it; 00737 change.object = *it2; 00738 int idx = it-local_cols.begin(); 00739 int idx2 = it2-shuffled_cols.begin(); 00740 change.mode = ORDER_CHANGE_MOVE; 00741 local_cols.erase(it); 00742 string name = local_col_names[idx]; 00743 change.object = *it2; 00744 local_col_names.erase(local_col_names.begin()+idx); 00745 local_cols.insert(local_cols.begin()+idx2,a); 00746 local_col_names.insert(local_col_names.begin()+idx2,name); 00747 change.indicesAfter = local_cols; 00748 change.namesAfter = local_col_names; 00749 //output.changeColumn(change); 00750 if (change.namesBefore!=change.namesAfter) { 00751 cc.push_back(change); 00752 } 00753 } 00754 } 00755 00756 dbg_printf("Column order is now %s\n", 00757 vector2string(local_col_names).c_str()); 00758 00759 //printf(">>> %s %d\n", __FILE__, __LINE__); 00760 00761 // Pass 3: signal any column insertions 00762 int at = 0; 00763 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00764 it!=col_merge.accum.end(); 00765 it++) { 00766 MatchUnit& unit = *it; 00767 dbg_printf("Add: UNIT %d (%d/%d/%d)\n", at, 00768 unit.pivotUnit, unit.localUnit, unit.remoteUnit); 00769 //int pCol = unit.localUnit; 00770 int lCol = unit.pivotUnit; 00771 int rCol = unit.remoteUnit; 00772 bool deleted = unit.deleted; 00773 if (lCol==-1 && rCol!=-1 && !deleted) { 00774 OrderChange change; 00775 change.indicesBefore = local_cols; 00776 change.namesBefore = local_col_names; 00777 change.mode = ORDER_CHANGE_INSERT; 00778 local_cols.insert(local_cols.begin()+at,-rCol-1); 00779 00780 string name = remote_names.suggestColumnName(rCol); 00781 dbg_printf("Add: Addition of remote name %s\n", name.c_str()); 00782 bool collision = false; 00783 if (name[0]>='0'&&name[0]<='9') { 00784 name = string("{") + name + "}"; 00785 } 00786 do { 00787 collision = false; 00788 for (int i=0; i<(int)local_col_names.size(); i++) { 00789 if (local_col_names[i]==name) { 00790 collision = true; 00791 name = name + "_"; 00792 break; 00793 } 00794 } 00795 } while (collision); 00796 local_col_names.insert(local_col_names.begin()+at,name); 00797 change.indicesAfter = local_cols; 00798 change.namesAfter = local_col_names; 00799 change.subject = local_cols[at]; 00800 //output.changeColumn(change); 00801 cc.push_back(change); 00802 at++; 00803 } 00804 if (unit.localUnit!=-1 && !deleted) { 00805 at++; 00806 } 00807 } 00808 00809 //printf(">>> %s %d\n", __FILE__, __LINE__); 00810 00811 dbg_printf("Column order is now %s\n", 00812 vector2string(local_col_names).c_str()); 00813 00814 // PASS 4 - column renames 00815 if (flags.assume_header) { 00816 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00817 it!=col_merge.accum.end(); 00818 it++) { 00819 MatchUnit& unit = *it; 00820 int lCol = unit.pivotUnit; 00821 int rCol = unit.remoteUnit; 00822 bool deleted = unit.deleted; 00823 if (lCol!=-1 && rCol!=-1 && !deleted) { 00824 string lName = local_names.suggestColumnName(lCol); 00825 string rName = remote_names.suggestColumnName(rCol); 00826 if (lName!=rName) { 00827 dbg_printf("Renamed %s -> %s\n", lName.c_str(), rName.c_str()); 00828 OrderChange change; 00829 change.indicesBefore = local_cols; 00830 change.namesBefore = local_col_names; 00831 change.mode = ORDER_CHANGE_RENAME; 00832 change.indicesAfter = local_cols; 00833 vector<string>::iterator it = std::find(local_col_names.begin(), 00834 local_col_names.end(), 00835 lName); 00836 if (it==local_col_names.end()) { 00837 fprintf(stderr,"Confused by column name %s\n", lName.c_str()); 00838 } else { 00839 *it = rName; 00840 change.namesAfter = local_col_names; 00841 change.subject = local_cols[it-local_col_names.begin()]; 00842 cc.push_back(change); 00843 } 00844 } 00845 } 00846 } 00847 } 00848 00849 if (cc.size()>0) { 00850 constantColumns = false; 00851 } 00852 00853 names = local_col_names; 00854 filtered_names.clear(); 00855 00856 if (fixedColumns) { 00857 for (int i=0; i<(int)original_col_names.size(); i++) { 00858 filtered_names.insert(original_col_names[i]); 00859 } 00860 } 00861 00862 if (fixedColumns) { 00863 local_col_names = original_col_names; 00864 } 00865 00866 //printf(">>> %s %d\n", __FILE__, __LINE__); 00867 00868 00869 // LINKIT 00870 00871 if (link) { 00872 if (local_col_names!=original_col_names) { 00873 local_names.sniff(); 00874 remote_names.sniff(); 00875 00876 // perspective: MERGE, COLUMN 00877 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 00878 it!=col_merge.accum.end(); 00879 it++) { 00880 MatchUnit& unit = *it; 00881 int pCol = unit.pivotUnit; 00882 int lCol = unit.localUnit; 00883 int rCol = unit.remoteUnit; 00884 bool deleted = unit.deleted; 00885 LinkDeclare decl; 00886 decl.mode = LINK_DECLARE_MERGE; 00887 decl.column = true; 00888 decl.rc_id_pivot = pCol; 00889 decl.rc_id_local = lCol; 00890 decl.rc_id_remote = rCol; 00891 decl.rc_deleted = deleted; 00892 if (lCol!=-1) { 00893 decl.rc_str_local = local_names.suggestColumnName(lCol); 00894 } 00895 if (rCol!=-1) { 00896 decl.rc_str_remote = remote_names.suggestColumnName(rCol); 00897 } 00898 output.declareLink(decl); 00899 } 00900 } 00901 } 00902 00903 vector<RowChange> rc; 00904 // Now process rows 00905 if (!state.allIdentical) { 00906 for (list<MatchUnit>::iterator it=row_merge.accum.begin(); 00907 it!=row_merge.accum.end(); 00908 it++) { 00909 MatchUnit& unit = *it; 00910 00911 // Special case: if all columns were deleted, then we assume 00912 // all local rows are deleted. 00913 if (allGone) { 00914 unit.localUnit = -1; 00915 unit.pivotUnit = -1; 00916 } 00917 00918 if (link) { 00919 LinkDeclare decl; 00920 decl.mode = LINK_DECLARE_MERGE; 00921 decl.column = false; 00922 decl.rc_id_pivot = unit.pivotUnit; 00923 decl.rc_id_local = unit.localUnit; 00924 decl.rc_id_remote = unit.remoteUnit; 00925 decl.rc_deleted = unit.deleted; 00926 decl.pivot = PolySheet(&pivot,false); 00927 decl.local = PolySheet(&local,false); 00928 decl.remote = PolySheet(&remote,false); 00929 output.declareLink(decl); 00930 } 00931 if (unit.remoteUnit!=-1 || !allGone) { 00932 bool ok = mergeRow(pivot,local,remote,unit,output,flags,rc); 00933 if (!ok) { return false; } 00934 } 00935 } 00936 } 00937 00938 00939 if (!fixedColumns) { 00940 NameChange nc; 00941 nc.mode = NAME_CHANGE_DECLARE; 00942 nc.final = false; 00943 nc.constant = constantColumns; 00944 nc.names = original_col_names; 00945 output.changeName(nc); 00946 } 00947 00948 //printf(">>> %s %d\n", __FILE__, __LINE__); 00949 00950 if (!fixedColumns) { 00951 for (int i=0; i<(int)cc.size(); i++) { 00952 OrderChange& change = cc[i]; 00953 output.changeColumn(change); 00954 } 00955 } 00956 00957 { 00958 NameChange nc; 00959 nc.mode = NAME_CHANGE_DECLARE; 00960 nc.final = true; 00961 nc.names = local_col_names; 00962 nc.constant = constantColumns; 00963 output.changeName(nc); 00964 } 00965 00966 //printf(">>> %s %d\n", __FILE__, __LINE__); 00967 00968 if (!constantIndex) { 00969 for (int i=0; i<(int)original_col_names.size(); i++) { 00970 string name = original_col_names[i]; 00971 indexes[name] = true; 00972 } 00973 } 00974 if (rc.size()>0) { 00975 output.addPoolsFromFlags(state.local); 00976 } 00977 for (int i=0; i<(int)rc.size(); i++) { 00978 /* 00979 scope for being smarter here about what gets scoped in. 00980 */ 00981 RowChange& change = rc[i]; 00982 change.indexes = indexes; 00983 change.allNames = local_col_names; 00984 output.changeRow(change); 00985 } 00986 00987 //printf(">>> %s %d\n", __FILE__, __LINE__); 00988 00989 output.mergeDone(); 00990 return true; 00991 } 00992 00993 00994 00995 // MERGE 00996 00997 if (link) { 00998 local_names.sniff(); 00999 remote_names.sniff(); 01000 01001 // perspective: MERGE, COLUMN 01002 bool column_change = false; 01003 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 01004 it!=col_merge.accum.end(); 01005 it++) { 01006 MatchUnit& unit = *it; 01007 int pCol = unit.pivotUnit; 01008 int lCol = unit.localUnit; 01009 int rCol = unit.remoteUnit; 01010 bool deleted = unit.deleted; 01011 if (pCol!=lCol || pCol!=rCol || deleted) { 01012 column_change = true; 01013 } 01014 } 01015 01016 01017 if (column_change) { 01018 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 01019 it!=col_merge.accum.end(); 01020 it++) { 01021 MatchUnit& unit = *it; 01022 int pCol = unit.pivotUnit; 01023 int lCol = unit.localUnit; 01024 int rCol = unit.remoteUnit; 01025 bool deleted = unit.deleted; 01026 LinkDeclare decl; 01027 decl.mode = LINK_DECLARE_MERGE; 01028 decl.column = true; 01029 decl.rc_id_pivot = pCol; 01030 decl.rc_id_local = lCol; 01031 decl.rc_id_remote = rCol; 01032 decl.rc_deleted = deleted; 01033 if (lCol!=-1) { 01034 decl.rc_str_local = local_names.suggestColumnName(lCol); 01035 } 01036 if (rCol!=-1) { 01037 decl.rc_str_remote = remote_names.suggestColumnName(rCol); 01038 } 01039 output.declareLink(decl); 01040 } 01041 } 01042 } 01043 01044 vector<string> header; 01045 for (list<MatchUnit>::iterator it=col_merge.accum.begin(); 01046 it!=col_merge.accum.end(); 01047 it++) { 01048 MatchUnit& unit = *it; 01049 //int pCol = unit.localUnit; 01050 int lCol = unit.pivotUnit; 01051 int rCol = unit.remoteUnit; 01052 bool deleted = unit.deleted; 01053 if (!deleted) { 01054 if (lCol!=-1&&rCol!=-1) { 01055 header.push_back(""); 01056 } else if (lCol!=-1) { 01057 header.push_back(""); // local add 01058 } else if (rCol!=-1) { 01059 header.push_back("[add]"); // remote add 01060 } else { 01061 header.push_back("[float]"); 01062 } 01063 } 01064 } 01065 output.addHeader("[conflict]",header,""); 01066 01067 for (list<MatchUnit>::iterator it=row_merge.accum.begin(); 01068 it!=row_merge.accum.end(); 01069 it++) { 01070 MatchUnit& unit = *it; 01071 //int _l = unit.localUnit; 01072 //int _p = unit.pivotUnit; 01073 //int _r = unit.remoteUnit; 01074 if (link) { 01075 LinkDeclare decl; 01076 decl.mode = LINK_DECLARE_MERGE; 01077 decl.column = false; 01078 decl.rc_id_pivot = unit.pivotUnit; 01079 decl.rc_id_local = unit.localUnit; 01080 decl.rc_id_remote = unit.remoteUnit; 01081 decl.rc_deleted = unit.deleted; 01082 decl.pivot = PolySheet(&pivot,false); 01083 decl.local = PolySheet(&local,false); 01084 decl.remote = PolySheet(&remote,false); 01085 output.declareLink(decl); 01086 } 01087 01088 bool deleted = unit.deleted; 01089 if (!deleted) { 01090 vector<RowChange> rc; 01091 bool ok = mergeRow(pivot,local,remote,unit,output,flags,rc); 01092 if (!ok) return false; 01093 for (int i=0; i<(int)rc.size(); i++) { 01094 output.changeRow(rc[i]); 01095 } 01096 } 01097 } 01098 01099 output.mergeDone(); 01100 01101 if (conflicts==0) { 01102 dbg_printf("No conflicts!\n"); 01103 output.stripMarkup(); 01104 } 01105 01106 //dbg_printf("Got merged result (%dx%d)\n", result.width(), result.height()); 01107 //CsvFile::write(result,"result.csv"); 01108 return true; 01109 } 01110 01111 01112