COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_core/Merger.cpp
Go to the documentation of this file.
00001 #include <coopy/Merger.h>
00002 #include <coopy/Dbg.h>
00003 #include <coopy/Mover.h>
00004 #include <coopy/NameSniffer.h>
00005 #include <coopy/IndexSniffer.h>
00006 
00007 #include <stdlib.h>
00008 #include <ctype.h>
00009 
00010 #include <algorithm>
00011 
00012 #define WANT_MAP2STRING
00013 #define WANT_VECTOR2STRING
00014 #include <coopy/Stringer.h>
00015 
00016 using namespace std;
00017 using namespace coopy::store;
00018 using namespace coopy::cmp;
00019 
00020 static string normalize_string(string low, const CompareFlags& flags) {
00021   if (flags.ignore_case) {
00022     for (size_t c=0; c<low.length(); c++) {
00023       low[c] = tolower(low[c]);
00024     }
00025   }
00026   return low;
00027 }
00028 
00029 static bool compare_string(const SheetCell& a, const SheetCell& b,
00030                            const CompareFlags& flags) {
00031   if (a.escaped&&b.escaped) {
00032     string blank = "__NOT_SET__CSVCOMPARE_SSFOSSIL";
00033     if (a.text!="NULL"&&a.text!=blank) {
00034       printf("sorry, coopy produced an inconsistent null type (a) '%s'\n", a.text.c_str());
00035       exit(1);
00036     }
00037     if (b.text!="NULL"&&b.text!=blank) {
00038       printf("sorry, coopy produced an inconsistent null type (b) '%s'\n", b.text.c_str());
00039       exit(1);
00040     }
00041   }
00042   if (!flags.ignore_case) {
00043     return a==b;
00044   }
00045   if (a.escaped!=b.escaped) return false;
00046   return normalize_string(a.text,flags)==normalize_string(b.text,flags);
00047 }
00048 
// Merges one matched row (row_unit) across the pivot, local and remote
// sheets, visiting columns in the order given by col_merge.accum.
//
// Step 1 gathers, per column, the local/remote/pivot cell for this row; a
// sentinel "blank" cell stands in wherever the sheet has no such row or
// column. It also builds `cond` (current local values keyed by column name)
// and, in diff mode, per-column address/action labels.
//
// Step 2 performs the three-way comparison per column and records changed
// values in `value` (plus conflicting remote/pivot values on conflict).
//
// Step 3 depends on the mode:
//  - not diff mode: only the `conflicts` counter is bumped on conflict;
//  - diff mode: RowChange records (context / move / insert / delete /
//    update) are appended to `rc` and the row-tracking members
//    (last_local_row, bottom_local_row, last_local_row_marked, ...) are
//    advanced.
//
// output.setConflicted() is called when an unresolved conflict is found.
// Always returns true.
00049 bool Merger::mergeRow(coopy::store::DataSheet& pivot, 
00050                       coopy::store::DataSheet& local, 
00051                       coopy::store::DataSheet& remote,
00052                       MatchUnit& row_unit, 
00053                       Patcher& output,
00054                       const CompareFlags& flags, 
00055                       std::vector<coopy::cmp::RowChange>& rc) {
00056   bool fixedColumns = flags.fixed_columns;
00057   bool diff = output.wantDiff();
00058   bool link = output.wantLinks();
00059   int pRow = row_unit.pivotUnit;
00060   int lRow = row_unit.localUnit;
00061   int rRow = row_unit.remoteUnit;
00062   bool delRow = row_unit.deleted;
        // Sentinel cell used wherever a sheet has no value for this row/column.
00063   string blank = "__NOT_SET__CSVCOMPARE_SSFOSSIL";
00064   SheetCell blankCell;
00065   blankCell.text = blank;
00066   blankCell.escaped = true;
00067   vector<SheetCell> expandLocal, expandRemote, expandPivot, expandMerge;
00068   vector<SheetCell> saveLocal;
00069   vector<int> expandDel;
00070   vector<int> existsLocally;
00071   map<string,SheetCell> cond, value, value0, conflicted_value,
00072     conflicted_parent_value;
00073   vector<string> address;
00074   vector<string> action;
00075   int lastCol = -1;
00076   int addCol = 0;
00077   address.push_back("0");
00078   action.push_back("select");
        // `at` counts the non-deleted columns seen so far and is used as the
        // index into `names`.
00079   int at = 0;
        // Step 1: collect this row's cells from each sheet, column by column.
00080   for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00081        it!=col_merge.accum.end(); 
00082        it++) {
00083     MatchUnit& unit = *it;
00084     int pCol = unit.pivotUnit;
00085     int lCol = unit.localUnit;
00086     int rCol = unit.remoteUnit;
00087     bool deleted = unit.deleted;
00088     string mval = "";
          // Deleted columns are only carried along when producing a diff.
00089     if (diff||!deleted) {
00090       expandDel.push_back(deleted);
00091       existsLocally.push_back(lCol!=-1);
00092       if (lRow>=0 && lCol>=0) {
00093         //printf("access local %d %d (size %d %d)\n", lCol, lRow, 
00094         //local.width(), local.height());
00095         expandLocal.push_back(local.cellSummary(lCol,lRow));
00096       } else {
00097         expandLocal.push_back(blankCell);
00098       }
00099       if (rRow>=0 && rCol>=0) {
00100         //printf("access remote %d %d\n", rCol, rRow);
00101         expandRemote.push_back(remote.cellSummary(rCol,rRow));
00102       } else {
00103         expandRemote.push_back(blankCell);
00104       }
00105       if (pRow>=0 && pCol>=0) {
00106         //printf("access pivot %d %d\n", pCol, pRow);
00107         expandPivot.push_back(pivot.cellSummary(pCol,pRow));
00108       } else {
00109         expandPivot.push_back(blankCell);
00110       }
00111     }
          // Record the current local value as a condition for this row, keyed
          // by column name. Outside diff mode the column must be listed in
          // include_column; columns in exclude_column are always left out.
00112     if (lRow>=0 && lCol>=0 && !deleted) {
00113       string n;
00114       if (names.size()>at) n = names[at];
00115       if (diff || include_column.find(n)!=include_column.end()) {
00116         if (exclude_column.find(n)==exclude_column.end()) {
00117           //printf("I think that %s has name %s\n",
00118           //local.cellSummary(lCol,lRow).toString().c_str(),
00119           //names[at].c_str());
00120           //cond[names[at]] = pivot.cellSummary(pCol,pRow);
00121           cond[n] = local.cellSummary(lCol,lRow);
00122           /*
00123             printf("LOCAL %s IS\n%s\n", 
00124             local.desc().c_str(),
00125             local.toString().c_str());
00126             printf("CONDITION %s %d %d\n", cond[names[at]].toString().c_str(),
00127             lCol, lRow);
00128           */
00129         }
00130       }
00131     }
00132     if (!deleted) {
00133       at++;
00134     }
          // In diff mode, label each column with a 1-based address relative to
          // the last local column seen ("N" for existing columns, "N+k" for
          // the k-th column without a local counterpart after it) and an
          // action.
00135     if (diff) {
00136       if (lCol!=-1) {
00137         lastCol = lCol;
00138         addCol = 0;
00139       } else {
00140         addCol++;
00141       }
00142       char buf[256];
00143       if (addCol>0) {
00144         snprintf(buf,sizeof(buf),"%d+%d", lastCol+1, addCol);
00145       } else {
00146         snprintf(buf,sizeof(buf),"%d", lastCol+1);
00147       }
00148       address.push_back(buf);
00149       if (deleted) {
00150         action.push_back("delete");
00151       } else if (lCol==-1) {
00152         action.push_back("add");
00153       } else {
00154         action.push_back("");
00155       }
00156     }
00157   }
00158   //printf("Onwards\n");
00159   bool conflict = false;
00160   bool change = false;
        // Step 2: the merged row starts as a copy of the local row; cells are
        // overwritten below where the remote side wins.
00161   expandMerge = expandLocal;
00162   at = 0;
00163   for (size_t i=0; i<expandLocal.size(); i++) {
          // With fixed columns, columns that do not exist locally are skipped.
00164     if (fixedColumns) {
00165       if (!existsLocally[i]) continue;
00166     }
          // _l aliases the merged cell, so assigning to it updates expandMerge.
00167     SheetCell& _l = expandMerge[i];
00168     SheetCell& _r = expandRemote[i];
00169     SheetCell& _p = expandPivot[i];
00170     bool novel = false;
00171     bool conflicted1 = false;
00172     bool deleted = (bool)expandDel[i];
00173     bool ignored = false;
          // When filtered_names is non-empty, columns whose name is not in it
          // are left untouched.
          // NOTE(review): names[at] is not bounds-checked here, unlike the
          // other uses of names in this function, and `at` only advances in
          // diff mode (see below) -- confirm this is intended.
00174     if (!deleted) {
00175       if (filtered_names.size()>0) {
00176         if (filtered_names.find(names[at])==filtered_names.end()) {
00177           ignored = true;
00178         }
00179       }
00180     }
00181     if (!ignored) {
00182       if (!compare_string(_l,_r,flags)) {
00183         if (_l==blankCell) {
                // No local value: adopt the remote one unless it is escaped.
00184           if (!_r.escaped) {
00185             _l = _r;
00186             novel = true;
00187           }
00188         } else {
00189           if (_r!=blankCell) {
00190             // two assertions, do they conflict?
00191             // if pivot is the same as either, then no.
00192             if (compare_string(_p,_l,flags)||compare_string(_p,_r,flags)) {
                    // Only the remote side changed: take it. If only the
                    // local side changed, the local value is kept as is.
00193               if (compare_string(_p,_l,flags)) { 
00194                 _l = _r; 
00195                 change = true;
00196                 novel = true;
00197               }
00198             } else {
                    // Both sides differ from the pivot and from each other.
                    // flags.resolve selects an automatic resolution ("ours",
                    // "theirs" or "neither"); when empty, the conflict is
                    // reported and recorded.
00199               string resolve = flags.resolve;
00200               if (resolve=="") {
00201                 fprintf(stderr,"# conflict: {{%s}} vs {{%s}} from {{%s}}\n",
00202                         _l.toString().c_str(),
00203                         _r.toString().c_str(),
00204                         _p.toString().c_str());
00205                 conflict = true;
00206                 conflicted1 = true;
00207                 change = true;
00208                 novel = true;
00209                 output.setConflicted();
00210               } else {
00211                 fprintf(stderr,"# auto-resolving conflict: ours:{{%s}} vs theirs:{{%s}} from neither:{{%s}} -- picking %s\n",
00212                         _l.toString().c_str(),
00213                         _r.toString().c_str(),
00214                         _p.toString().c_str(),
00215                         resolve.c_str());
00216                 if (resolve=="ours") {
00217                   // do nothing
00218                 } else if (resolve=="theirs") {
00219                   _l = _r;
00220                   change = true;
00221                   novel = true;
00222                 } else if (resolve=="neither") {
00223                   _l = _p;
00224                   change = true;
00225                   novel = true;
00226                 }
00227               }
00228               //break;
00229             }
00230           }
00231         }
00232       }
00233     }
00234     
          // In diff mode, record the new value of each changed, non-excluded
          // column by name; for conflicts also keep the remote and pivot
          // values.
00235     if (diff) {
00236       if (!deleted) {
00237         if (novel) {
00238           string n;
00239           if (names.size()>at) n = names[at];
00240           if (exclude_column.find(n)==exclude_column.end()) {
00241             value[n] = _l;
00242             if (conflicted1) {
00243               //printf("SETTING conflicted value\n");
00244               conflicted_value[n] = _r;
00245               conflicted_parent_value[n] = _p;
00246             }
00247           }
00248         }
00249         at++;
00250       }
00251     }
          // In diff mode, cells that did not change are blanked out unless the
          // whole row is being deleted.
00252     if (diff&&!novel) {
00253       if (!delRow) {
00254         _l = blankCell;
00255         _r = blankCell;
00256         _p = blankCell;
00257         expandLocal[i] = blankCell;
00258       }
00259     }
00260   }
00261 
00262   /*
00263   dbg_printf("row lens merge local remote %d %d %d\n",
00264              (int)expandMerge.size(), 
00265              (int)expandLocal.size(),
00266              (int)expandRemote.size());
00267   */
00268 
00269   /*
00270   if (link) {
00271     LinkDeclare decl;
00272     decl.mode = LINK_DECLARE_MERGE;
00273     decl.column = false;
00274     decl.rc_id_pivot = pRow;
00275     decl.rc_id_local = lRow;
00276     decl.rc_id_remote = rRow;
00277     decl.rc_deleted = delRow;
00278     output.declareLink(decl);
00279   }
00280   */
00281 
        // Step 3: outside diff mode only the conflict count is updated.
00282   if (!diff) {
00283     if (conflict) {
00284       conflicts++;
00285       //output.addRow("[local]",expandLocal,blank);
00286       //output.addRow("[conflicting]",expandRemote,blank);
00287       /*
00288         } else {
00289         if (lRow!=-1 && rRow!=-1) {
00290         output.addRow("",expandMerge,blank);
00291         } else if (lRow!=-1) {
00292         output.addRow("",expandMerge,blank); // local add
00293         } else if (rRow!=-1) {
00294         output.addRow("[add]",expandMerge,blank); // remote add
00295       }
00296       */
00297     }
00298   } else {
00299 
00300     /*
00301     if (conflict) {
00302       //printf("Cannot produce a diff when there are data conflicts\n");
00303       //fprintf(stderr,"Conflict Alert!\n");
00304       //return false;
00305     }
00306     */
          // Remember the latest column address/action labels.
          // NOTE(review): after the first block lastAddress equals address, so
          // the address test in the second condition can never be true there.
00307     if (address!=lastAddress) {
00308       //output.addRow("[for]",address,blank);
00309       lastAddress = address;
00310     }
00311     if (address!=lastAddress || action!=lastAction) {
00312       //output.addRow("[do]",action,blank);
00313       lastAction = action;
00314     }
00315 
          // Always true: the check that could clear it is commented out below.
00316     bool activity = true;
00317 
00318     /*
00319     if (lRow!=-1) {
00320       if ((int)expandMerge.size()==local.width()) {
00321         if (current_row<local.height()) {
00322           size_t i;
00323           for (i=0; i<expandMerge.size(); i++) {
00324             SheetCell data = expandMerge[i];
00325             SheetCell was = local.cellSummary(i,current_row);
00326             if (was!=data && data!=blankCell) {
00327               break;
00328             }
00329           }
00330           if (i==expandMerge.size()) {
00331             activity = false;
00332           }
00333         }
00334       }
00335     }
00336     */
00337 
00338     /*
00339     dbg_printf("Row: (index p/l/r %d %d %d) act %d del %d / sz %d %d %d %d\n",
00340                pRow, lRow, rRow, 
00341                activity, 
00342                delRow,
00343                (int)expandMerge.size(), local.width(), current_row, local.height());
00344     */
00345 
          // Template change record for this row; `mode` is filled in below
          // once the kind of change is known.
00346     RowChange rowChange;
00347     RowChange rowChangeMove;
00348     bool haveMove = false;
00349     rowChange.cond = cond;
00350     rowChange.val = value;
00351     rowChange.conflictingVal = conflicted_value;
00352     rowChange.conflictingParentVal = conflicted_parent_value;
00353     rowChange.names = names;
00354     rowChange.conflicted = conflict;
00355     rowChange.pRow = pRow;
00356     rowChange.lRow = lRow;
00357     rowChange.rRow = rRow;
          // Keep the value of had_row from before this row; it is consulted
          // below when deciding on move/context records.
00358     bool prev_had_row = had_row;
00359     if (last_local_row!=-1) {
00360       had_row = true;
00361       had_foreign_row = false;
00362     }
          // Move detection: a surviving local row that appears before the
          // furthest local row seen so far (lRow<bottom_local_row), or that
          // follows a row with no local counterpart, is out of order. A
          // context record for the previous row may be emitted first, and a
          // MOVE record is prepared; both only when flags.use_order is set.
00363     if (!delRow) {
00364       //dbg_printf("Cursor? lRow %d last_local_row %d last_local_row_marked %d had_row %d\n",
00365       //lRow, last_local_row, last_local_row_marked, had_row);
00366       if (lRow!=-1) {
00367         if (had_row||had_foreign_row) { //last_local_row!=-1) {
00368           //if (lRow!=last_local_row+1||last_local_row==-1) {
00369           if (lRow<bottom_local_row||had_foreign_row) {
00370             //if (fixed_row.find(lRow)!=fixed_row.end()) {
00371             if (last_local_row>=0) {
00372               if (last_local_row_marked!=last_local_row) {
00373                 RowChange alt = lastRowChange;
00374                 alt.mode = ROW_CHANGE_CONTEXT;
00375                 if (flags.use_order) {
00376                   if (pivot.height()>0) {
00377                     rc.push_back(alt);
00378                   }
00379                 }
00380               }
00381             }
00382             if (prev_had_row||lRow!=0) {
00383               dbg_printf("MOVE! lRow %d last_local_row %d last_local_row_marked %d\n",
00384                          lRow, last_local_row, last_local_row_marked);
00385               RowChange alt = rowChange;
00386               alt.mode = ROW_CHANGE_MOVE;
00387               if (flags.use_order) {
                      // Held back; it is either emitted before an
                      // insert/delete, folded into an update, or flushed
                      // after the block below.
00388                 haveMove = true;
00389                 rowChangeMove = alt;
00390                 //rc.push_back(alt);
00391               }
00392             }
00393             last_local_row_marked = lRow;
00394           }
00395         }
00396       }
00397     }
00398     if (activity||delRow) {
            // Build a row label: "N" for an existing local row (1-based), or
            // "N+k" for the k-th added row after local row N. The label is
            // prepended to the expanded rows, which are only referenced by
            // commented-out code from here on.
00399       char buf[256];
00400       if (lRow==-1) {
00401         addition++;
00402         snprintf(buf,sizeof(buf),"%d+%d",last_row+1,addition);
00403       } else {
00404         snprintf(buf,sizeof(buf),"%d",lRow+1);
00405         addition = 0;
00406       }
00407       SheetCell cbuf(buf,false);
00408       expandMerge.insert(expandMerge.begin(),cbuf);
00409       expandLocal.insert(expandLocal.begin(),cbuf);
00410       expandRemote.insert(expandRemote.begin(),cbuf);
00411       //if (change) {
00412       //output.addRow("[-]",expandLocal,blank);
00413       //}
00414       if (lRow==-1) {
              // No local counterpart: the row is an insertion.
00415         if (flags.canInsert()) {
00416           if (haveMove) {
00417             rc.push_back(rowChangeMove);
00418             haveMove = false;
00419           }
00420           //output.addRow("[+++]",expandMerge,blank);
00421           rowChange.mode = ROW_CHANGE_INSERT;
00422           //output.changeRow(rowChange);
00423           //printf("last_local_row_marked %d last_local_row %d lRow %d\n",
00424           //last_local_row_marked, last_local_row, lRow);
                // Emit a context record before the insert: the previous
                // row's change if that row has not been marked yet, or an
                // empty context when nothing has preceded this row.
00425           if (last_local_row>=0) {
00426             if (last_local_row_marked!=last_local_row) {
00427               RowChange alt = lastRowChange;
00428               alt.mode = ROW_CHANGE_CONTEXT;
00429               if (flags.use_order) {
00430                 if (pivot.height()>0) {
00431                   rc.push_back(alt);
00432                 }
00433               }
00434             }
00435           } else {
00436             if (!(prev_had_row||had_foreign_row||allGone)) {
00437               RowChange alt;
00438               alt.mode = ROW_CHANGE_CONTEXT;
00439               if (flags.use_order) {
00440                 if (pivot.height()>0) {
00441                   rc.push_back(alt);
00442                 }
00443               }
00444             }
00445           }
00446           rc.push_back(rowChange);
00447           if (last_local_row<0) {
00448             had_foreign_row = true;
00449           }
00450           last_local_row_marked = lRow;
00451         }
00452       } else {
00453         if (rRow==-1) {
                // Present locally but not remotely: a deletion, but only if
                // the row also exists in the pivot.
00454           if (flags.canDelete()) {
00455             if (pRow!=-1) {
00456               if (haveMove) {
00457                 rc.push_back(rowChangeMove);
00458                 haveMove = false;
00459               }
00460               //output.addRow("[---]",expandLocal,blank);
00461               rowChange.mode = ROW_CHANGE_DELETE;
00462               //output.changeRow(rowChange);
00463               rc.push_back(rowChange);
00464               last_local_row_marked = lRow;
00465             }
00466           }
00467         } else {
                // Present on both sides: emit an update when any value
                // changed; a pending move is folded into the same record.
00468           if (flags.canUpdate()) {
00469             if (value.size()!=0) {
00470               //output.addRow("[+]",expandMerge,blank);
00471               rowChange.mode = haveMove?ROW_CHANGE_MOVE:ROW_CHANGE_UPDATE;
00472               haveMove = false;
00473               //output.changeRow(rowChange);
00474               rc.push_back(rowChange);
00475               last_local_row_marked = lRow;
00476             }
00477           }
00478         }
00479       }
00480     }
          // Flush a pending move that was not consumed above.
00481     if (haveMove) {
00482       rc.push_back(rowChangeMove);
00483       haveMove = false;
00484     }
          // Advance the row-tracking state for the next call.
00485     if (lRow!=-1 && !delRow) {
00486       current_row = lRow;
00487       last_row = lRow;
00488       current_row++;
00489     }
00490     last_local_row = lRow;
00491     lastRowChange = rowChange;
00492     if (last_local_row>=bottom_local_row) {
00493       bottom_local_row = last_local_row;
00494     }
00495   }
00496   return true;
00497 }
00498 
00499 
00500 bool Merger::merge(MergerState& state) {
00501   last_local_row = -1;
00502   bottom_local_row = -1;
00503   last_local_row_marked = -1;
00504   had_row = false;
00505   had_foreign_row = false;
00506   allGone = false;
00507 
00508   coopy::store::DataSheet& pivot = state.pivot;
00509   coopy::store::DataSheet& local = state.local;
00510   coopy::store::DataSheet& remote = state.remote;
00511   const OrderResult& row_local = state.nrow_local;
00512   const OrderResult& row_remote = state.nrow_remote;
00513   const OrderResult& col_local = state.ncol_local;
00514   const OrderResult& col_remote = state.ncol_remote;
00515   Patcher& output = state.output;
00516   const CompareFlags& flags = state.flags;
00517   NameSniffer& local_names = state.local_names;
00518   NameSniffer& remote_names = state.remote_names;
00519 
00520   bool diff = output.wantDiff();
00521   bool link = output.wantLinks();
00522   
00523   if (state.allIdentical && !link) {
00524     return true;
00525   }
00526 
00527   for (int i=0; i<(int)flags.include_columns.size(); i++) { 
00528     include_column[flags.include_columns[i]] = 1;
00529   }
00530   for (int i=0; i<(int)flags.exclude_columns.size(); i++) { 
00531     exclude_column[flags.exclude_columns[i]] = 1;
00532   }
00533 
00534   dbg_printf("Merging column order...\n");
00535   CompareFlags cflags = flags;
00536   cflags.head_trimmed = false;
00537   cflags.tail_trimmed = false;
00538   col_merge.merge(col_local,col_remote,cflags,true);
00539 
00540   dbg_printf("Merging row order...\n");
00541 
00542   if (false) { //trust_ids) {
00543     row_merge.merge_by_id(row_local,row_remote,flags);
00544   } else {
00545     if (col_merge.overlap==0 && diff) {
00546       dbg_printf("No overlap, just use remote...\n");
00547       row_merge.accum.clear();
00548       for (int i=0; i<remote.height(); i++) {
00549         MatchUnit unit;
00550         unit.pivotUnit = -1;
00551         unit.localUnit = -1;
00552         unit.remoteUnit = i;
00553         unit.deleted = false;
00554         row_merge.accum.push_back(unit);
00555       }
00556     } else {
00557       row_merge.merge(row_local,row_remote,flags,false);
00558     }
00559   }
00560 
00561   conflicts = 0;
00562   dbg_printf("Order merges are done...\n");
00563 
00564   allGone = false;
00565   if (diff) {
00566     current_row = 0;
00567     last_row = -1;
00568     addition = 0;
00569     lastAddress.clear();
00570     lastAction.clear();
00571 
00572     local_names.sniff();
00573     remote_names.sniff();
00574     //NameSniffer localName(local);
00575     //NameSniffer remoteName(remote);
00576 
00577     // for now, we will only use filtered index if column manipulations
00578     // are non-existent or trivial
00579     IndexSniffer localIndex(local,state.flags,local_names);
00580     bool constantColumns = true;
00581     bool constantIndex = true;
00582 
00583     vector<int> local_cols;
00584     vector<string> local_col_names;
00585     vector<string> original_col_names;
00586     for (int i=0; i<local.width(); i++) {
00587       local_cols.push_back(i);
00588 
00589       string name = local_names.suggestColumnName(i);
00590       /*
00591       if (name[0]>='0'&&name[0]<='9') {
00592         name = string("[") + name + "]";
00593       }
00594       */
00595       local_col_names.push_back(name);
00596     }
00597     original_col_names = local_col_names;
00598 
00599     vector<int> index_flags = localIndex.suggestIndexes();
00600     RowChange::txt2bool indexes;
00601     bool atLeastOne = false;
00602     for (int i=0; i<(int)original_col_names.size(); i++) {
00603       string name = original_col_names[i];
00604       indexes[name] = (index_flags[i]>0);
00605       if (include_column.find(name)!=include_column.end()) indexes[name] = true;
00606       if (exclude_column.find(name)!=exclude_column.end()) indexes[name] =false;
00607       atLeastOne = atLeastOne||indexes[name];
00608     }
00609     if (!atLeastOne) {
00610       indexes.clear();
00611     }
00612 
00613     vector<OrderChange> cc;
00614 
00615     bool fixedColumns = flags.fixed_columns;
00616 
00617     dbg_printf("Column order pre-deletions is %s\n",
00618                vector2string(local_col_names).c_str());
00619 
00620     // Pass 1: signal any column deletions
00621     for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00622          it!=col_merge.accum.end(); 
00623          it++) {
00624       MatchUnit& unit = *it;
00625       //int pCol = unit.localUnit;
00626       int lCol = unit.pivotUnit;
00627       //int rCol = unit.remoteUnit;
00628       bool deleted = unit.deleted;
00629       if (lCol!=-1 && deleted) {
00630         OrderChange change;
00631         change.indicesBefore = local_cols;
00632         change.namesBefore = local_col_names;
00633         vector<int>::iterator it = std::find(local_cols.begin(),
00634                                              local_cols.end(),
00635                                              lCol);
00636         if (it==local_cols.end()) {
00637           fprintf(stderr,"Merge logic failure\n");
00638           exit(1);
00639         }
00640         int idx = it-local_cols.begin();
00641         if (indexes.find(change.namesBefore[idx])!=indexes.end()) {
00642           if (indexes[change.namesBefore[idx]]) {
00643             constantIndex = false;
00644           }
00645         }
00646         change.mode = ORDER_CHANGE_DELETE;
00647         change.subject = lCol;
00648         local_cols.erase(it);
00649         local_col_names.erase(local_col_names.begin()+idx);
00650         change.indicesAfter = local_cols;
00651         change.namesAfter = local_col_names;
00652         //output.changeColumn(change);
00653         cc.push_back(change);
00654         if (local_cols.size()==0) {
00655           allGone = true;
00656         }
00657       }
00658     }
00659 
00660 
00661     dbg_printf("Column order pre-shuffle is %s\n",
00662                vector2string(local_col_names).c_str());
00663 
00664     // Pass 2: check order
00665     vector<int> shuffled_cols;
00666     for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00667          it!=col_merge.accum.end(); 
00668          it++) {
00669       MatchUnit& unit = *it;
00670       //int pCol = unit.localUnit;
00671       int lCol = unit.localUnit;
00672       //int rCol = unit.remoteUnit;
00673       bool deleted = unit.deleted;
00674       if (lCol!=-1 && !deleted) {
00675         shuffled_cols.push_back(lCol);
00676       }
00677       //printf("[%d:%d:%d %d] ", lCol, pCol, rCol, deleted);
00678     }
00679     //printf("\n");
00680 
00681     dbg_printf("Column order is now %s\n",
00682                vector2string(local_col_names).c_str());
00683 
00684     // Pass 2: signal any column shuffling
00685     // 1 2 3 4
00686     // 2 3 4 1
00687     Mover move;
00688     vector<int> move_order;
00689     
00690     if (local_cols.size()!=shuffled_cols.size()) {
00691       dbg_printf("Match failed %s:%d (%d vs %d)\n",
00692                  __FILE__, __LINE__,
00693                  local_cols.size(), shuffled_cols.size()
00694                  );
00695       fprintf(stderr,"Match failed, please report %s:%d\n",
00696               __FILE__, __LINE__);
00697       exit(1);
00698     }
00699 
00700     move.move(local_cols,shuffled_cols,move_order);
00701     dbg_printf("* move complete\n");
00702  
00703     if (move_order.size()>0) {
00704       // Should send messages for this case, but we're not ready
00705       // yet to exercise it.
00706       // For now, local order will remain unchanged.
00707 
00708       dbg_printf("MOVE order %d\n", (int)local_cols.size());
00709       dbg_printf("  [%s]\n", vector2string(local_cols).c_str());
00710       dbg_printf("  [%s]\n", vector2string(shuffled_cols).c_str());
00711       dbg_printf("  [%s]\n\n", vector2string(move_order).c_str());
00712 
00713       //vector<int> local_cols_save = local_cols;
00714       for (int m=0; m<(int)move_order.size(); m++) {
00715         int a = move_order[m];
00716         //int a = local_cols_save[p]
00717         dbg_printf("Move %d\n", a);
00718 
00719         OrderChange change;
00720         change.indicesBefore = local_cols;
00721         change.namesBefore = local_col_names;
00722         vector<int>::iterator it = std::find(local_cols.begin(),
00723                                              local_cols.end(),
00724                                              a);
00725         if (it==local_cols.end()) {
00726           fprintf(stderr,"Merge logic failure\n");
00727           exit(1);
00728         }
00729         vector<int>::iterator it2 = std::find(shuffled_cols.begin(),
00730                                               shuffled_cols.end(),
00731                                               a);
00732         if (it2==shuffled_cols.end()) {
00733           fprintf(stderr,"Merge logic failure\n");
00734           exit(1);
00735         }
00736         change.subject = *it;
00737         change.object = *it2;
00738         int idx = it-local_cols.begin();
00739         int idx2 = it2-shuffled_cols.begin();
00740         change.mode = ORDER_CHANGE_MOVE;
00741         local_cols.erase(it);
00742         string name = local_col_names[idx];
00743         change.object = *it2;
00744         local_col_names.erase(local_col_names.begin()+idx);
00745         local_cols.insert(local_cols.begin()+idx2,a);
00746         local_col_names.insert(local_col_names.begin()+idx2,name);
00747         change.indicesAfter = local_cols;
00748         change.namesAfter = local_col_names;
00749         //output.changeColumn(change);
00750         if (change.namesBefore!=change.namesAfter) {
00751           cc.push_back(change);
00752         }
00753       }
00754     }
00755 
00756     dbg_printf("Column order is now %s\n",
00757                vector2string(local_col_names).c_str());
00758 
00759     //printf(">>> %s %d\n", __FILE__, __LINE__);
00760 
00761     // Pass 3: signal any column insertions
00762     int at = 0;
00763     for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00764          it!=col_merge.accum.end(); 
00765          it++) {
00766       MatchUnit& unit = *it;
00767       dbg_printf("Add: UNIT %d (%d/%d/%d)\n", at, 
00768                  unit.pivotUnit, unit.localUnit, unit.remoteUnit);
00769       //int pCol = unit.localUnit;
00770       int lCol = unit.pivotUnit;
00771       int rCol = unit.remoteUnit;
00772       bool deleted = unit.deleted;
00773       if (lCol==-1 && rCol!=-1 && !deleted) {
00774         OrderChange change;
00775         change.indicesBefore = local_cols;
00776         change.namesBefore = local_col_names;
00777         change.mode = ORDER_CHANGE_INSERT;
00778         local_cols.insert(local_cols.begin()+at,-rCol-1);
00779 
00780         string name = remote_names.suggestColumnName(rCol);
00781         dbg_printf("Add: Addition of remote name %s\n", name.c_str());
00782         bool collision = false;
00783         if (name[0]>='0'&&name[0]<='9') {
00784           name = string("{") + name + "}";
00785         }
00786         do {
00787           collision = false;
00788           for (int i=0; i<(int)local_col_names.size(); i++) {
00789             if (local_col_names[i]==name) {
00790               collision = true;
00791               name = name + "_";
00792               break;
00793             }
00794           }
00795         } while (collision);
00796         local_col_names.insert(local_col_names.begin()+at,name);
00797         change.indicesAfter = local_cols;
00798         change.namesAfter = local_col_names;
00799         change.subject = local_cols[at];
00800         //output.changeColumn(change);
00801         cc.push_back(change);
00802         at++;
00803       } 
00804       if (unit.localUnit!=-1 && !deleted) {
00805         at++;
00806       }
00807     }
00808 
00809     //printf(">>> %s %d\n", __FILE__, __LINE__);
00810 
00811     dbg_printf("Column order is now %s\n",
00812                vector2string(local_col_names).c_str());
00813 
00814     // PASS 4 - column renames
00815     if (flags.assume_header) {
00816       for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00817            it!=col_merge.accum.end(); 
00818            it++) {
00819         MatchUnit& unit = *it;
00820         int lCol = unit.pivotUnit;
00821         int rCol = unit.remoteUnit;
00822         bool deleted = unit.deleted;
00823         if (lCol!=-1 && rCol!=-1 && !deleted) {
00824           string lName = local_names.suggestColumnName(lCol);
00825           string rName = remote_names.suggestColumnName(rCol);
00826           if (lName!=rName) {
00827             dbg_printf("Renamed %s -> %s\n", lName.c_str(), rName.c_str());
00828             OrderChange change;
00829             change.indicesBefore = local_cols;
00830             change.namesBefore = local_col_names;
00831             change.mode = ORDER_CHANGE_RENAME;
00832             change.indicesAfter = local_cols;
00833             vector<string>::iterator it =  std::find(local_col_names.begin(),
00834                                                      local_col_names.end(),
00835                                                      lName);
00836             if (it==local_col_names.end()) {
00837               fprintf(stderr,"Confused by column name %s\n", lName.c_str());
00838             } else {
00839               *it = rName;
00840               change.namesAfter = local_col_names;
00841               change.subject = local_cols[it-local_col_names.begin()];
00842               cc.push_back(change);
00843             }
00844           }
00845         }
00846       }
00847     }
00848 
00849     if (cc.size()>0) {
00850       constantColumns = false;
00851     }
00852     
00853     names = local_col_names;
00854     filtered_names.clear();
00855 
00856     if (fixedColumns) {
00857       for (int i=0; i<(int)original_col_names.size(); i++) {
00858         filtered_names.insert(original_col_names[i]);
00859       }
00860     }
00861 
00862     if (fixedColumns) {
00863       local_col_names = original_col_names;
00864     }
00865 
00866     //printf(">>> %s %d\n", __FILE__, __LINE__);
00867 
00868 
00869     // LINKIT
00870 
00871     if (link) {
00872       if (local_col_names!=original_col_names) {
00873         local_names.sniff();
00874         remote_names.sniff();
00875       
00876         // perspective: MERGE, COLUMN
00877         for (list<MatchUnit>::iterator it=col_merge.accum.begin();
00878              it!=col_merge.accum.end(); 
00879              it++) {
00880           MatchUnit& unit = *it;
00881           int pCol = unit.pivotUnit;
00882           int lCol = unit.localUnit;
00883           int rCol = unit.remoteUnit;
00884           bool deleted = unit.deleted;
00885           LinkDeclare decl;
00886           decl.mode = LINK_DECLARE_MERGE;
00887           decl.column = true;
00888           decl.rc_id_pivot = pCol;
00889           decl.rc_id_local = lCol;
00890           decl.rc_id_remote = rCol;
00891           decl.rc_deleted = deleted;
00892           if (lCol!=-1) {
00893             decl.rc_str_local = local_names.suggestColumnName(lCol);
00894           }
00895           if (rCol!=-1) {
00896             decl.rc_str_remote = remote_names.suggestColumnName(rCol);
00897           }
00898           output.declareLink(decl);
00899         }
00900       }
00901     }
00902 
00903     vector<RowChange> rc;
00904     // Now process rows
00905     if (!state.allIdentical) {
00906       for (list<MatchUnit>::iterator it=row_merge.accum.begin();
00907            it!=row_merge.accum.end(); 
00908            it++) {
00909         MatchUnit& unit = *it;
00910 
00911         // Special case: if all columns were deleted, then we assume
00912         // all local rows are deleted.
00913         if (allGone) {
00914           unit.localUnit = -1;
00915           unit.pivotUnit = -1;
00916         }
00917 
00918         if (link) {
00919           LinkDeclare decl;
00920           decl.mode = LINK_DECLARE_MERGE;
00921           decl.column = false;
00922           decl.rc_id_pivot = unit.pivotUnit;
00923           decl.rc_id_local = unit.localUnit;
00924           decl.rc_id_remote = unit.remoteUnit;
00925           decl.rc_deleted = unit.deleted;
00926           decl.pivot = PolySheet(&pivot,false);
00927           decl.local = PolySheet(&local,false);
00928           decl.remote = PolySheet(&remote,false);
00929           output.declareLink(decl);
00930         }
00931         if (unit.remoteUnit!=-1 || !allGone) {
00932           bool ok = mergeRow(pivot,local,remote,unit,output,flags,rc);
00933           if (!ok) { return false; }
00934         }
00935       }
00936     }
00937 
00938 
00939     if (!fixedColumns) {
00940       NameChange nc;
00941       nc.mode = NAME_CHANGE_DECLARE;
00942       nc.final = false;
00943       nc.constant = constantColumns;
00944       nc.names = original_col_names;
00945       output.changeName(nc);
00946     }
00947 
00948     //printf(">>> %s %d\n", __FILE__, __LINE__);
00949 
00950     if (!fixedColumns) {
00951       for (int i=0; i<(int)cc.size(); i++) {
00952         OrderChange& change = cc[i];
00953         output.changeColumn(change);
00954       }
00955     }
00956 
00957     {
00958       NameChange nc;
00959       nc.mode = NAME_CHANGE_DECLARE;
00960       nc.final = true;
00961       nc.names = local_col_names;
00962       nc.constant = constantColumns;
00963       output.changeName(nc);
00964     }
00965 
00966     //printf(">>> %s %d\n", __FILE__, __LINE__);
00967 
00968     if (!constantIndex) {
00969       for (int i=0; i<(int)original_col_names.size(); i++) {
00970         string name = original_col_names[i];
00971         indexes[name] = true;
00972       }
00973     }
00974     if (rc.size()>0) {
00975       output.addPoolsFromFlags(state.local);
00976     }
00977     for (int i=0; i<(int)rc.size(); i++) {
00978       /*
00979         scope for being smarter here about what gets scoped in.
00980        */
00981       RowChange& change = rc[i];
00982       change.indexes = indexes;
00983       change.allNames = local_col_names;
00984       output.changeRow(change);
00985     }
00986 
00987     //printf(">>> %s %d\n", __FILE__, __LINE__);
00988 
00989     output.mergeDone();
00990     return true;
00991   }
00992 
00993 
00994 
00995   // MERGE
00996 
00997   if (link) {
00998     local_names.sniff();
00999     remote_names.sniff();
01000       
01001     // perspective: MERGE, COLUMN
01002     bool column_change = false;
01003     for (list<MatchUnit>::iterator it=col_merge.accum.begin();
01004          it!=col_merge.accum.end(); 
01005          it++) {
01006       MatchUnit& unit = *it;
01007       int pCol = unit.pivotUnit;
01008       int lCol = unit.localUnit;
01009       int rCol = unit.remoteUnit;
01010       bool deleted = unit.deleted;
01011       if (pCol!=lCol || pCol!=rCol || deleted) {
01012         column_change = true;
01013       }
01014     }
01015 
01016 
01017     if (column_change) {
01018       for (list<MatchUnit>::iterator it=col_merge.accum.begin();
01019            it!=col_merge.accum.end(); 
01020            it++) {
01021         MatchUnit& unit = *it;
01022         int pCol = unit.pivotUnit;
01023         int lCol = unit.localUnit;
01024         int rCol = unit.remoteUnit;
01025         bool deleted = unit.deleted;
01026         LinkDeclare decl;
01027         decl.mode = LINK_DECLARE_MERGE;
01028         decl.column = true;
01029         decl.rc_id_pivot = pCol;
01030         decl.rc_id_local = lCol;
01031         decl.rc_id_remote = rCol;
01032         decl.rc_deleted = deleted;
01033         if (lCol!=-1) {
01034           decl.rc_str_local = local_names.suggestColumnName(lCol);
01035         }
01036         if (rCol!=-1) {
01037           decl.rc_str_remote = remote_names.suggestColumnName(rCol);
01038         }
01039         output.declareLink(decl);
01040       }
01041     }
01042   }
01043 
01044   vector<string> header;
01045   for (list<MatchUnit>::iterator it=col_merge.accum.begin();
01046        it!=col_merge.accum.end(); 
01047        it++) {
01048     MatchUnit& unit = *it;
01049     //int pCol = unit.localUnit;
01050     int lCol = unit.pivotUnit;
01051     int rCol = unit.remoteUnit;
01052     bool deleted = unit.deleted;
01053     if (!deleted) {
01054       if (lCol!=-1&&rCol!=-1) {
01055         header.push_back("");
01056       } else if (lCol!=-1) {
01057         header.push_back(""); // local add
01058       } else if (rCol!=-1) {
01059         header.push_back("[add]"); // remote add
01060       } else {
01061         header.push_back("[float]");
01062       }
01063     }
01064   }
01065   output.addHeader("[conflict]",header,"");
01066 
01067   for (list<MatchUnit>::iterator it=row_merge.accum.begin();
01068        it!=row_merge.accum.end(); 
01069        it++) {
01070     MatchUnit& unit = *it;
01071     //int _l = unit.localUnit;
01072     //int _p = unit.pivotUnit;
01073     //int _r = unit.remoteUnit;
01074     if (link) {
01075       LinkDeclare decl;
01076       decl.mode = LINK_DECLARE_MERGE;
01077       decl.column = false;
01078       decl.rc_id_pivot = unit.pivotUnit;
01079       decl.rc_id_local = unit.localUnit;
01080       decl.rc_id_remote = unit.remoteUnit;
01081       decl.rc_deleted = unit.deleted;
01082       decl.pivot = PolySheet(&pivot,false);
01083       decl.local = PolySheet(&local,false);
01084       decl.remote = PolySheet(&remote,false);
01085       output.declareLink(decl);
01086     }
01087 
01088     bool deleted = unit.deleted;
01089     if (!deleted) {
01090       vector<RowChange> rc;
01091       bool ok = mergeRow(pivot,local,remote,unit,output,flags,rc);
01092       if (!ok) return false;
01093       for (int i=0; i<(int)rc.size(); i++) {
01094         output.changeRow(rc[i]);
01095       }
01096     }
01097   }
01098 
01099   output.mergeDone();
01100 
01101   if (conflicts==0) {
01102     dbg_printf("No conflicts!\n");
01103     output.stripMarkup();
01104   }
01105 
01106   //dbg_printf("Got merged result (%dx%d)\n", result.width(), result.height());
01107   //CsvFile::write(result,"result.csv");
01108   return true;
01109 }
01110 
01111 
01112 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines