COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_core/MergeOutputTdiff.cpp
Go to the documentation of this file.
00001 #include <coopy/MergeOutputTdiff.h>
00002 #include <coopy/SheetStyle.h>
00003 #include <coopy/DataSheet.h>
00004 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 
00008 #define WANT_MAP2STRING
00009 #define WANT_VECTOR2STRING
00010 #include <coopy/Stringer.h>
00011 
00012 using namespace std;
00013 using namespace coopy::store;
00014 using namespace coopy::cmp;
00015 
00016 #define OP_MATCH "..."
00017 #define OP_ASSIGN "..."
00018 #define OP_MATCH_ASSIGN "..."
00019 #define OP_CONTEXT "#"
00020 #define OP_NONE ""
00021 
00022 static string celly(const SheetCell& c, bool quote_space = false) {
00023   if (c.text=="NULL"&&!c.escaped) {
00024     return "\'NULL\'";
00025   }
00026   if (c.escaped) {
00027     return "NULL";
00028   }
00029   bool needQuote = false;
00030   bool lastWasMinus = false;
00031   for (int i=0; i<(int)c.text.length() && !needQuote; i++) {
00032     switch (c.text[i]) {
00033     case '\n':
00034     case '\r':
00035       needQuote = true;
00036       break;
00037     case '!':
00038     case ':':
00039     case '=':
00040     case '|':
00041       needQuote = true;
00042       break;
00043     case '-':
00044       break;
00045     case '>':
00046       if (lastWasMinus) {
00047         needQuote = true;
00048       }
00049       break;
00050     case '\'':
00051       needQuote = true;
00052       break;
00053     case ' ':
00054       if (quote_space) {
00055         needQuote = true;
00056       }
00057     default:
00058       if (c.text[i]<32) {
00059         needQuote = true;
00060       }
00061       break;
00062     }
00063     lastWasMinus = (c.text[i]=='-');
00064   }
00065   string v = c.text;
00066   if (needQuote) {
00067     v = quoteSql(c.text,'\'',true);
00068     Stringer::replace(v,"\n","\\n");
00069     Stringer::replace(v,"\r","\\r");
00070   }
00071   return v;
00072 }
00073 
00074 static string stringy(const string& s, bool quote_space = false) {
00075   SheetCell c;
00076   c.text = s;
00077   c.escaped = false;
00078   return celly(c,quote_space);
00079 }
00080 
00081 MergeOutputTdiff::MergeOutputTdiff() {
00082   setSheet("");
00083   sheetNameShown = true;
00084   sheetNameBreakShown = true;
00085   lastWasFactored = false;
00086 }
00087 
00088 bool MergeOutputTdiff::mergeStart() {
00089   const CompareFlags& flags = getFlags();
00090   if (!flags.omit_format_name) {
00091     fprintf(out,"# tdiff version 0.3\n");
00092     if (flags.local_uri!="" && flags.remote_uri=="") {
00093       fprintf(out,"# +++ %s\n", flags.local_uri.c_str());
00094     } else {
00095       if (flags.local_uri!="") {
00096         fprintf(out,"# --- %s\n", flags.local_uri.c_str());
00097       }
00098       if (flags.remote_uri!="") {
00099         fprintf(out,"# +++ %s\n", flags.remote_uri.c_str());
00100       }
00101     }
00102     if (flags.pivot_uri!="") {
00103       fprintf(out,"# ^^^ %s\n", flags.pivot_uri.c_str());
00104     }
00105   }
00106   return true;
00107 }
00108 
00109 void MergeOutputTdiff::showSheet(bool bound) {
00110   if (!sheetNameShown) {
00111     const CompareFlags& flags = getFlags();
00112     if (!flags.omit_sheet_name) {
00113       fprintf(out,"\n@@@ %s\n", sheetName.c_str());
00114     }
00115     sheetNameShown = true;
00116     sheetNameBreakShown = false;
00117   }
00118   if (!bound) {
00119     if (!sheetNameBreakShown) {
00120       if (!flags.omit_sheet_name) {
00121         fprintf(out,"\n");
00122       }
00123       sheetNameBreakShown = true;
00124     }
00125   }
00126 }
00127 
00128 
00129 bool MergeOutputTdiff::mergeDone() {
00130   flushRows();
00131   return true;
00132 }
00133 
00134 bool MergeOutputTdiff::changeColumn(const OrderChange& change) {
00135   showSheet();
00136   constantColumns = false;
00137   switch (change.mode) {
00138   case ORDER_CHANGE_DELETE:
00139     {
00140       int idx = change.identityToIndex(change.subject);
00141       if (change.namesBefore.size()<=idx) {
00142         fprintf(stderr, "Could not find column to remove\n");
00143         exit(1);
00144       } else {
00145         fprintf(out,"@- %s", stringy(change.namesBefore[idx],true).c_str());
00146       }
00147     }
00148     break;
00149   case ORDER_CHANGE_INSERT:
00150     {
00151       int idx = change.identityToIndexAfter(change.subject);
00152       if (change.namesAfter.size()<=idx) {
00153         fprintf(stderr, "Could not find column to insert\n");
00154         exit(1);
00155       } else {
00156         fprintf(out,"@+ %s", stringy(change.namesAfter[idx],true).c_str());
00157       }
00158     }
00159     break;
00160   case ORDER_CHANGE_MOVE:
00161     {
00162       int idx = change.identityToIndex(change.subject);
00163       if (change.namesBefore.size()<=idx) {
00164         fprintf(stderr, "Could not find column to move\n");
00165         exit(1);
00166       } else {
00167         fprintf(out,"@: %s", stringy(change.namesBefore[idx],true).c_str());
00168       }
00169     }
00170     break;
00171   case ORDER_CHANGE_RENAME:
00172     {
00173       int idx = change.identityToIndexAfter(change.subject);
00174       if (change.namesAfter.size()<=idx) {
00175         fprintf(stderr, "Could not find column to rename\n");
00176         exit(1);
00177       } else {
00178         fprintf(out,"@= %s", stringy(change.namesAfter[idx],true).c_str());
00179       }
00180     }
00181     break;
00182   default:
00183     fprintf(stderr,"  Unknown column operation\n\n");
00184     exit(1);
00185     break;
00186   }
00187   fprintf(out, " |");
00188   for (int i=0; i<(int)change.namesAfter.size(); i++) {
00189     fprintf(out,"%s|",stringy(change.namesAfter[i]).c_str());
00190   }
00191   fprintf(out,"\n");
00192 
00193   activeColumn.clear();
00194   for (int i=0; i<(int)change.namesAfter.size(); i++) {
00195     activeColumn[change.namesAfter[i]] = true;
00196   }
00197   //nops = change.namesAfter;
00198   return true;
00199 }
00200 
00201 bool MergeOutputTdiff::operateRow(const RowChange& change, const char *tag) {
00202   /*
00203   vector<string> lnops;
00204   for (int i=0; i<(int)change.names.size(); i++) {
00205     if (activeColumn[change.names[i]]) {
00206       lnops.push_back(change.names[i]);
00207     }
00208   }
00209   */
00210   if (true) { //lnops!=nops) {
00211     if (true) {
00212       fprintf(out, "@ |");
00213       for (int i=0; i<(int)change.names.size(); i++) {
00214         if (activeColumn[change.names[i]]) {
00215           bool select = check(showForSelect,change.names[i]);
00216           bool cond = check(showForCond,change.names[i]);
00217           bool view = check(showForDescribe,change.names[i]);
00218           fprintf(out,"%s%s|",
00219                   stringy(change.names[i]).c_str(),
00220                   select?"=":"");
00221           //(view&&!(cond||select))?"":"");  // = was ->
00222         }
00223       }
00224       fprintf(out,"\n");
00225       showedColumns = true;
00226     }
00227     //nops = lnops;
00228   }
00229 
00230   return true;
00231 }
00232 
00233 // practice mode is unnecessary for this output style
00234 bool MergeOutputTdiff::updateRow(const RowChange& change, const char *tag,
00235                                  bool select, bool update, bool practice,
00236                                  bool factored) {
00237   bool ok = true;
00238 
00239   char ch = '?';
00240   bool assign = false;
00241   if (!practice) {
00242     if (string(tag)=="update") {
00243       ch = '=';
00244       assign = true;
00245     } else if (string(tag)=="insert") {
00246       ch = '+';
00247     } else if (string(tag)=="delete") {
00248       ch = '-';
00249     } else if (string(tag)=="after") {
00250       ch = '*';
00251     } else if (string(tag)=="move") {
00252       ch = ':';
00253       assign = true;
00254     }
00255     fprintf(out, "%s%c |",change.conflicted?"!":"",ch);
00256   }
00257   for (int i=0; i<(int)change.names.size(); i++) {
00258     string name = change.names[i];
00259     if (activeColumn[name]) {
00260       bool conflict = change.conflictingVal.find(name)!=
00261         change.conflictingVal.end();
00262       bool shown = false;
00263       bool transition = false; //showForDesign[name]&&showForSelect[name];
00264       //if (change.cond.find(name)!=change.cond.end() && 
00265       //  showForSelect[name] && select) {
00266       bool select = check(showForSelect,name);
00267       bool cond = check(showForCond,name);
00268       bool view = check(showForDescribe,name);
00269       if (!factored) {
00270         fprintf(out,"%s%s%s%s",
00271                 stringy(name).c_str(),
00272                 select?"=":"",
00273                 (view&&!(cond||select))?((ch=='+')?":->":":*->"):"",
00274                 (cond&&!(view||select))?":":"");
00275       } else {
00276         if (assign&&(!cond)) {
00277           fprintf(out,"*->");
00278         }
00279       }
00280       if (conflict) {
00281         fprintf(out,"!");
00282         if (change.conflictingParentVal.find(name)!=change.conflictingParentVal.end()) {
00283           fprintf(out,"%s!",celly(change.conflictingParentVal.find(name)->second).c_str());
00284         }
00285       }
00286 
00287       if (showForCond[name] && select) {
00288         fprintf(out,"%s",celly(change.cond.find(name)->second).c_str());
00289         transition = true;
00290         shown = true;
00291       }
00292       if (showForDescribe[name] && update) {
00293         SheetCell v;
00294         if (conflict) {
00295           v = change.conflictingVal.find(name)->second;
00296         } else {
00297           v = change.val.find(name)->second;
00298         }
00299         fprintf(out,"%s%s",
00300                 transition?"->":"",
00301                 celly(v).c_str());
00302         if (shown) ok = false; // collision
00303         shown = true;
00304       }
00305       if (!shown) {
00306         fprintf(out,"*");
00307       }
00308       fprintf(out,"|");
00309     }
00310   }
00311   fprintf(out,"\n");
00312   return ok;
00313 }
00314 
00315 bool MergeOutputTdiff::changeRow(const RowChange& change,
00316                                  bool factored,
00317                                  bool caching) {
00318   showSheet();
00319   vector<string> lops;
00320   activeColumn.clear();
00321   prevSelect = showForSelect;
00322   prevDescribe = showForDescribe;
00323   prevCond = showForCond;
00324   showForSelect.clear();
00325   showForDescribe.clear();
00326   showForCond.clear();
00327   for (int i=0; i<(int)change.names.size(); i++) {
00328     string name = change.names[i];
00329     bool condActive = false;
00330     bool valueActive = false;
00331     if (change.cond.find(name)!=change.cond.end()) {
00332       condActive = true;
00333     }
00334     if (change.val.find(name)!=change.val.end()) {
00335       valueActive = true;
00336     }
00337     bool shouldCond = condActive;
00338     bool shouldMatch = condActive && change.indexes.find(name)->second;
00339     bool shouldAssign = valueActive;
00340     if (shouldAssign) {
00341       // conservative choice, should be optional
00342       if (change.cond.find(name)!=change.cond.end()) {
00343         shouldMatch = true;
00344       }
00345     }
00346 
00347     if (change.mode==ROW_CHANGE_INSERT) {
00348       // we do not care about matching
00349       shouldMatch = false; //revSelect[name];
00350     }
00351     if (change.mode==ROW_CHANGE_DELETE) {
00352       // we do not care about assigning
00353       shouldAssign = false; //prevDescribe[name];
00354     }
00355 
00356     // ignoring shouldShow for now.
00357     int opidx = (shouldMatch?2:0) + (shouldAssign?1:0);
00358     string opi[4] = {
00359       OP_NONE,         // !match  !assign
00360       OP_ASSIGN,       // !match   assign
00361       OP_MATCH,        //  match  !assign
00362       OP_MATCH_ASSIGN, //  match   assign
00363     };
00364     string op = opi[opidx];
00365     
00366     if (opidx!=0) {
00367       activeColumn[name] = true;
00368     }
00369 
00370     // no way yet to communicate CONTEXT request
00371     lops.push_back(op + name);
00372     showForSelect[name] = shouldMatch;
00373     showForDescribe[name] = shouldAssign;
00374     showForCond[name] = shouldCond;
00375   }
00376   if (caching) {
00377     // state 0 = no factoring of header
00378     // state 1 = factoring of header
00379     float costFactored = 1;
00380     float costUnfactored = 1.9;
00381     dbg_printf("local ops %s\n", vector2string(lops).c_str());
00382     float costSwitch = 0.25;
00383     if (lops!=ops) {
00384       //if (ops.size()>0) {
00385       costFactored += 1.1;
00386       //}
00387       ops = lops;
00388       costSwitch = 0;
00389     }
00390     //printf("factored %g unfactored %g\n", costFactored, costUnfactored);
00391     formLattice.beginTransitions();
00392     formLattice.addTransition(0,0,costUnfactored);
00393     formLattice.addTransition(1,0,costUnfactored+costSwitch);
00394     formLattice.addTransition(0,1,costFactored+costSwitch);
00395     formLattice.addTransition(1,1,costFactored);
00396     formLattice.endTransitions();
00397     rowCache.push_back(change);
00398     return true;
00399   }
00400 
00401   dbg_printf("round 2 - local ops %s (%d)\n", vector2string(lops).c_str(), factored);
00402   if (factored) {
00403     if (lops!=ops) {
00404       ops = lops;
00405       operateRow(change,"act");
00406     }
00407   } else {
00408     ops = lops;
00409   }
00410   lastWasFactored = factored;
00411   switch (change.mode) {
00412   case ROW_CHANGE_INSERT:
00413     updateRow(change,"insert",false,true,false,factored);
00414     break;
00415   case ROW_CHANGE_DELETE:
00416     updateRow(change,"delete",true,false,false,factored);
00417     break;
00418   case ROW_CHANGE_CONTEXT:
00419     updateRow(change,"after",true,false,false,factored);
00420     break;
00421   case ROW_CHANGE_MOVE:
00422     updateRow(change,"move",true,true,false,factored);
00423     break;
00424   case ROW_CHANGE_UPDATE:
00425     updateRow(change,"update",true,true,false,factored);
00426     break;
00427   default:
00428     fprintf(stderr,"  Unknown row operation\n\n");
00429     exit(1);
00430     break;
00431   }
00432   return true;
00433 }
00434 
00435 
00436 bool MergeOutputTdiff::changeName(const NameChange& change) {
00437   flushRows();
00438   const vector<string>& names = change.names;
00439   bool final = change.final;
00440   bool constant = change.constant;
00441   bool loud = change.loud;
00442   if (!final) {
00443     activeColumn.clear();
00444     for (int i=0; i<(int)names.size(); i++) {
00445       activeColumn[names[i]] = true;
00446       showForSelect[names[i]] = true;
00447       showForDescribe[names[i]] = true;
00448     }
00449     if (loud||!constant) {
00450       showSheet();
00451       //fprintf(out, "/* %s %s ","column","name");
00452       //result.addField(ROW_COL,false);
00453       fprintf(out,"@@ |");
00454       for (int i=0; i<(int)names.size(); i++) {
00455         fprintf(out,"%s|",names[i].c_str());
00456       }
00457       fprintf(out,"\n");
00458       showedColumns = true;
00459     }
00460   }
00461   columns = names;
00462   return true;
00463 }
00464 
00465 
00466 bool MergeOutputTdiff::setSheet(const char *name) {
00467   flushRows();
00468   sheetNameShown = false;
00469   sheetNameBreakShown = true;
00470   sheetName = name;
00471   return true;
00472 }
00473 
00474 
00475 void MergeOutputTdiff::flushRows() {
00476   ops.clear();
00477   //opsLoose.clear();
00478   lastWasFactored = false;
00479   //nops.clear();
00480   activeColumn.clear();
00481   showForSelect.clear();
00482   showForDescribe.clear();
00483   prevSelect.clear();
00484   prevDescribe.clear();
00485   columns.clear();
00486   constantColumns = true;
00487   showedColumns = false;
00488   if (rowCache.size()==0) return;
00489   /*for (int i=0; i<(int)rowCache.size(); i++) {
00490     RowChange& change = rowCache[i];
00491     changeRow(change,false,true);
00492   }*/
00493   if (coopy_is_verbose()) {
00494     formLattice.showPath();
00495   }
00496   for (int i=0; i<(int)rowCache.size(); i++) {
00497     RowChange& change = rowCache[i];
00498     changeRow(change,(formLattice(i)==1),false);
00499   }
00500   formLattice.reset();
00501   rowCache.clear();
00502 }
00503 
00504 bool MergeOutputTdiff::changePool(const PoolChange& change) {
00505   showSheet(true);
00506   fprintf(out,"x %s |", change.poolName.c_str());
00507   for (int i=0; i<(int)change.pool.size(); i++) {
00508     string key = change.pool[i].tableName;
00509     if (key!="") {
00510       key += ".";
00511     }
00512     key += change.pool[i].fieldName;
00513     fprintf(out,"%s",stringy(key).c_str());
00514     if (change.pool[i].invented) {
00515       fprintf(out,"=");
00516     }
00517     fprintf(out,"|");
00518   }
00519   fprintf(out,"\n");
00520   return true;
00521 }
00522 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines