COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_fold/FoldTool.cpp
Go to the documentation of this file.
00001 #include <coopy/FoldTool.h>
00002 #include <coopy/FoldedSheet.h>
00003 #include <coopy/CsvSheet.h>
00004 #include <coopy/SchemaSniffer.h>
00005 #include <coopy/ShortTextBook.h>
00006 #include <coopy/CsvFile.h>
00007 #include <coopy/IntSheet.h>
00008 
00009 #include <vector>
00010 #include <map>
00011 #include <set>
00012 
00013 #include <coopy/Stringer.h>
00014 
00015 using namespace std;
00016 using namespace coopy::fold;
00017 using namespace coopy::store;
00018 
00019 typedef vector<int> ints;
00020 
00021 class SheetCellCmp {
00022 public:
00023   bool operator() (const SheetCell& e1, const SheetCell& e2) const {
00024     if (e1.text<e2.text) return true;
00025     if (e1.text>e2.text) return false;
00026     return (e1.escaped?1:0)<(e2.escaped?1:0);
00027   }
00028 };
00029 
00030 class SheetAccess {
00031 public:
00032   PolySheet sheet;
00033   SchemaSniffer schema;
00034 
00035   typedef multimap<SheetCell,int,SheetCellCmp> Mapper;
00036   typedef map<int,Mapper> Mappers;
00037   Mappers mappers;
00038 
00039   SheetAccess() {
00040   }
00041 
00042   void setSheet(const PolySheet& sheet) {
00043     this->sheet = sheet;
00044     schema.setSheet(sheet);
00045   }
00046 
00047   bool isValid() const {
00048     return sheet.isValid();
00049   }
00050 
00051   std::string getName() {
00052     SheetSchema *ss = schema.suggestSchema();
00053     if (!ss) {
00054       printf("No schema!\n");
00055       return "";
00056     }
00057     return ss->getSheetName();
00058   }
00059 
00060   std::string getName(int id) {
00061     SheetSchema *ss = schema.suggestSchema();
00062     if (!ss) {
00063       printf("No schema!\n");
00064       return "";
00065     }
00066     ColumnInfo info = ss->getColumnInfo(id);
00067     return info.getName();
00068   }
00069 
00070   int getId(const char *name) {
00071     SheetSchema *ss = schema.suggestSchema();
00072     if (!ss) {
00073       printf("No schema!\n");
00074       return -1;
00075     }
00076     int result = ss->getColumnIndexByName(name);
00077     if (result<0) {
00078       printf("Could not find column %s\n", name);
00079       for (int i=0; i<ss->getColumnCount(); i++) {
00080         ColumnInfo info = ss->getColumnInfo(i);
00081         printf("  col %d %s\n", i, info.getName().c_str());
00082       }
00083       exit(1);
00084     }
00085     return result;
00086   }
00087 
00088   ints selectFrom(int id, const SheetCell& cell) {
00089     if (id==-1) {
00090       ints result;
00091       for (int i=0; i<sheet.height(); i++) {
00092         result.push_back(i); 
00093       }
00094       //printf("selectFrom generated %d results\n", result.size());
00095       return result;
00096     }
00097 
00098     if (mappers.find(id)==mappers.end()) {
00099       mappers[id] = Mapper();
00100       Mapper& m = mappers[id];
00101       dbg_printf("Generating index for %s:%d\n", getName().c_str(),id);
00102       for (int i=0; i<sheet.height(); i++) {
00103         //if (sheet.cellSummary(id,i)==cell) {
00104         m.insert(std::make_pair(sheet.cellSummary(id,i),i));
00105         //}
00106       }
00107     }
00108     Mapper& m = mappers[id];
00109     ints result;
00110     for (Mapper::iterator it = m.lower_bound(cell);
00111            it != m.upper_bound(cell); it++) {   
00112       result.push_back(it->second);
00113     }
00114     //printf("selectFrom generated %d results\n", result.size());
00115     return result;
00116   }
00117 };
00118 
00119 
00120 
00121 class FoldLayout {
00122 public:
00123   SheetAccess *src;
00124   SheetAccess *dest;
00125   string srcIdName;
00126   string destIdName;
00127   int offset;
00128 
00129   PolySheet srcTable;
00130   PolySheet destTable;
00131   int srcId;
00132   int destId;
00133   typedef multimap<int,int> mapper;
00134   mapper indexedSrc;
00135   mapper indexedDest;
00136 
00137   FoldLayout() {
00138     src = dest = NULL;
00139     srcId = -1;
00140     destId = -1;
00141     offset = 0;
00142   }
00143 
00144   bool updateBase() {
00145     if (src!=NULL) {
00146       srcTable = src->sheet;
00147     }
00148     if (dest!=NULL) {
00149       destTable = dest->sheet;
00150     }
00151     return true;
00152   }
00153 
00154   bool updateIds() {
00155     if (srcId!=-1 && destId!=-1) return true;
00156     COOPY_ASSERT(src);
00157     COOPY_ASSERT(dest);
00158     srcId = src->getId(srcIdName.c_str());
00159     destId = dest->getId(destIdName.c_str());
00160     printf("IDs %s %s %d %d\n", srcIdName.c_str(), destIdName.c_str(), 
00161            srcId, destId);
00162     return (srcId!=-1 && destId!=-1);
00163   }
00164 
00165   bool updateMap() {
00166     if (indexedSrc.size()!=0) return true;
00167     if (indexedDest.size()!=0) return true;
00168     if (!updateIds()) {
00169       printf("Failed to get IDs\n");
00170       return false;
00171     }
00172     COOPY_ASSERT(src);
00173     COOPY_ASSERT(dest);
00174     for (int y=0; y<srcTable.height(); y++) {
00175       int id = srcTable.cellSummary(srcId,y).asInt();
00176       indexedSrc.insert(std::make_pair(y,id));
00177       //printf("src %d -> %d\n", y, id);
00178     }
00179     for (int y=0; y<destTable.height(); y++) {
00180       int id = destTable.cellSummary(destId,y).asInt();
00181       indexedDest.insert(std::make_pair(id,y));
00182       //printf("dest %d -> %d\n", id, y);
00183     }
00184     return true;
00185   }
00186 
00187   int transformForward(int y) {
00188     mapper::iterator it1 = indexedSrc.find(y);
00189     if (it1==indexedSrc.end()) {
00190       printf("src %d went nowhere\n", y);
00191       return -1;
00192     }
00193     mapper::iterator it2 = indexedDest.find(it1->second);
00194     if (it2==indexedDest.end()) {
00195       printf("dest %d from src %d went nowhere\n", it1->second, y);
00196       return -1;
00197     }
00198     return it2->second;
00199   }
00200 
00201 };
00202 
00203 
// One end of a fold link: a table name plus a key (column) name.
class FoldEdge {
public:
  std::string table;
  std::string key;

  FoldEdge() {}

  FoldEdge(std::string ntable, std::string nkey)
    : table(ntable), key(nkey) {
  }

  // Rendered as "table:key".
  std::string toString() const {
    std::string text(table);
    text += ":";
    text += key;
    return text;
  }
};
00220 
00221 class FoldEdgeCmp {
00222 public:
00223   bool operator() (const FoldEdge& e1, const FoldEdge& e2) const {
00224     if (e1.table<e2.table) return true;
00225     if (e1.table>e2.table) return false;
00226     return e1.key<e2.key;
00227   }
00228 };
00229 
00230 
00231 class FoldSelector {
00232 public:
00233   // table to select from
00234   string tableName;
00235   // key name in that table
00236   string keyName;
00237   // id of key name in that table
00238   int id;
00239 
00240   // value to match
00241   SheetCell val;
00242   // id in local table
00243   int idLocal;
00244 
00245   int minCt;
00246   int maxCt;
00247 
00248   int actualMinCt;
00249   int actualMaxCt;
00250   int targetLength;
00251 
00252   string title;
00253 
00254   FoldSelector() {
00255     id = -1;
00256     idLocal = -1;
00257     minCt = maxCt = -1;
00258     actualMinCt = -1;
00259     actualMaxCt = 0;
00260     targetLength = -1;
00261   }
00262 
00263   string toString() const {
00264     return tableName + ":" + keyName + "=" + val.toString();
00265   }
00266 };
00267 
00268   
00269 class Folds;
00270 
00271 struct Expansion {
00272 public:
00273   vector<FoldSelector>& expanded;
00274   SheetAccess& base;
00275   PolySheet& src;
00276   int selId;
00277 };
00278 
00279 class FoldCache {
00280 public:
00281   SheetAccess dud;
00282   PolySheet dud_sheet;
00283   PolyBook book;
00284 
00285   map<FoldEdge,vector<FoldSelector>, FoldEdgeCmp > expanded_map;
00286 
00287   void setBook(const PolyBook& book) {
00288     this->book = book;
00289   }
00290 
00291   map<string,SheetAccess> sheets;
00292 
00293   SheetAccess& getSheet(const char *name) {
00294     map<string,SheetAccess>::iterator it = sheets.find(name);
00295     if (it!=sheets.end()) {
00296       return it->second;
00297     }
00298     PolySheet sheet = book.readSheet(name);
00299     if (!sheet.isValid()) {
00300       fprintf(stderr,"Could not find table \"%s\"\n", name);
00301       return dud;
00302     }
00303     sheets[name] = SheetAccess();
00304     sheets[name].setSheet(sheet);
00305     return sheets[name];
00306   }
00307 
00308   //vector<FoldSelector>& 
00309   Expansion getExpansion(const FoldEdge& edge, Folds *folds);
00310 };
00311 
00312 FoldTool::FoldTool() {
00313 }
00314 
00315 class FoldEdgePair {
00316 public:
00317   FoldEdge to, from;
00318   string allowed;
00319   int minCt, maxCt;
00320   string label;
00321 
00322   FoldEdgePair() {
00323     minCt = maxCt = -1;
00324   }
00325 
00326   string toString() const {
00327     string result = to.toString() + "->" + from.toString() + "(" + allowed + ")";
00328     result += ",";
00329     result += stringer_encoder(minCt);
00330     result += ",";
00331     result += stringer_encoder(maxCt);
00332     result += ",";
00333     result += label;
00334     return result;
00335   }
00336 
00337   string desc(const string& ref) const {
00338     if (label!="-") return label;
00339 
00340     string result = "";
00341     if (from.table!=ref) {
00342       result += from.table + ":";
00343     }
00344     result += from.key;
00345     result += "->";
00346     if (to.table!=ref) {
00347       result += to.table + ":";
00348     }
00349     result += to.key;
00350     return result;
00351   }
00352 };
00353 
00354 
00355 class FoldEdgePairCmp {
00356 public:
00357   bool operator() (const FoldEdgePair& e1, const FoldEdgePair& e2) {
00358     FoldEdgeCmp cmp;
00359     if (cmp(e1.from,e2.from)) return true;
00360     if (cmp(e2.from,e1.from)) return false;
00361     if (cmp(e1.to,e2.to)) return true;
00362     if (cmp(e2.to,e1.to)) return false;
00363     return e1.allowed<e2.allowed;
00364   }
00365 };
00366 
00367 class Folds {
00368 public:
00369   typedef multimap<FoldEdge,FoldEdge,FoldEdgeCmp> EdgeMap;
00370   typedef set<FoldEdgePair,FoldEdgePairCmp> AllowedPair;
00371 
00372   EdgeMap fwd;
00373   EdgeMap rev;
00374   AllowedPair allowed;
00375 
00376   void add(const char *fromTable, const char *toTable, 
00377            const char *fromField, const char *toField,
00378            const char *allowedTable, 
00379            int minCt, int maxCt,
00380            const char *label) {
00381     FoldEdge e1, e2;
00382     e1.table = fromTable;
00383     e1.key = fromField;
00384     e2.table = toTable;
00385     e2.key = toField;
00386     fwd.insert(make_pair(e1,e2));
00387     rev.insert(make_pair(e2,e1));
00388     FoldEdgePair p1;
00389     p1.from = e1;
00390     p1.to = e2;
00391     p1.allowed = allowedTable; 
00392     p1.minCt = minCt;
00393     p1.maxCt = maxCt;
00394     p1.label = label;
00395     allowed.insert(p1);
00396     FoldEdgePair p2;
00397     p2.from = e2;
00398     p2.to = e1;
00399     p2.allowed = allowedTable;
00400     p2.minCt = minCt;
00401     p2.maxCt = maxCt;
00402     p2.label = label;
00403     allowed.insert(p2);
00404     //printf("Added %s -> %s\n", p1.toString().c_str(), p2.toString().c_str());
00405     //printf("Allowed %s\n", p1.toString().c_str());
00406     //printf("Allowed %s\n", p2.toString().c_str());
00407   }
00408 };
00409 
00410 class FakeBook : public TextBook {
00411 public:
00412   std::string name;
00413   PolySheet sheet;
00414 
00415   FakeBook() : name(coopy_get_default_table_name()) {
00416   }
00417 
00418   virtual std::vector<std::string> getNames() {
00419     std::vector<std::string> result;
00420     result.push_back(name);
00421     return result;
00422   }
00423 
00424   virtual PolySheet readSheet(const std::string& name) {
00425     if (name==this->name) {
00426       return sheet;
00427     }
00428     return PolySheet();
00429   }
00430 
00431   virtual bool open(const Property& config) {
00432     return false;
00433   }
00434 
00435   virtual PolySheet provideSheet(const SheetSchema& schema) {
00436     return PolySheet();
00437   }
00438 
00439   virtual std::string desc() const {
00440     return "FakeBook";
00441   }
00442 
00443   virtual bool namedSheets() const {
00444     return false;
00445   }
00446 };
00447 
00448 
00449 
00450 Expansion FoldCache::getExpansion(const FoldEdge& edge,
00451                                   Folds *pfolds) {
00452   Folds& folds = *pfolds;
00453   FoldCache& cache = *this;
00454 
00455   bool exists = false;
00456   if (expanded_map.find(edge)==expanded_map.end()) {
00457     expanded_map[edge] = vector<FoldSelector>();
00458   } else {
00459     exists = true;
00460   }
00461   vector<FoldSelector>& expanded = expanded_map[edge];
00462 
00463   SheetAccess& base = cache.getSheet(edge.table.c_str());
00464   PolySheet& src = base.isValid()?base.sheet:dud_sheet;
00465   int selId = -1;
00466   if (edge.key!="") {
00467     selId = base.getId(edge.key.c_str());
00468   }
00469   Expansion result = {expanded,base,src,selId};
00470   if (exists) {
00471     return result;
00472   }
00473 
00474   for (int x=0; x<src.width(); x++) {
00475     FoldEdge e;
00476     e.table = edge.table;
00477     e.key = base.getName(x);
00478     if (selId==-1) {
00479       dbg_printf("Checking %s\n", e.toString().c_str());
00480     }
00481     Folds::EdgeMap::iterator it = folds.fwd.lower_bound(e);
00482     Folds::EdgeMap::iterator it_end = folds.fwd.upper_bound(e);
00483     while (it!=it_end) {
00484       FoldEdgePair p;
00485       p.to = it->first;
00486       p.from = it->second;
00487       p.allowed = edge.table;
00488       if (selId==-1) {
00489         dbg_printf("Checking %s\n", p.toString().c_str());
00490       }
00491       set<FoldEdgePair,FoldEdgePairCmp>::iterator al = folds.allowed.find(p);
00492       if (al!=folds.allowed.end()) {
00493         if (selId==-1) {
00494           dbg_printf("Should expand out %s (to %s)\n", e.key.c_str(),
00495                      it->second.toString().c_str());
00496         }
00497         FoldSelector f;
00498         f.tableName = it->second.table;
00499         f.keyName = it->second.key;
00500         SheetAccess& alt = cache.getSheet(f.tableName.c_str());
00501         if (!alt.isValid()) {
00502           expanded.clear();
00503           return result;
00504         }
00505         f.id = alt.getId(f.keyName.c_str());
00506         f.idLocal = base.getId(it->first.key.c_str());
00507         f.minCt = al->minCt;
00508         f.maxCt = al->maxCt;
00509         f.title = al->desc(base.getName());
00510         expanded.push_back(f);
00511       }
00512       it++;
00513     }
00514     Folds::EdgeMap::iterator it2 = folds.rev.lower_bound(e);
00515     Folds::EdgeMap::iterator it2_end = folds.rev.upper_bound(e);
00516     while (it2!=it2_end) {
00517       FoldEdgePair p;
00518       p.to = it2->first;
00519       p.from = it2->second;
00520       p.allowed = edge.table;
00521       if (selId==-1) {
00522         dbg_printf("Checking %s\n", p.toString().c_str());
00523       }
00524       set<FoldEdgePair,FoldEdgePairCmp>::iterator al = folds.allowed.find(p);
00525       if (al!=folds.allowed.end()) {
00526         if (selId==-1) {
00527           dbg_printf("Should expand in %s (from %s)\n", e.key.c_str(),
00528                      it2->second.toString().c_str());
00529         }
00530         FoldSelector f;
00531         f.tableName = it2->second.table;
00532         f.keyName = it2->second.key;
00533         SheetAccess& alt = cache.getSheet(f.tableName.c_str());
00534         if (!alt.isValid()) {
00535           expanded.clear();
00536           return result;
00537         }
00538         f.id = alt.getId(f.keyName.c_str());
00539         f.idLocal = base.getId(it2->first.key.c_str());
00540         f.minCt = al->minCt;
00541         f.maxCt = al->maxCt;
00542         f.title = al->desc(base.getName());
00543         expanded.push_back(f);
00544       }
00545       it2++;
00546     }
00547   }
00548   return result;
00549 }
00550 
// Provenance record for a generated column: the name it was given, plus
// the table and original column name it was derived from.
class ColumnHistory {
public:
  std::string name;        // name of the generated column
  std::string from_table;  // table the column came from
  std::string from_name;   // column name before any prefix was applied
};
00557 
00558 class FoldFactor {
00559 public:
00560   int ct;
00561   bool excess;
00562   bool wrap;
00563   int xoffset;
00564   int yoffset;
00565   int depth;
00566   int skips;
00567   bool practice;
00568   bool namer;
00569   string prefix;
00570   IntSheet *zebra;
00571   map<string,ColumnHistory> *history;
00572   FoldSelector *selector;
00573 
00574   FoldFactor() { 
00575     ct = -1; 
00576     excess = false; 
00577     wrap = false;
00578     xoffset = yoffset = 0;
00579     depth = 0;
00580     practice = false;
00581     skips = 0;
00582     zebra = NULL;
00583     history = NULL;
00584     namer = true;
00585     selector = NULL;
00586   }
00587 };
00588 
00589 
00590 void assertColumn(SimpleSheetSchema *s, int offset, const string& name,
00591                   const string& orig_name,
00592                   const FoldFactor& factor) {
00593   if (s==NULL) return;
00594   while (s->getColumnCount()<=offset) {
00595     s->addColumn("...");
00596   }
00597   ColumnInfo c = s->getColumnInfo(offset);
00598   if (c.getName()!=name) {
00599     s->modifyColumn(ColumnRef(offset),ColumnInfo(name));
00600     if (factor.history&&factor.selector) {
00601       ColumnHistory hist;
00602       hist.name = name;
00603       hist.from_table = factor.selector->tableName;
00604       hist.from_name = orig_name;
00605       (*factor.history)[hist.name] = hist;
00606     }
00607   }
00608 }
00609 
00610 
/**
 * Expand the rows selected by `sel` into `sheet`, recursing into the
 * selectors that FoldCache::getExpansion() reports for sel's table.
 *
 * @param factor  settings for this call: count limit (ct) and overflow
 *                policy (wrap), starting xoffset/yoffset, recursion depth,
 *                number of leading rows to skip, practice flag, column
 *                name prefix, and optional zebra/history sinks.
 * @param folds   link definitions passed through to the cache.
 * @param cache   source of table accessors and cached expansions.
 * @param sheet   output sheet; written only when factor.practice is false
 *                (but see the NOTE(review) near the end about padding).
 * @param sel     selector naming the table/key/value to expand; in
 *                practice mode its actualMinCt/actualMaxCt/targetLength
 *                statistics are updated.
 * @param schema  optional schema whose column names are set through
 *                assertColumn() while `namer` is true.
 * @param ywrap   optional counter, incremented once per wrap.
 * @return the largest number of columns consumed, measured from
 *         factor.xoffset.
 */
static int fold_expander(const FoldFactor& factor,
                         Folds& folds, FoldCache& cache, 
                         FoldedSheet& sheet,
                         FoldSelector& sel,
                         SimpleSheetSchema *schema = NULL,
                         int *ywrap = NULL) {

  bool practice = factor.practice;

  Expansion exp = cache.getExpansion(FoldEdge(sel.tableName,sel.keyName),
                                     &folds);

  // get selection
  // (all rows of the table when exp.selId is -1, otherwise the rows whose
  // key column matches sel.val)
  vector<int> selected = exp.base.selectFrom(exp.selId,sel.val);

  // Prepare to iterate
  // At the top level of a real run the sheet gets one row per selected
  // row that is not skipped.
  if (factor.depth==0) {
    if (!practice) {
      sheet.resize(exp.src.width(),selected.size()-factor.skips,FoldedCell());
    }
  }

  int xoffset = factor.xoffset;
  int yoffset = factor.yoffset;
  int initial_xoffset = xoffset;
  int fct = 0;          // count of selected rows seen so far (reduced on wrap)
  int fskip = 0;        // count of rows skipped so far
  int cell_length = 0;  // widest span of columns used so far

  int at = 0;
  int z = 1;            // flips between 0 and 1 per selected row (zebra value)
  bool namer = factor.namer;
  for (vector<int>::iterator yit=selected.begin(); yit!=selected.end(); yit++) {
    z = 1-z;
    int y = *yit;
    int y0 = at; //yit-selected.begin();
    int dy = 0;

    // For nested real runs with more than one possible match, tag the
    // prefix with the index of this match (fct is still 0-based here).
    string prefix = factor.prefix;
    if (prefix!=""&&factor.depth>0&&!practice) {
      if (sel.actualMaxCt>1) {
        prefix += "[";
        prefix += stringer_encoder(fct);
        //prefix += ":";
        //prefix += stringer_encoder(sel.actualMaxCt);
        prefix += "]";
      }
    }

    fct++;
    // Pass over the first factor.skips rows without emitting anything.
    if (fskip<factor.skips) {
      fskip++;
      continue;
    }

    // More rows than the count limit allows (ct == -1 means no limit).
    bool out_of_space = (fct>factor.ct && factor.ct!=-1);
    if (out_of_space&&!factor.wrap) {
      // No wrapping: the remaining rows go into a nested sheet stored in a
      // single ".excess" cell, produced by a recursive call that skips the
      // rows already emitted; then stop iterating.
      printf("TOO LONG! %d vs %d\n", fct, factor.ct);
      if (!practice) {
        if (xoffset>=sheet.width()) {
          sheet.nonDestructiveResize(sheet.width()+1,sheet.height(),
                                     FoldedCell());
        }
        if (namer) assertColumn(schema,xoffset,prefix + ".excess","excess",
                                factor);
        FoldedCell& cell = sheet.cell(xoffset,yoffset);
        // This local `sheet` (the nested sheet) shadows the parameter for
        // the rest of this block.
        FoldedSheet *sheet = cell.getOrCreateSheet();
        COOPY_ASSERT(sheet);
        FoldFactor next_factor;
        next_factor.skips = fct-1;
        next_factor.namer = namer;
        next_factor.selector = &sel;
        next_factor.history = factor.history;
        fold_expander(next_factor, folds, cache, *sheet, sel);
      }
      printf("done\n");
      xoffset++;
      break;
    }

    if (out_of_space&&factor.wrap) {
      // Wrapping: continue on the next output row from the starting
      // column, restart the count, and stop naming columns.
      namer = false;
      yoffset++;
      xoffset = initial_xoffset;
      fct -= factor.ct;
      if (ywrap!=NULL) {
        (*ywrap)++;
      }
      /*
      printf("at %d %d / %d\n", sheet.width(), sheet.height(), yoffset);
      FoldedCell& cell = sheet.cell(0,yoffset);
      cell.datum = SheetCell("...",false);
      */
    }

    // At the top level every selected row starts its own output row at
    // column 0.
    if (factor.depth==0) {
      xoffset = 0;
      yoffset = y0-fskip;
    }

    // add regular columns
    // (practice runs only advance xoffset by the table width)
    if (!practice) {
      if (yoffset>=sheet.height()) {
        sheet.nonDestructiveResize(sheet.width(),yoffset+1,
                                   FoldedCell());
      }

      for (int x=0; x<exp.src.width(); x++) {
        if (xoffset>=sheet.width()) {
          sheet.nonDestructiveResize(sheet.width()+1,sheet.height(),
                                     FoldedCell());
        }
        string oname = exp.base.getName(x);
        string name = oname;
        if (prefix!="") {
          name = prefix + "." + name;
        }
        if (namer) assertColumn(schema,xoffset,name,oname,factor);
        
        FoldedCell& cell = sheet.cell(xoffset,yoffset);
        cell.datum = exp.src.cellSummary(x,y);
        /*
        if (schema) {
          printf("[%d:%d:%d] add %d %d -> %d %d [%s]\n", xoffset, 
                 schema->getColumnCount(),
                 sheet.width(),
                 x, y, xoffset,yoffset,
                 cell.datum.toString().c_str());
        }
        */
        xoffset++;
      }
    } else {
      xoffset += exp.src.width();
    }

    // add expansions
    for (int x=0; x<exp.expanded.size(); x++) {
      FoldSelector& f = exp.expanded[x];
      // The linked selector matches on this row's value in the local key
      // column.  Note f lives in the cache's shared selector list.
      f.val = exp.src.cellSummary(f.idLocal,y);
      string oname = f.title;
      string name = oname;
      if (prefix!="") {
        name = prefix + "." + name;
      }
      if (f.minCt==-1 && f.maxCt==-1) {
        // No count limits: the linked rows become a nested sheet held in
        // one cell, taking up a single column.
        if (!practice) {
          if (xoffset>=sheet.width()) {
            sheet.nonDestructiveResize(sheet.width()+1,sheet.height(),
                                       FoldedCell());
          }
          if (namer) assertColumn(schema,xoffset,name,oname,factor); // PFHIT
          FoldedCell& cell = sheet.cell(xoffset,yoffset);
          // Shadows the `sheet` parameter within this block.
          FoldedSheet *sheet = cell.getOrCreateSheet();
          COOPY_ASSERT(sheet);
          fold_expander(FoldFactor(),folds, cache, *sheet, f);
        }
        xoffset++;
      } else {
        // Count limits present: the linked rows are laid out inline in
        // this sheet by a recursive call one level deeper.
        //if (!practice) printf("Go go\n");
        FoldFactor next_factor;
        next_factor.ct = f.maxCt;
        // `excess` is set here but not read anywhere in this function.
        next_factor.excess = f.minCt==1;
        next_factor.wrap = f.minCt==2;
        next_factor.xoffset = xoffset;
        next_factor.yoffset = yoffset;
        next_factor.depth = factor.depth+1;
        next_factor.practice = practice;
        next_factor.prefix = name;
        next_factor.namer = namer;
        next_factor.selector = &f;
        next_factor.history = factor.history;
        int next_ywrap = 0;
        int o = fold_expander(next_factor, folds, cache, sheet, f, schema,
                              &next_ywrap);
        // Remember the largest number of wrapped rows any expansion used.
        if (next_ywrap>dy) {
          dy = next_ywrap;
        }
        xoffset += o;
      }
    }
    int ncell_length = xoffset-initial_xoffset;
    if (ncell_length>cell_length) {
      cell_length = ncell_length;
    }
    // Mark this row, and any rows added by wrapping, with the current
    // zebra value.
    if (factor.zebra) {
      factor.zebra->nonDestructiveResize(1,sheet.height(),0);
      for (int i=at; i<at+dy+1; i++) {
        //printf("set %d %d (%d)\n", i, z, sheet.height());
        factor.zebra->cell(0,i) = z;
      }
    }
    // At the top level, wrapped rows push later rows further down.
    if (factor.depth==0) {
      at += dy;
    }
    at++;
  }

  // Account for an xoffset change made just before leaving the loop via
  // break.
  int ncell_length = xoffset-initial_xoffset;
  if (ncell_length>cell_length) {
    cell_length = ncell_length;
  }

  // Practice runs record how many rows matched (min/max across calls) and
  // the widest layout seen for this selector.
  if (practice) {
    int len = (int)selected.size();
    if (len<sel.actualMinCt||sel.actualMinCt==-1) sel.actualMinCt = len;
    if (len>sel.actualMaxCt) sel.actualMaxCt = len;
    if (cell_length>sel.targetLength) sel.targetLength = cell_length;
  } 
  
  // Nested calls pad their width up to the selector's target length.
  // NOTE(review): this resize is not guarded by !practice, so it can widen
  // `sheet` during a practice run as well - confirm that is intended.
  if (factor.depth>0) {
    while (cell_length<sel.targetLength) {
      if (xoffset>=sheet.width()) {
        sheet.nonDestructiveResize(sheet.width()+1,sheet.height(),
                                   FoldedCell());
      }
      xoffset++;
      cell_length++;
    }
  }

  return cell_length;
}
00834 
00835 
00836 
// Replace every occurrence of `old` in `str` with `rep`, scanning left to
// right.  Text inserted by a replacement is never rescanned, so `rep` may
// itself contain `old`.  An empty `old` is treated as "nothing to replace":
// find() would otherwise report a match at every position and the loop
// would never finish.
static void replace(std::string& str, const std::string& old,
                    const std::string& rep) {
  if (old.empty()) {
    return;
  }
  size_t pos = 0;
  while((pos = str.find(old, pos)) != std::string::npos) {
    str.replace(pos, old.length(), rep);
    pos += rep.length();
  }
}
00844 
00845 
00846 bool FoldTool::fold(PolyBook& src, PolyBook& rdest, FoldOptions& options) {
00847   PolyBook dest;
00848 
00849   dbg_printf("Starting fold/unfold\n");
00850 
00851   if (options.tableName=="" && src.getSheetCount()>1) {
00852     fprintf(stderr,"Please supply a root table name\n");
00853     return false;
00854   }
00855 
00856   FoldCache cache;
00857   cache.setBook(src);
00858 
00859   PolySheet recipe = options.recipe.readSheet("Folds");
00860   IntSheet zebra;
00861   map<string,ColumnHistory> history;
00862   if (recipe.isValid()) {
00863     dbg_printf("Found folds\n");
00864 
00865     SheetAccess& base = cache.getSheet(options.tableName.c_str());
00866     if (!base.isValid()) {
00867       return false;
00868     }
00869     
00870     vector<FoldLayout> layout;
00871     FoldLayout baseLayout;
00872     baseLayout.src = &base;
00873     baseLayout.updateBase();
00874     layout.push_back(baseLayout);
00875     
00876     //options.recipe.write("/tmp/test.sqlite");
00877     //fprintf(stderr, "failed to read recipe (no Folds table)\n");
00878     //    exit(1);
00879 
00880     recipe.hideHeaders();
00881     
00882     Folds folds;
00883     for (int i=0; i<recipe.height(); i++) {
00884       string fromTable = recipe.cellString(0,i);
00885       string fromField = recipe.cellString(1,i);
00886       string toTable = recipe.cellString(2,i);
00887       string toField = recipe.cellString(3,i);
00888       string allowedTable = recipe.cellString(4,i);
00889       SheetCell minCtStr;
00890       SheetCell maxCtStr;
00891       int minCt = -1;
00892       int maxCt = -1;
00893       string label;
00894       if (recipe.width()>6) {
00895         minCtStr = recipe.cellSummary(6,i);
00896         if (!minCtStr.escaped)  minCt = atoi(minCtStr.text.c_str());
00897         maxCtStr = recipe.cellSummary(5,i);
00898         if (!maxCtStr.escaped)  maxCt = atoi(maxCtStr.text.c_str());
00899       }
00900       if (recipe.width()>7) {
00901         label = recipe.cellSummary(7,i).text.c_str();
00902       }
00903       dbg_printf("Recipe line %d: %s %s %s %s (%s) %d:%d '%s'\n", i,
00904                  fromTable.c_str(), toTable.c_str(), fromField.c_str(), toField.c_str(),allowedTable.c_str(),minCt,maxCt,label.c_str());
00905       folds.add(fromTable.c_str(), toTable.c_str(), fromField.c_str(), toField.c_str(),allowedTable.c_str(), minCt, maxCt,label.c_str());
00906       if (fromTable==options.tableName) {
00907         /*
00908           SheetAccess& alt = cache.getSheet(toTable.c_str());
00909           if (!alt.isValid()) {
00910           return false;
00911           }
00912           layout.push_back(FoldLayout());
00913           FoldLayout& l = layout.back();
00914           l.src = &base;
00915           l.dest = &alt;
00916           l.srcIdName = fromField;
00917           l.destIdName = toField;
00918           l.updateBase();
00919           l.updateMap();
00920         */
00921       }
00922     }
00923 
00924     FoldedSheet *fsheet = new FoldedSheet;
00925     PolySheet psheet(fsheet,true);
00926     COOPY_ASSERT(fsheet);
00927     FoldSelector sel;
00928     sel.tableName = options.tableName;
00929 
00930     SimpleSheetSchema *schema = new SimpleSheetSchema;
00931     COOPY_ASSERT(schema);
00932     schema->setSheetName(coopy_get_default_table_name());
00933     FoldFactor factor;
00934     factor.practice = true;
00935     factor.selector = &sel;
00936     int prev_width = -1;
00937     int width = 0;
00938     while (prev_width!=width) {
00939       prev_width = width;
00940       width = fold_expander(factor, folds, cache, *fsheet, sel, schema);
00941       printf("On practice run, width is %d\n", width);
00942     }
00943 
00944     factor.practice = false;
00945     factor.zebra = &zebra;
00946     factor.history = &history;
00947     fold_expander(factor, folds, cache, *fsheet, sel, schema);
00948     printf("After actual run, data width is %d\n", fsheet->width());
00949     printf("After actual run, schema width is %d\n", schema->getColumnCount());
00950 
00951     psheet.setSchema(schema,true);
00952     //printf("Generated sheet %dx%d\n", fsheet->width(), fsheet->height());
00953 
00954     FakeBook *book = new FakeBook();
00955     if (book==NULL) {
00956       fprintf(stderr,"Failed to allocate output\n");
00957       return 1;
00958     }
00959     book->sheet = psheet;
00960     dest.take(book); 
00961     src = dest;
00962   } else {
00963     dest = src;
00964   }
00965 
00966   vector<string> drop_inventory;
00967   vector<string> orig_inventory;
00968   map<string,string> fate_inventory;
00969   map<string,bool> doom_inventory;
00970   PolySheet missing = options.recipe.readSheet("Missing");
00971   //printf("recipe? -- %s\n", options.recipe.toString().c_str());
00972   if (missing.isValid()) {
00973     dbg_printf("Processing list of \"Missing\" columns\n");
00974     for (int y=0; y<missing.height(); y++) {
00975       string name = missing.cellString(0,y);
00976       options.drops.insert(name);
00977     }
00978   }
00979 
00980   if (options.drops.size()>0) {
00981     dbg_printf("Working on drops...\n");
00982     vector<string> names = src.getNames();
00983     for (int i=0; i<src.getSheetCount(); i++) {
00984       PolySheet sheet = src.readSheet(names[i]);
00985       SchemaSniffer ss(sheet);
00986       SheetSchema *schema = ss.suggestSchema();
00987       if (!schema) {
00988         dbg_printf("No schema for %s\n", names[i].c_str());
00989         continue;
00990       }
00991       SimpleSheetSchema s;
00992       s.copy(*schema);
00993       int at = 0;
00994       for (int c=0; c<s.getColumnCount(); c++) {
00995         string iname = s.getColumnInfo(c).getName();
00996         orig_inventory.push_back(iname);
00997         string name;
00998         bool quoted = false;
00999         for (int i=0; i<(int)iname.length(); i++) {
01000           if (iname[i]=='[') quoted = true;
01001           if (!quoted) {
01002             name += iname[i];
01003           }
01004           if (iname[i]==']') quoted = false;
01005         }
01006         if (options.drops.find(name)!=options.drops.end()) {
01007           dbg_printf(" + Dropping column %s\n", iname.c_str());
01008           sheet.deleteColumn(ColumnRef(at));
01009           drop_inventory.push_back(iname);
01010           doom_inventory[iname] = true;
01011         } else {
01012           at++;
01013         }
01014       }
01015     }
01016   }
01017 
01018   PolySheet rename = options.recipe.readSheet("Rename");
01019   map<string,string> rename_map;
01020   if (rename.isValid()) {
01021     dbg_printf("Processing list of \"Rename\" columns\n");
01022     for (int y=0; y<rename.height(); y++) {
01023       string from = rename.cellString(0,y);
01024       string to = rename.cellString(1,y);
01025       rename_map[from] = to;
01026     }
01027   }
01028 
01029   if (rename_map.size()>0) {
01030     vector<string> names = src.getNames();
01031     for (int i=0; i<src.getSheetCount(); i++) {
01032       PolySheet sheet = src.readSheet(names[i]);
01033       SchemaSniffer ss(sheet);
01034       SheetSchema *schema = ss.suggestSchema();
01035       if (!schema) {
01036         dbg_printf("No schema for %s\n", names[i].c_str());
01037         continue;
01038       }
01039       SheetSchema& s = *schema;
01040       int at = 0;
01041       bool mod = false;
01042       for (int c=0; c<s.getColumnCount(); c++) {
01043         string iname = s.getColumnInfo(c).getName();
01044         string prev = iname;
01045 
01046         for (map<string,string>::iterator it=rename_map.begin();
01047              it != rename_map.end(); it++) {
01048           replace(iname,it->first,it->second);
01049         }
01050         if (iname!=prev) {
01051           printf(">>> %s -> %s\n", prev.c_str(), iname.c_str());
01052           s.modifyColumn(ColumnRef(c),ColumnInfo(iname));
01053           fate_inventory[prev] = iname;
01054           mod = true;
01055         }
01056       }
01057     }
01058     for (int i=0; i<src.getSheetCount(); i++) {
01059       //map
01060     }
01061   }
01062 
01063   // add zebra
01064   printf("Copying...\n");
01065   rdest.copy(dest,Property());
01066   printf("Sheets... %d\n",rdest.getNames().size());
01067   PolySheet sheet = rdest.readSheetByIndex(0);
01068   COOPY_ASSERT(sheet.isValid());
01069   sheet.hideHeaders();
01070   int LIGHT = 0xbb*0x100;
01071   int DARK = 0x99*0x100;
01072   printf("SHEET IS %s\n", sheet.toString().c_str());
01073   for (int i=0; i<sheet.height(); i++) {
01074     //printf("i %d zebra %d\n", i, zebra.cell(0,i));
01075     if (i>0) {
01076       if (zebra.cell(0,i)==zebra.cell(0,i-1)) {
01077         sheet.cellString(0,i,"...");
01078       }
01079     }
01080     Poly<Appearance> app = sheet.getRowAppearance(i);
01081     int r = zebra.cell(0,i)?LIGHT:DARK;
01082     int g = r;
01083     int b = r;
01084     if (app.isValid()) {
01085       app->begin();
01086       app->setBackgroundRgb16(r,g,b,
01087                               AppearanceRange::full());
01088       app->end();
01089     }
01090   }
01091   
01092   SimpleSheetSchema adder_schema;
01093   adder_schema.setSheetName("mapping");
01094   adder_schema.addColumn("NAME");
01095   adder_schema.addColumn("FATE");
01096   adder_schema.addColumn("ALIAS");
01097   adder_schema.addColumn("ORIG_TABLE");
01098   adder_schema.addColumn("ORIG_COLUMN");
01099   PolySheet adder = rdest.provideSheet(adder_schema);
01100   adder.setSchema(&adder_schema,false);
01101   adder.resize(5,orig_inventory.size());
01102   adder.createHeaders();
01103   //adder.hideHeaders();
01104   printf("Size %d %d / %d\n", adder.width(), adder.height(),
01105          orig_inventory.size());
01106   for (int i=0; i<(int)orig_inventory.size(); i++) {
01107     string n = orig_inventory[i];
01108     adder.cellString(0,i,n);
01109     if (fate_inventory.find(n)!=fate_inventory.end()) {
01110       adder.cellString(2,i,fate_inventory[n]);
01111       adder.cellString(1,i,"rename");
01112     }
01113     if (doom_inventory.find(n)!=doom_inventory.end()) {
01114       adder.cellString(1,i,"drop");
01115     }
01116     map<string,ColumnHistory>::iterator it = history.find(n);
01117     if (it!=history.end()) {
01118       adder.cellString(3,i,it->second.from_table.c_str());
01119       adder.cellString(4,i,it->second.from_name.c_str());
01120     }
01121   }
01122 
01123   return true;
01124 }
01125 
01126 
01127 
01128 bool FoldTool::unfold(coopy::store::PolyBook& src,
01129                       coopy::store::PolyBook& dest,
01130                       FoldOptions& options) {
01131   PolySheet mapping = src.readSheet("mapping");
01132   if (!mapping.isValid()) {
01133     mapping = options.recipe.readSheet("mapping");
01134   }
01135   if (!mapping.isValid()) {
01136     fprintf(stderr,"Need a sheet/table called 'mapping'\n");
01137     return false;
01138   }
01139 
01140   vector<string> names = src.getNames();
01141   PolySheet sheet;
01142   string sheet_name;
01143   for (int i=0; i<(int)names.size(); i++) {
01144     if (names[i]!="mapping") {
01145       sheet_name = names[i];
01146       sheet = src.readSheetByIndex(i);
01147       break;
01148     }
01149   }
01150   if (!sheet.isValid()) {
01151     fprintf(stderr,"No sheet to operate on\n");
01152     return false;
01153   }
01154 
01155   Property p;
01156   p.put("sheet",sheet_name.c_str());
01157   printf("Getting ready to copy...\n");
01158   dest.copy(src,p);
01159   printf("Copied.\n");
01160 
01161   PolySheet out = dest.readSheet(sheet_name.c_str());
01162   if (!out.isValid()) {
01163     fprintf(stderr,"Cannot open output\n");
01164     return false;
01165   }
01166 
01167   int at = 0;
01168   for (int i=0; i<mapping.height(); i++) {
01169     string name = mapping.cellString(0,i);
01170     string fate = mapping.cellString(1,i);
01171     string prev = mapping.cellString(2,i);
01172     printf("Operating on %s / %s / %s\n", name.c_str(),
01173            fate.c_str(), prev.c_str());
01174     if (fate=="drop") {
01175       out.insertColumn(ColumnRef(at),ColumnInfo(name));
01176       //schema->insertColumn(ColumnRef(at),name.c_str());
01177       at++;
01178     } else if (fate=="rename") {
01179       out.modifyColumn(ColumnRef(at),ColumnInfo(name));
01180       at++;
01181     } else {
01182       at++;
01183     }
01184   }
01185 
01186   return true;
01187 }
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines