COOPY » Guide
version 0.6.5
|
00001 #include <coopy/FoldTool.h> 00002 #include <coopy/FoldedSheet.h> 00003 #include <coopy/CsvSheet.h> 00004 #include <coopy/SchemaSniffer.h> 00005 #include <coopy/ShortTextBook.h> 00006 #include <coopy/CsvFile.h> 00007 #include <coopy/IntSheet.h> 00008 00009 #include <vector> 00010 #include <map> 00011 #include <set> 00012 00013 #include <coopy/Stringer.h> 00014 00015 using namespace std; 00016 using namespace coopy::fold; 00017 using namespace coopy::store; 00018 00019 typedef vector<int> ints; 00020 00021 class SheetCellCmp { 00022 public: 00023 bool operator() (const SheetCell& e1, const SheetCell& e2) const { 00024 if (e1.text<e2.text) return true; 00025 if (e1.text>e2.text) return false; 00026 return (e1.escaped?1:0)<(e2.escaped?1:0); 00027 } 00028 }; 00029 00030 class SheetAccess { 00031 public: 00032 PolySheet sheet; 00033 SchemaSniffer schema; 00034 00035 typedef multimap<SheetCell,int,SheetCellCmp> Mapper; 00036 typedef map<int,Mapper> Mappers; 00037 Mappers mappers; 00038 00039 SheetAccess() { 00040 } 00041 00042 void setSheet(const PolySheet& sheet) { 00043 this->sheet = sheet; 00044 schema.setSheet(sheet); 00045 } 00046 00047 bool isValid() const { 00048 return sheet.isValid(); 00049 } 00050 00051 std::string getName() { 00052 SheetSchema *ss = schema.suggestSchema(); 00053 if (!ss) { 00054 printf("No schema!\n"); 00055 return ""; 00056 } 00057 return ss->getSheetName(); 00058 } 00059 00060 std::string getName(int id) { 00061 SheetSchema *ss = schema.suggestSchema(); 00062 if (!ss) { 00063 printf("No schema!\n"); 00064 return ""; 00065 } 00066 ColumnInfo info = ss->getColumnInfo(id); 00067 return info.getName(); 00068 } 00069 00070 int getId(const char *name) { 00071 SheetSchema *ss = schema.suggestSchema(); 00072 if (!ss) { 00073 printf("No schema!\n"); 00074 return -1; 00075 } 00076 int result = ss->getColumnIndexByName(name); 00077 if (result<0) { 00078 printf("Could not find column %s\n", name); 00079 for (int i=0; i<ss->getColumnCount(); i++) { 00080 ColumnInfo info = ss->getColumnInfo(i); 00081 printf(" col %d %s\n", i, info.getName().c_str()); 00082 } 00083 exit(1); 00084 } 00085 return result; 00086 } 00087 00088 ints selectFrom(int id, const SheetCell& cell) { 00089 if (id==-1) { 00090 ints result; 00091 for (int i=0; i<sheet.height(); i++) { 00092 result.push_back(i); 00093 } 00094 //printf("selectFrom generated %d results\n", result.size()); 00095 return result; 00096 } 00097 00098 if (mappers.find(id)==mappers.end()) { 00099 mappers[id] = Mapper(); 00100 Mapper& m = mappers[id]; 00101 dbg_printf("Generating index for %s:%d\n", getName().c_str(),id); 00102 for (int i=0; i<sheet.height(); i++) { 00103 //if (sheet.cellSummary(id,i)==cell) { 00104 m.insert(std::make_pair(sheet.cellSummary(id,i),i)); 00105 //} 00106 } 00107 } 00108 Mapper& m = mappers[id]; 00109 ints result; 00110 for (Mapper::iterator it = m.lower_bound(cell); 00111 it != m.upper_bound(cell); it++) { 00112 result.push_back(it->second); 00113 } 00114 //printf("selectFrom generated %d results\n", result.size()); 00115 return result; 00116 } 00117 }; 00118 00119 00120 00121 class FoldLayout { 00122 public: 00123 SheetAccess *src; 00124 SheetAccess *dest; 00125 string srcIdName; 00126 string destIdName; 00127 int offset; 00128 00129 PolySheet srcTable; 00130 PolySheet destTable; 00131 int srcId; 00132 int destId; 00133 typedef multimap<int,int> mapper; 00134 mapper indexedSrc; 00135 mapper indexedDest; 00136 00137 FoldLayout() { 00138 src = dest = NULL; 00139 srcId = -1; 00140 destId = -1; 00141 offset = 0; 00142 } 00143 00144 bool updateBase() { 00145 if (src!=NULL) { 00146 srcTable = src->sheet; 00147 } 00148 if (dest!=NULL) { 00149 destTable = dest->sheet; 00150 } 00151 return true; 00152 } 00153 00154 bool updateIds() { 00155 if (srcId!=-1 && destId!=-1) return true; 00156 COOPY_ASSERT(src); 00157 COOPY_ASSERT(dest); 00158 srcId = src->getId(srcIdName.c_str()); 00159 destId = dest->getId(destIdName.c_str()); 00160 printf("IDs %s %s %d %d\n", srcIdName.c_str(), destIdName.c_str(), 00161 srcId, destId); 00162 return (srcId!=-1 && destId!=-1); 00163 } 00164 00165 bool updateMap() { 00166 if (indexedSrc.size()!=0) return true; 00167 if (indexedDest.size()!=0) return true; 00168 if (!updateIds()) { 00169 printf("Failed to get IDs\n"); 00170 return false; 00171 } 00172 COOPY_ASSERT(src); 00173 COOPY_ASSERT(dest); 00174 for (int y=0; y<srcTable.height(); y++) { 00175 int id = srcTable.cellSummary(srcId,y).asInt(); 00176 indexedSrc.insert(std::make_pair(y,id)); 00177 //printf("src %d -> %d\n", y, id); 00178 } 00179 for (int y=0; y<destTable.height(); y++) { 00180 int id = destTable.cellSummary(destId,y).asInt(); 00181 indexedDest.insert(std::make_pair(id,y)); 00182 //printf("dest %d -> %d\n", id, y); 00183 } 00184 return true; 00185 } 00186 00187 int transformForward(int y) { 00188 mapper::iterator it1 = indexedSrc.find(y); 00189 if (it1==indexedSrc.end()) { 00190 printf("src %d went nowhere\n", y); 00191 return -1; 00192 } 00193 mapper::iterator it2 = indexedDest.find(it1->second); 00194 if (it2==indexedDest.end()) { 00195 printf("dest %d from src %d went nowhere\n", it1->second, y); 00196 return -1; 00197 } 00198 return it2->second; 00199 } 00200 00201 }; 00202 00203 00204 class FoldEdge { 00205 public: 00206 string table; 00207 string key; 00208 00209 FoldEdge() {} 00210 00211 FoldEdge(string ntable, string nkey) { 00212 table = ntable; 00213 key = nkey; 00214 } 00215 00216 string toString() const { 00217 return table + ":" + key; 00218 } 00219 }; 00220 00221 class FoldEdgeCmp { 00222 public: 00223 bool operator() (const FoldEdge& e1, const FoldEdge& e2) const { 00224 if (e1.table<e2.table) return true; 00225 if (e1.table>e2.table) return false; 00226 return e1.key<e2.key; 00227 } 00228 }; 00229 00230 00231 class FoldSelector { 00232 public: 00233 // table to select from 00234 string tableName; 00235 // key name in that table 00236 string keyName; 00237 // id of key name in that table 00238 int id; 00239 00240 // value to match 00241 SheetCell val; 00242 // id in local table 00243 int idLocal; 00244 00245 int minCt; 00246 int maxCt; 00247 00248 int actualMinCt; 00249 int actualMaxCt; 00250 int targetLength; 00251 00252 string title; 00253 00254 FoldSelector() { 00255 id = -1; 00256 idLocal = -1; 00257 minCt = maxCt = -1; 00258 actualMinCt = -1; 00259 actualMaxCt = 0; 00260 targetLength = -1; 00261 } 00262 00263 string toString() const { 00264 return tableName + ":" + keyName + "=" + val.toString(); 00265 } 00266 }; 00267 00268 00269 class Folds; 00270 00271 struct Expansion { 00272 public: 00273 vector<FoldSelector>& expanded; 00274 SheetAccess& base; 00275 PolySheet& src; 00276 int selId; 00277 }; 00278 00279 class FoldCache { 00280 public: 00281 SheetAccess dud; 00282 PolySheet dud_sheet; 00283 PolyBook book; 00284 00285 map<FoldEdge,vector<FoldSelector>, FoldEdgeCmp > expanded_map; 00286 00287 void setBook(const PolyBook& book) { 00288 this->book = book; 00289 } 00290 00291 map<string,SheetAccess> sheets; 00292 00293 SheetAccess& getSheet(const char *name) { 00294 map<string,SheetAccess>::iterator it = sheets.find(name); 00295 if (it!=sheets.end()) { 00296 return it->second; 00297 } 00298 PolySheet sheet = book.readSheet(name); 00299 if (!sheet.isValid()) { 00300 fprintf(stderr,"Could not find table \"%s\"\n", name); 00301 return dud; 00302 } 00303 sheets[name] = SheetAccess(); 00304 sheets[name].setSheet(sheet); 00305 return sheets[name]; 00306 } 00307 00308 //vector<FoldSelector>& 00309 Expansion getExpansion(const FoldEdge& edge, Folds *folds); 00310 }; 00311 00312 FoldTool::FoldTool() { 00313 } 00314 00315 class FoldEdgePair { 00316 public: 00317 FoldEdge to, from; 00318 string allowed; 00319 int minCt, maxCt; 00320 string label; 00321 00322 FoldEdgePair() { 00323 minCt = maxCt = -1; 00324 } 00325 00326 string toString() const { 00327 string result = to.toString() + "->" + from.toString() + "(" + allowed + ")"; 00328 result += ","; 00329 result += stringer_encoder(minCt); 00330 result += ","; 00331 result += stringer_encoder(maxCt); 00332 result += ","; 00333 result += label; 00334 return result; 00335 } 00336 00337 string desc(const string& ref) const { 00338 if (label!="-") return label; 00339 00340 string result = ""; 00341 if (from.table!=ref) { 00342 result += from.table + ":"; 00343 } 00344 result += from.key; 00345 result += "->"; 00346 if (to.table!=ref) { 00347 result += to.table + ":"; 00348 } 00349 result += to.key; 00350 return result; 00351 } 00352 }; 00353 00354 00355 class FoldEdgePairCmp { 00356 public: 00357 bool operator() (const FoldEdgePair& e1, const FoldEdgePair& e2) { 00358 FoldEdgeCmp cmp; 00359 if (cmp(e1.from,e2.from)) return true; 00360 if (cmp(e2.from,e1.from)) return false; 00361 if (cmp(e1.to,e2.to)) return true; 00362 if (cmp(e2.to,e1.to)) return false; 00363 return e1.allowed<e2.allowed; 00364 } 00365 }; 00366 00367 class Folds { 00368 public: 00369 typedef multimap<FoldEdge,FoldEdge,FoldEdgeCmp> EdgeMap; 00370 typedef set<FoldEdgePair,FoldEdgePairCmp> AllowedPair; 00371 00372 EdgeMap fwd; 00373 EdgeMap rev; 00374 AllowedPair allowed; 00375 00376 void add(const char *fromTable, const char *toTable, 00377 const char *fromField, const char *toField, 00378 const char *allowedTable, 00379 int minCt, int maxCt, 00380 const char *label) { 00381 FoldEdge e1, e2; 00382 e1.table = fromTable; 00383 e1.key = fromField; 00384 e2.table = toTable; 00385 e2.key = toField; 00386 fwd.insert(make_pair(e1,e2)); 00387 rev.insert(make_pair(e2,e1)); 00388 FoldEdgePair p1; 00389 p1.from = e1; 00390 p1.to = e2; 00391 p1.allowed = allowedTable; 00392 p1.minCt = minCt; 00393 p1.maxCt = maxCt; 00394 p1.label = label; 00395 allowed.insert(p1); 00396 FoldEdgePair p2; 00397 p2.from = e2; 00398 p2.to = e1; 00399 p2.allowed = allowedTable; 00400 p2.minCt = minCt; 00401 p2.maxCt = maxCt; 00402 p2.label = label; 00403 allowed.insert(p2); 00404 //printf("Added %s -> %s\n", p1.toString().c_str(), p2.toString().c_str()); 00405 //printf("Allowed %s\n", p1.toString().c_str()); 00406 //printf("Allowed %s\n", p2.toString().c_str()); 00407 } 00408 }; 00409 00410 class FakeBook : public TextBook { 00411 public: 00412 std::string name; 00413 PolySheet sheet; 00414 00415 FakeBook() : name(coopy_get_default_table_name()) { 00416 } 00417 00418 virtual std::vector<std::string> getNames() { 00419 std::vector<std::string> result; 00420 result.push_back(name); 00421 return result; 00422 } 00423 00424 virtual PolySheet readSheet(const std::string& name) { 00425 if (name==this->name) { 00426 return sheet; 00427 } 00428 return PolySheet(); 00429 } 00430 00431 virtual bool open(const Property& config) { 00432 return false; 00433 } 00434 00435 virtual PolySheet provideSheet(const SheetSchema& schema) { 00436 return PolySheet(); 00437 } 00438 00439 virtual std::string desc() const { 00440 return "FakeBook"; 00441 } 00442 00443 virtual bool namedSheets() const { 00444 return false; 00445 } 00446 }; 00447 00448 00449 00450 Expansion FoldCache::getExpansion(const FoldEdge& edge, 00451 Folds *pfolds) { 00452 Folds& folds = *pfolds; 00453 FoldCache& cache = *this; 00454 00455 bool exists = false; 00456 if (expanded_map.find(edge)==expanded_map.end()) { 00457 expanded_map[edge] = vector<FoldSelector>(); 00458 } else { 00459 exists = true; 00460 } 00461 vector<FoldSelector>& expanded = expanded_map[edge]; 00462 00463 SheetAccess& base = cache.getSheet(edge.table.c_str()); 00464 PolySheet& src = base.isValid()?base.sheet:dud_sheet; 00465 int selId = -1; 00466 if (edge.key!="") { 00467 selId = base.getId(edge.key.c_str()); 00468 } 00469 Expansion result = {expanded,base,src,selId}; 00470 if (exists) { 00471 return result; 00472 } 00473 00474 for (int x=0; x<src.width(); x++) { 00475 FoldEdge e; 00476 e.table = edge.table; 00477 e.key = base.getName(x); 00478 if (selId==-1) { 00479 dbg_printf("Checking %s\n", e.toString().c_str()); 00480 } 00481 Folds::EdgeMap::iterator it = folds.fwd.lower_bound(e); 00482 Folds::EdgeMap::iterator it_end = folds.fwd.upper_bound(e); 00483 while (it!=it_end) { 00484 FoldEdgePair p; 00485 p.to = it->first; 00486 p.from = it->second; 00487 p.allowed = edge.table; 00488 if (selId==-1) { 00489 dbg_printf("Checking %s\n", p.toString().c_str()); 00490 } 00491 set<FoldEdgePair,FoldEdgePairCmp>::iterator al = folds.allowed.find(p); 00492 if (al!=folds.allowed.end()) { 00493 if (selId==-1) { 00494 dbg_printf("Should expand out %s (to %s)\n", e.key.c_str(), 00495 it->second.toString().c_str()); 00496 } 00497 FoldSelector f; 00498 f.tableName = it->second.table; 00499 f.keyName = it->second.key; 00500 SheetAccess& alt = cache.getSheet(f.tableName.c_str()); 00501 if (!alt.isValid()) { 00502 expanded.clear(); 00503 return result; 00504 } 00505 f.id = alt.getId(f.keyName.c_str()); 00506 f.idLocal = base.getId(it->first.key.c_str()); 00507 f.minCt = al->minCt; 00508 f.maxCt = al->maxCt; 00509 f.title = al->desc(base.getName()); 00510 expanded.push_back(f); 00511 } 00512 it++; 00513 } 00514 Folds::EdgeMap::iterator it2 = folds.rev.lower_bound(e); 00515 Folds::EdgeMap::iterator it2_end = folds.rev.upper_bound(e); 00516 while (it2!=it2_end) { 00517 FoldEdgePair p; 00518 p.to = it2->first; 00519 p.from = it2->second; 00520 p.allowed = edge.table; 00521 if (selId==-1) { 00522 dbg_printf("Checking %s\n", p.toString().c_str()); 00523 } 00524 set<FoldEdgePair,FoldEdgePairCmp>::iterator al = folds.allowed.find(p); 00525 if (al!=folds.allowed.end()) { 00526 if (selId==-1) { 00527 dbg_printf("Should expand in %s (from %s)\n", e.key.c_str(), 00528 it2->second.toString().c_str()); 00529 } 00530 FoldSelector f; 00531 f.tableName = it2->second.table; 00532 f.keyName = it2->second.key; 00533 SheetAccess& alt = cache.getSheet(f.tableName.c_str()); 00534 if (!alt.isValid()) { 00535 expanded.clear(); 00536 return result; 00537 } 00538 f.id = alt.getId(f.keyName.c_str()); 00539 f.idLocal = base.getId(it2->first.key.c_str()); 00540 f.minCt = al->minCt; 00541 f.maxCt = al->maxCt; 00542 f.title = al->desc(base.getName()); 00543 expanded.push_back(f); 00544 } 00545 it2++; 00546 } 00547 } 00548 return result; 00549 } 00550 00551 class ColumnHistory { 00552 public: 00553 string name; 00554 string from_table; 00555 string from_name; 00556 }; 00557 00558 class FoldFactor { 00559 public: 00560 int ct; 00561 bool excess; 00562 bool wrap; 00563 int xoffset; 00564 int yoffset; 00565 int depth; 00566 int skips; 00567 bool practice; 00568 bool namer; 00569 string prefix; 00570 IntSheet *zebra; 00571 map<string,ColumnHistory> *history; 00572 FoldSelector *selector; 00573 00574 FoldFactor() { 00575 ct = -1; 00576 excess = false; 00577 wrap = false; 00578 xoffset = yoffset = 0; 00579 depth = 0; 00580 practice = false; 00581 skips = 0; 00582 zebra = NULL; 00583 history = NULL; 00584 namer = true; 00585 selector = NULL; 00586 } 00587 }; 00588 00589 00590 void assertColumn(SimpleSheetSchema *s, int offset, const string& name, 00591 const string& orig_name, 00592 const FoldFactor& factor) { 00593 if (s==NULL) return; 00594 while (s->getColumnCount()<=offset) { 00595 s->addColumn("..."); 00596 } 00597 ColumnInfo c = s->getColumnInfo(offset); 00598 if (c.getName()!=name) { 00599 s->modifyColumn(ColumnRef(offset),ColumnInfo(name)); 00600 if (factor.history&&factor.selector) { 00601 ColumnHistory hist; 00602 hist.name = name; 00603 hist.from_table = factor.selector->tableName; 00604 hist.from_name = orig_name; 00605 (*factor.history)[hist.name] = hist; 00606 } 00607 } 00608 } 00609 00610 00611 static int fold_expander(const FoldFactor& factor, 00612 Folds& folds, FoldCache& cache, 00613 FoldedSheet& sheet, 00614 FoldSelector& sel, 00615 SimpleSheetSchema *schema = NULL, 00616 int *ywrap = NULL) { 00617 00618 bool practice = factor.practice; 00619 00620 Expansion exp = cache.getExpansion(FoldEdge(sel.tableName,sel.keyName), 00621 &folds); 00622 00623 // get selection 00624 vector<int> selected = exp.base.selectFrom(exp.selId,sel.val); 00625 00626 // Prepare to iterate 00627 if (factor.depth==0) { 00628 if (!practice) { 00629 sheet.resize(exp.src.width(),selected.size()-factor.skips,FoldedCell()); 00630 } 00631 } 00632 00633 int xoffset = factor.xoffset; 00634 int yoffset = factor.yoffset; 00635 int initial_xoffset = xoffset; 00636 int fct = 0; 00637 int fskip = 0; 00638 int cell_length = 0; 00639 00640 int at = 0; 00641 int z = 1; 00642 bool namer = factor.namer; 00643 for (vector<int>::iterator yit=selected.begin(); yit!=selected.end(); yit++) { 00644 z = 1-z; 00645 int y = *yit; 00646 int y0 = at; //yit-selected.begin(); 00647 int dy = 0; 00648 00649 string prefix = factor.prefix; 00650 if (prefix!=""&&factor.depth>0&&!practice) { 00651 if (sel.actualMaxCt>1) { 00652 prefix += "["; 00653 prefix += stringer_encoder(fct); 00654 //prefix += ":"; 00655 //prefix += stringer_encoder(sel.actualMaxCt); 00656 prefix += "]"; 00657 } 00658 } 00659 00660 fct++; 00661 if (fskip<factor.skips) { 00662 fskip++; 00663 continue; 00664 } 00665 00666 bool out_of_space = (fct>factor.ct && factor.ct!=-1); 00667 if (out_of_space&&!factor.wrap) { 00668 printf("TOO LONG! %d vs %d\n", fct, factor.ct); 00669 if (!practice) { 00670 if (xoffset>=sheet.width()) { 00671 sheet.nonDestructiveResize(sheet.width()+1,sheet.height(), 00672 FoldedCell()); 00673 } 00674 if (namer) assertColumn(schema,xoffset,prefix + ".excess","excess", 00675 factor); 00676 FoldedCell& cell = sheet.cell(xoffset,yoffset); 00677 FoldedSheet *sheet = cell.getOrCreateSheet(); 00678 COOPY_ASSERT(sheet); 00679 FoldFactor next_factor; 00680 next_factor.skips = fct-1; 00681 next_factor.namer = namer; 00682 next_factor.selector = &sel; 00683 next_factor.history = factor.history; 00684 fold_expander(next_factor, folds, cache, *sheet, sel); 00685 } 00686 printf("done\n"); 00687 xoffset++; 00688 break; 00689 } 00690 00691 if (out_of_space&&factor.wrap) { 00692 namer = false; 00693 yoffset++; 00694 xoffset = initial_xoffset; 00695 fct -= factor.ct; 00696 if (ywrap!=NULL) { 00697 (*ywrap)++; 00698 } 00699 /* 00700 printf("at %d %d / %d\n", sheet.width(), sheet.height(), yoffset); 00701 FoldedCell& cell = sheet.cell(0,yoffset); 00702 cell.datum = SheetCell("...",false); 00703 */ 00704 } 00705 00706 if (factor.depth==0) { 00707 xoffset = 0; 00708 yoffset = y0-fskip; 00709 } 00710 00711 // add regular columns 00712 if (!practice) { 00713 if (yoffset>=sheet.height()) { 00714 sheet.nonDestructiveResize(sheet.width(),yoffset+1, 00715 FoldedCell()); 00716 } 00717 00718 for (int x=0; x<exp.src.width(); x++) { 00719 if (xoffset>=sheet.width()) { 00720 sheet.nonDestructiveResize(sheet.width()+1,sheet.height(), 00721 FoldedCell()); 00722 } 00723 string oname = exp.base.getName(x); 00724 string name = oname; 00725 if (prefix!="") { 00726 name = prefix + "." + name; 00727 } 00728 if (namer) assertColumn(schema,xoffset,name,oname,factor); 00729 00730 FoldedCell& cell = sheet.cell(xoffset,yoffset); 00731 cell.datum = exp.src.cellSummary(x,y); 00732 /* 00733 if (schema) { 00734 printf("[%d:%d:%d] add %d %d -> %d %d [%s]\n", xoffset, 00735 schema->getColumnCount(), 00736 sheet.width(), 00737 x, y, xoffset,yoffset, 00738 cell.datum.toString().c_str()); 00739 } 00740 */ 00741 xoffset++; 00742 } 00743 } else { 00744 xoffset += exp.src.width(); 00745 } 00746 00747 // add expansions 00748 for (int x=0; x<exp.expanded.size(); x++) { 00749 FoldSelector& f = exp.expanded[x]; 00750 f.val = exp.src.cellSummary(f.idLocal,y); 00751 string oname = f.title; 00752 string name = oname; 00753 if (prefix!="") { 00754 name = prefix + "." + name; 00755 } 00756 if (f.minCt==-1 && f.maxCt==-1) { 00757 if (!practice) { 00758 if (xoffset>=sheet.width()) { 00759 sheet.nonDestructiveResize(sheet.width()+1,sheet.height(), 00760 FoldedCell()); 00761 } 00762 if (namer) assertColumn(schema,xoffset,name,oname,factor); // PFHIT 00763 FoldedCell& cell = sheet.cell(xoffset,yoffset); 00764 FoldedSheet *sheet = cell.getOrCreateSheet(); 00765 COOPY_ASSERT(sheet); 00766 fold_expander(FoldFactor(),folds, cache, *sheet, f); 00767 } 00768 xoffset++; 00769 } else { 00770 //if (!practice) printf("Go go\n"); 00771 FoldFactor next_factor; 00772 next_factor.ct = f.maxCt; 00773 next_factor.excess = f.minCt==1; 00774 next_factor.wrap = f.minCt==2; 00775 next_factor.xoffset = xoffset; 00776 next_factor.yoffset = yoffset; 00777 next_factor.depth = factor.depth+1; 00778 next_factor.practice = practice; 00779 next_factor.prefix = name; 00780 next_factor.namer = namer; 00781 next_factor.selector = &f; 00782 next_factor.history = factor.history; 00783 int next_ywrap = 0; 00784 int o = fold_expander(next_factor, folds, cache, sheet, f, schema, 00785 &next_ywrap); 00786 if (next_ywrap>dy) { 00787 dy = next_ywrap; 00788 } 00789 xoffset += o; 00790 } 00791 } 00792 int ncell_length = xoffset-initial_xoffset; 00793 if (ncell_length>cell_length) { 00794 cell_length = ncell_length; 00795 } 00796 if (factor.zebra) { 00797 factor.zebra->nonDestructiveResize(1,sheet.height(),0); 00798 for (int i=at; i<at+dy+1; i++) { 00799 //printf("set %d %d (%d)\n", i, z, sheet.height()); 00800 factor.zebra->cell(0,i) = z; 00801 } 00802 } 00803 if (factor.depth==0) { 00804 at += dy; 00805 } 00806 at++; 00807 } 00808 00809 int ncell_length = xoffset-initial_xoffset; 00810 if (ncell_length>cell_length) { 00811 cell_length = ncell_length; 00812 } 00813 00814 if (practice) { 00815 int len = (int)selected.size(); 00816 if (len<sel.actualMinCt||sel.actualMinCt==-1) sel.actualMinCt = len; 00817 if (len>sel.actualMaxCt) sel.actualMaxCt = len; 00818 if (cell_length>sel.targetLength) sel.targetLength = cell_length; 00819 } 00820 00821 if (factor.depth>0) { 00822 while (cell_length<sel.targetLength) { 00823 if (xoffset>=sheet.width()) { 00824 sheet.nonDestructiveResize(sheet.width()+1,sheet.height(), 00825 FoldedCell()); 00826 } 00827 xoffset++; 00828 cell_length++; 00829 } 00830 } 00831 00832 return cell_length; 00833 } 00834 00835 00836 00837 static void replace(string& str, const string& old, const string& rep) { 00838 size_t pos = 0; 00839 while((pos = str.find(old, pos)) != std::string::npos) { 00840 str.replace(pos, old.length(), rep); 00841 pos += rep.length(); 00842 } 00843 } 00844 00845 00846 bool FoldTool::fold(PolyBook& src, PolyBook& rdest, FoldOptions& options) { 00847 PolyBook dest; 00848 00849 dbg_printf("Starting fold/unfold\n"); 00850 00851 if (options.tableName=="" && src.getSheetCount()>1) { 00852 fprintf(stderr,"Please supply a root table name\n"); 00853 return false; 00854 } 00855 00856 FoldCache cache; 00857 cache.setBook(src); 00858 00859 PolySheet recipe = options.recipe.readSheet("Folds"); 00860 IntSheet zebra; 00861 map<string,ColumnHistory> history; 00862 if (recipe.isValid()) { 00863 dbg_printf("Found folds\n"); 00864 00865 SheetAccess& base = cache.getSheet(options.tableName.c_str()); 00866 if (!base.isValid()) { 00867 return false; 00868 } 00869 00870 vector<FoldLayout> layout; 00871 FoldLayout baseLayout; 00872 baseLayout.src = &base; 00873 baseLayout.updateBase(); 00874 layout.push_back(baseLayout); 00875 00876 //options.recipe.write("/tmp/test.sqlite"); 00877 //fprintf(stderr, "failed to read recipe (no Folds table)\n"); 00878 // exit(1); 00879 00880 recipe.hideHeaders(); 00881 00882 Folds folds; 00883 for (int i=0; i<recipe.height(); i++) { 00884 string fromTable = recipe.cellString(0,i); 00885 string fromField = recipe.cellString(1,i); 00886 string toTable = recipe.cellString(2,i); 00887 string toField = recipe.cellString(3,i); 00888 string allowedTable = recipe.cellString(4,i); 00889 SheetCell minCtStr; 00890 SheetCell maxCtStr; 00891 int minCt = -1; 00892 int maxCt = -1; 00893 string label; 00894 if (recipe.width()>6) { 00895 minCtStr = recipe.cellSummary(6,i); 00896 if (!minCtStr.escaped) minCt = atoi(minCtStr.text.c_str()); 00897 maxCtStr = recipe.cellSummary(5,i); 00898 if (!maxCtStr.escaped) maxCt = atoi(maxCtStr.text.c_str()); 00899 } 00900 if (recipe.width()>7) { 00901 label = recipe.cellSummary(7,i).text.c_str(); 00902 } 00903 dbg_printf("Recipe line %d: %s %s %s %s (%s) %d:%d '%s'\n", i, 00904 fromTable.c_str(), toTable.c_str(), fromField.c_str(), toField.c_str(),allowedTable.c_str(),minCt,maxCt,label.c_str()); 00905 folds.add(fromTable.c_str(), toTable.c_str(), fromField.c_str(), toField.c_str(),allowedTable.c_str(), minCt, maxCt,label.c_str()); 00906 if (fromTable==options.tableName) { 00907 /* 00908 SheetAccess& alt = cache.getSheet(toTable.c_str()); 00909 if (!alt.isValid()) { 00910 return false; 00911 } 00912 layout.push_back(FoldLayout()); 00913 FoldLayout& l = layout.back(); 00914 l.src = &base; 00915 l.dest = &alt; 00916 l.srcIdName = fromField; 00917 l.destIdName = toField; 00918 l.updateBase(); 00919 l.updateMap(); 00920 */ 00921 } 00922 } 00923 00924 FoldedSheet *fsheet = new FoldedSheet; 00925 PolySheet psheet(fsheet,true); 00926 COOPY_ASSERT(fsheet); 00927 FoldSelector sel; 00928 sel.tableName = options.tableName; 00929 00930 SimpleSheetSchema *schema = new SimpleSheetSchema; 00931 COOPY_ASSERT(schema); 00932 schema->setSheetName(coopy_get_default_table_name()); 00933 FoldFactor factor; 00934 factor.practice = true; 00935 factor.selector = &sel; 00936 int prev_width = -1; 00937 int width = 0; 00938 while (prev_width!=width) { 00939 prev_width = width; 00940 width = fold_expander(factor, folds, cache, *fsheet, sel, schema); 00941 printf("On practice run, width is %d\n", width); 00942 } 00943 00944 factor.practice = false; 00945 factor.zebra = &zebra; 00946 factor.history = &history; 00947 fold_expander(factor, folds, cache, *fsheet, sel, schema); 00948 printf("After actual run, data width is %d\n", fsheet->width()); 00949 printf("After actual run, schema width is %d\n", schema->getColumnCount()); 00950 00951 psheet.setSchema(schema,true); 00952 //printf("Generated sheet %dx%d\n", fsheet->width(), fsheet->height()); 00953 00954 FakeBook *book = new FakeBook(); 00955 if (book==NULL) { 00956 fprintf(stderr,"Failed to allocate output\n"); 00957 return 1; 00958 } 00959 book->sheet = psheet; 00960 dest.take(book); 00961 src = dest; 00962 } else { 00963 dest = src; 00964 } 00965 00966 vector<string> drop_inventory; 00967 vector<string> orig_inventory; 00968 map<string,string> fate_inventory; 00969 map<string,bool> doom_inventory; 00970 PolySheet missing = options.recipe.readSheet("Missing"); 00971 //printf("recipe? -- %s\n", options.recipe.toString().c_str()); 00972 if (missing.isValid()) { 00973 dbg_printf("Processing list of \"Missing\" columns\n"); 00974 for (int y=0; y<missing.height(); y++) { 00975 string name = missing.cellString(0,y); 00976 options.drops.insert(name); 00977 } 00978 } 00979 00980 if (options.drops.size()>0) { 00981 dbg_printf("Working on drops...\n"); 00982 vector<string> names = src.getNames(); 00983 for (int i=0; i<src.getSheetCount(); i++) { 00984 PolySheet sheet = src.readSheet(names[i]); 00985 SchemaSniffer ss(sheet); 00986 SheetSchema *schema = ss.suggestSchema(); 00987 if (!schema) { 00988 dbg_printf("No schema for %s\n", names[i].c_str()); 00989 continue; 00990 } 00991 SimpleSheetSchema s; 00992 s.copy(*schema); 00993 int at = 0; 00994 for (int c=0; c<s.getColumnCount(); c++) { 00995 string iname = s.getColumnInfo(c).getName(); 00996 orig_inventory.push_back(iname); 00997 string name; 00998 bool quoted = false; 00999 for (int i=0; i<(int)iname.length(); i++) { 01000 if (iname[i]=='[') quoted = true; 01001 if (!quoted) { 01002 name += iname[i]; 01003 } 01004 if (iname[i]==']') quoted = false; 01005 } 01006 if (options.drops.find(name)!=options.drops.end()) { 01007 dbg_printf(" + Dropping column %s\n", iname.c_str()); 01008 sheet.deleteColumn(ColumnRef(at)); 01009 drop_inventory.push_back(iname); 01010 doom_inventory[iname] = true; 01011 } else { 01012 at++; 01013 } 01014 } 01015 } 01016 } 01017 01018 PolySheet rename = options.recipe.readSheet("Rename"); 01019 map<string,string> rename_map; 01020 if (rename.isValid()) { 01021 dbg_printf("Processing list of \"Rename\" columns\n"); 01022 for (int y=0; y<rename.height(); y++) { 01023 string from = rename.cellString(0,y); 01024 string to = rename.cellString(1,y); 01025 rename_map[from] = to; 01026 } 01027 } 01028 01029 if (rename_map.size()>0) { 01030 vector<string> names = src.getNames(); 01031 for (int i=0; i<src.getSheetCount(); i++) { 01032 PolySheet sheet = src.readSheet(names[i]); 01033 SchemaSniffer ss(sheet); 01034 SheetSchema *schema = ss.suggestSchema(); 01035 if (!schema) { 01036 dbg_printf("No schema for %s\n", names[i].c_str()); 01037 continue; 01038 } 01039 SheetSchema& s = *schema; 01040 int at = 0; 01041 bool mod = false; 01042 for (int c=0; c<s.getColumnCount(); c++) { 01043 string iname = s.getColumnInfo(c).getName(); 01044 string prev = iname; 01045 01046 for (map<string,string>::iterator it=rename_map.begin(); 01047 it != rename_map.end(); it++) { 01048 replace(iname,it->first,it->second); 01049 } 01050 if (iname!=prev) { 01051 printf(">>> %s -> %s\n", prev.c_str(), iname.c_str()); 01052 s.modifyColumn(ColumnRef(c),ColumnInfo(iname)); 01053 fate_inventory[prev] = iname; 01054 mod = true; 01055 } 01056 } 01057 } 01058 for (int i=0; i<src.getSheetCount(); i++) { 01059 //map 01060 } 01061 } 01062 01063 // add zebra 01064 printf("Copying...\n"); 01065 rdest.copy(dest,Property()); 01066 printf("Sheets... %d\n",rdest.getNames().size()); 01067 PolySheet sheet = rdest.readSheetByIndex(0); 01068 COOPY_ASSERT(sheet.isValid()); 01069 sheet.hideHeaders(); 01070 int LIGHT = 0xbb*0x100; 01071 int DARK = 0x99*0x100; 01072 printf("SHEET IS %s\n", sheet.toString().c_str()); 01073 for (int i=0; i<sheet.height(); i++) { 01074 //printf("i %d zebra %d\n", i, zebra.cell(0,i)); 01075 if (i>0) { 01076 if (zebra.cell(0,i)==zebra.cell(0,i-1)) { 01077 sheet.cellString(0,i,"..."); 01078 } 01079 } 01080 Poly<Appearance> app = sheet.getRowAppearance(i); 01081 int r = zebra.cell(0,i)?LIGHT:DARK; 01082 int g = r; 01083 int b = r; 01084 if (app.isValid()) { 01085 app->begin(); 01086 app->setBackgroundRgb16(r,g,b, 01087 AppearanceRange::full()); 01088 app->end(); 01089 } 01090 } 01091 01092 SimpleSheetSchema adder_schema; 01093 adder_schema.setSheetName("mapping"); 01094 adder_schema.addColumn("NAME"); 01095 adder_schema.addColumn("FATE"); 01096 adder_schema.addColumn("ALIAS"); 01097 adder_schema.addColumn("ORIG_TABLE"); 01098 adder_schema.addColumn("ORIG_COLUMN"); 01099 PolySheet adder = rdest.provideSheet(adder_schema); 01100 adder.setSchema(&adder_schema,false); 01101 adder.resize(5,orig_inventory.size()); 01102 adder.createHeaders(); 01103 //adder.hideHeaders(); 01104 printf("Size %d %d / %d\n", adder.width(), adder.height(), 01105 orig_inventory.size()); 01106 for (int i=0; i<(int)orig_inventory.size(); i++) { 01107 string n = orig_inventory[i]; 01108 adder.cellString(0,i,n); 01109 if (fate_inventory.find(n)!=fate_inventory.end()) { 01110 adder.cellString(2,i,fate_inventory[n]); 01111 adder.cellString(1,i,"rename"); 01112 } 01113 if (doom_inventory.find(n)!=doom_inventory.end()) { 01114 adder.cellString(1,i,"drop"); 01115 } 01116 map<string,ColumnHistory>::iterator it = history.find(n); 01117 if (it!=history.end()) { 01118 adder.cellString(3,i,it->second.from_table.c_str()); 01119 adder.cellString(4,i,it->second.from_name.c_str()); 01120 } 01121 } 01122 01123 return true; 01124 } 01125 01126 01127 01128 bool FoldTool::unfold(coopy::store::PolyBook& src, 01129 coopy::store::PolyBook& dest, 01130 FoldOptions& options) { 01131 PolySheet mapping = src.readSheet("mapping"); 01132 if (!mapping.isValid()) { 01133 mapping = options.recipe.readSheet("mapping"); 01134 } 01135 if (!mapping.isValid()) { 01136 fprintf(stderr,"Need a sheet/table called 'mapping'\n"); 01137 return false; 01138 } 01139 01140 vector<string> names = src.getNames(); 01141 PolySheet sheet; 01142 string sheet_name; 01143 for (int i=0; i<(int)names.size(); i++) { 01144 if (names[i]!="mapping") { 01145 sheet_name = names[i]; 01146 sheet = src.readSheetByIndex(i); 01147 break; 01148 } 01149 } 01150 if (!sheet.isValid()) { 01151 fprintf(stderr,"No sheet to operate on\n"); 01152 return false; 01153 } 01154 01155 Property p; 01156 p.put("sheet",sheet_name.c_str()); 01157 printf("Getting ready to copy...\n"); 01158 dest.copy(src,p); 01159 printf("Copied.\n"); 01160 01161 PolySheet out = dest.readSheet(sheet_name.c_str()); 01162 if (!out.isValid()) { 01163 fprintf(stderr,"Cannot open output\n"); 01164 return false; 01165 } 01166 01167 int at = 0; 01168 for (int i=0; i<mapping.height(); i++) { 01169 string name = mapping.cellString(0,i); 01170 string fate = mapping.cellString(1,i); 01171 string prev = mapping.cellString(2,i); 01172 printf("Operating on %s / %s / %s\n", name.c_str(), 01173 fate.c_str(), prev.c_str()); 01174 if (fate=="drop") { 01175 out.insertColumn(ColumnRef(at),ColumnInfo(name)); 01176 //schema->insertColumn(ColumnRef(at),name.c_str()); 01177 at++; 01178 } else if (fate=="rename") { 01179 out.modifyColumn(ColumnRef(at),ColumnInfo(name)); 01180 at++; 01181 } else { 01182 at++; 01183 } 01184 } 01185 01186 return true; 01187 }