COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_core/TextBook.cpp
Go to the documentation of this file.
00001 #include <coopy/TextBook.h>
00002 #include <coopy/SchemaSniffer.h>
00003 #include <coopy/Pool.h>
00004 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 
00008 #include <algorithm>
00009 
00010 #include <sys/stat.h>
00011 #include <unistd.h>
00012 
00013 using namespace std;
00014 using namespace coopy::store;
00015 
00016 bool TextBook::equals(const TextBook& alt, const coopy::cmp::CompareFlags& flags) const {
00017   TextBook *b1 = (TextBook *)this;
00018   TextBook *b2 = (TextBook *)&alt;
00019   vector<string> names = b1->getNames();
00020   vector<string> altNames = b2->getNames();
00021   if (names.size()!=altNames.size()) {
00022     dbg_printf("Table count mismatch, %d vs %d\n",
00023                names.size(),
00024                altNames.size());
00025     return false;
00026   }
00027   for (int k=0; k<names.size(); k++) {
00028     PolySheet s1 = b1->readSheet(names[k]);
00029     SchemaSniffer sniffer1(s1,names[k].c_str());
00030     if (s1.getSchema()==NULL) {
00031       s1.setSchema(sniffer1.suggestSchema(),false);
00032     }
00033     if (flags.assume_header) s1.hideHeaders();
00034     PolySheet s2 = b2->readSheet(names[k]);
00035     if (!s2.isValid()) {
00036       s2 = b2->readSheet(altNames[k]);
00037     }
00038     SchemaSniffer sniffer2(s2,altNames[k].c_str());
00039     if (s2.getSchema()==NULL) {
00040       s2.setSchema(sniffer2.suggestSchema(),false);
00041     }
00042     if (flags.assume_header) s2.hideHeaders();
00043     if (s1.width()!=s2.width() || s1.height()!=s2.height()) {
00044       dbg_printf("Size mismatch, %dx%d vs %dx%d\n",
00045                  s1.width(), s1.height(),
00046                  s2.width(), s2.height());
00047       return false;
00048     }
00049     for (int j=0; j<s1.height(); j++) {
00050       for (int i=0; i<s1.width(); i++) {
00051         if (s1.cellSummary(i,j)!=s2.cellSummary(i,j)) {
00052           printf("Mismatch on cell %d,%d: '%s' vs '%s'\n",
00053                  i,j, 
00054                  s1.cellSummary(i,j).toString().c_str(),
00055                  s2.cellSummary(i,j).toString().c_str());
00056           return false;
00057         }
00058       }
00059     }
00060   }
00061   
00062   return true;
00063 }
00064 
00065 
00066 bool TextBook::copy(const TextBook& alt, const Property& options) {
00067   bool failure = false;
00068   dbg_printf("Copying book\n");
00069   TextBook& src = (TextBook &) alt;
00070   vector<string> names = src.getNames();
00071   vector<string> names0 = getNames();
00072 
00073   string sel = options.get("sheet",PolyValue::makeString("")).asString();
00074 
00075   for (int i=0; i<(int)names.size(); i++) {
00076     string name = names[i];
00077     if (sel!="") {
00078       dbg_printf("Comparing %s and %s\n", sel.c_str(), name.c_str());
00079       if (sel!=name) {
00080         continue;
00081       }
00082     }
00083     dbg_printf("Working on %s\n", name.c_str());
00084     vector<string>::const_iterator it = find(names0.begin(),names0.end(),name);
00085     PolySheet sheet = src.readSheet(name);
00086     src.applyPool(sheet);
00087     string target_name = name;
00088     SheetSchema *schema = sheet.getSchema();
00089     SchemaSniffer sniffer(sheet,name.c_str());
00090     //if (schema==NULL) {
00091     //dbg_printf(" - No schema available, sniffing...\n");
00092     schema = sniffer.suggestSchema();
00093     //}
00094     if (schema==NULL) {
00095       fprintf(stderr, "Cannot determine sheet schema: %s\n", name.c_str());
00096       return false;
00097     }
00098     dbg_printf("  - Have a schema with %d columns\n", 
00099                schema->getColumnCount());
00100     dbg_printf("  - Have a table with %d rows\n", 
00101                sheet.height());
00102     dbg_printf("  - Sheet name [%s]\n", 
00103                schema->getSheetName().c_str());
00104     if (coopy_is_verbose()) {
00105       for (int i=0; i<schema->getColumnCount(); i++) {
00106         ColumnInfo info = schema->getColumnInfo(i);
00107         dbg_printf("    - column %s type %s\n", info.getName().c_str(),
00108                    info.getColumnType().asSqlite(true).c_str());
00109       }
00110     }
00111     PolySheet target = readSheet(target_name);
00112     if (!target.isValid()) {
00113       SheetSchema *pschema = schema;
00114       SimpleSheetSchema sss;
00115       if (src.getPool()) {
00116         if (src.fixSchema(*schema,sss)) {
00117           pschema = &sss;
00118         }
00119       }
00120       if (!addSheet(*pschema)) {
00121         fprintf(stderr, "Failed to create sheet %s\n", name.c_str());
00122         failure = true;
00123         continue;
00124       }
00125       target = readSheet(target_name);
00126     }
00127     if (!target.isValid()) {
00128       fprintf(stderr, "Failed to localize sheet %s\n", name.c_str());
00129       return false;
00130     }
00131     if (target.width()!=sheet.width()) {
00132       if (!target.hasDimension()) {
00133         target.forceWidth(sheet.width());
00134       }
00135     }
00136     if (target.width()!=sheet.width()) {
00137       fprintf(stderr, "Column mismatch %s\n", name.c_str());
00138       fprintf(stderr, "Src width %d, dest width %d\n", 
00139               sheet.width(), target.width());
00140       return false;
00141     }
00142     if (target.height()!=0) {
00143       target.deleteData();
00144     }
00145     if (target.height()!=0) {
00146       if (target.hasDimension()) {
00147         fprintf(stderr, "Could not remove existing data: %s\n", name.c_str());
00148         return false;
00149       }
00150     }
00151     int start = 0;
00152     bool ext = target.hasExternalColumnNames()||target.getSchema()!=NULL;
00153     dbg_printf("  - target.hasExternalColumnNames? %d\n", 
00154                target.hasExternalColumnNames());
00155     
00156     if (schema->headerHeight()>0) {
00157       if (ext) {
00158         start += schema->headerHeight();
00159         if (start>0) {
00160           if (target.getSchema()!=NULL) {
00161             target.getSchema()->setHeaderHeight(0);
00162           }
00163         }
00164       }
00165     }
00166     if (!ext) {
00167       if (schema->headerHeight()<=0 && !schema->isGuess()) {
00168         bool named = false;
00169         int matches = 0;
00170         for (int j=0; j<schema->getColumnCount(); j++) {
00171           ColumnInfo info = schema->getColumnInfo(j);
00172           if (info.hasName()) { 
00173             named = true; 
00174             for (int k=0; k<3&&k<sheet.height(); k++) {
00175               string alt = sheet.cellString(j,k);
00176               if (alt==info.getName()) {
00177                 matches++;
00178                 break;
00179               }
00180             }
00181           }
00182         }
00183         dbg_printf("inplace column name matches: %d of %d\n", matches,
00184                    schema->getColumnCount());
00185         if (named && (matches==0||matches<schema->getColumnCount()*0.75)) {
00186           Poly<SheetRow> pRow = target.insertRow();
00187           SheetRow& row = *pRow;
00188           for (int j=0; j<schema->getColumnCount(); j++) {
00189             ColumnInfo info = schema->getColumnInfo(j);
00190             row.setCell(j,SheetCell(info.getName(),false));
00191           }
00192           row.flush();
00193           target.addedHeader();
00194         }
00195       }
00196     }
00197     dbg_printf("Copying rows from %s to %s (%ld -> %ld)\n",
00198                sheet.desc().c_str(), target.desc().c_str(),
00199                (long int)(&sheet.tail()),
00200                (long int)(&target.tail()));
00201     //printf("Origin is %s\n", sheet.toString().c_str());
00202     //printf("Target is %s\n", target.toString().c_str());
00203     target.beginTransaction();
00204     for (int i=start; i<sheet.height(); i++) {
00205       dbg_printf("Row %d (src height %d target height %d)\n", i,
00206                  sheet.height(),target.height());
00207       Poly<SheetRow> pRow = target.insertRow();
00208       SheetRow& row = *pRow;
00209       for (int j=0; j<sheet.width(); j++) {
00210         row.setCell(j,sheet.getCell(j,i));
00211       }
00212       row.flush();
00213     }
00214     target.endTransaction();
00215     //printf("Origin is after %s\n", sheet.toString().c_str());
00216     //printf("Target is after %s\n", target.toString().c_str());
00217     dbg_printf("Final size for %s: src %dx%d target %dx%d\n", 
00218                name.c_str(),
00219                sheet.width(), sheet.height(),
00220                target.width(), target.height());
00221     if (sheet.getSchema()) {
00222       dbg_printf("Schema src: %s\n", sheet.getSchema()->toString().c_str());
00223     }
00224     if (target.getSchema()) {
00225       dbg_printf("Schema target: %s\n", target.getSchema()->toString().c_str());
00226     }
00227   }
00228   if (failure) exit(1);
00229   return !failure;
00230 }
00231 
00232 
00233 PolySheet TextBook::provideSheet(const SheetSchema& schema) {
00234   dbg_printf("provideSheet %s for %s\n", schema.getSheetName().c_str(),
00235              desc().c_str());
00236   PolySheet result = readSheet(schema.getSheetName());
00237   if (result.isValid()) {
00238     return result;
00239   }
00240   if (addSheet(schema)) {
00241     return readSheet(schema.getSheetName());
00242   }
00243   return result;
00244 }
00245 
00246 
00247 
00248 bool TextBook::exists(const char *fname) {
00249   struct stat s;
00250   int result = stat(fname,&s);
00251   return (result==0);
00252 }
00253 
00254 
00255 bool TextBook::fixSchema(const SheetSchema& in,
00256                          SimpleSheetSchema& out) {
00257   SimpleSheetSchema& sss = out;
00258   sss.copy(in);
00259   if (getPool()) {
00260     for (int c=0; c<sss.getColumnCount(); c++) {
00261       ColumnInfo info = sss.getColumnInfo(c);
00262       string col_name = info.getName();
00263       PoolColumnLink link = getPool()->lookup(sss.getSheetName(),col_name);
00264       if (link.isValid()) {
00265         if (link.isInventor()) {
00266           ColumnType& t = sss.modifyType(c);
00267           t = ColumnType("INTEGER");
00268           t.autoIncrement = true;
00269           t.autoIncrementSet = true;
00270           t.primaryKey = true;
00271           t.primaryKeySet = true;
00272         } else {
00273           ColumnType& t = sss.modifyType(c);
00274           PoolColumnLink org = getPool()->trace(link);
00275           t = ColumnType("INTEGER");
00276           t.foreignKeySet = true;
00277           t.foreignTable = org.getTableName();
00278           t.foreignKey = org.getColumnName();
00279         }
00280       }
00281     }
00282     return true;
00283   }
00284   return false;
00285 }
00286 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines