COOPY » Guide
version 0.6.5
|
00001 #include <coopy/TextBook.h> 00002 #include <coopy/SchemaSniffer.h> 00003 #include <coopy/Pool.h> 00004 00005 #include <stdio.h> 00006 #include <stdlib.h> 00007 00008 #include <algorithm> 00009 00010 #include <sys/stat.h> 00011 #include <unistd.h> 00012 00013 using namespace std; 00014 using namespace coopy::store; 00015 00016 bool TextBook::equals(const TextBook& alt, const coopy::cmp::CompareFlags& flags) const { 00017 TextBook *b1 = (TextBook *)this; 00018 TextBook *b2 = (TextBook *)&alt; 00019 vector<string> names = b1->getNames(); 00020 vector<string> altNames = b2->getNames(); 00021 if (names.size()!=altNames.size()) { 00022 dbg_printf("Table count mismatch, %d vs %d\n", 00023 names.size(), 00024 altNames.size()); 00025 return false; 00026 } 00027 for (int k=0; k<names.size(); k++) { 00028 PolySheet s1 = b1->readSheet(names[k]); 00029 SchemaSniffer sniffer1(s1,names[k].c_str()); 00030 if (s1.getSchema()==NULL) { 00031 s1.setSchema(sniffer1.suggestSchema(),false); 00032 } 00033 if (flags.assume_header) s1.hideHeaders(); 00034 PolySheet s2 = b2->readSheet(names[k]); 00035 if (!s2.isValid()) { 00036 s2 = b2->readSheet(altNames[k]); 00037 } 00038 SchemaSniffer sniffer2(s2,altNames[k].c_str()); 00039 if (s2.getSchema()==NULL) { 00040 s2.setSchema(sniffer2.suggestSchema(),false); 00041 } 00042 if (flags.assume_header) s2.hideHeaders(); 00043 if (s1.width()!=s2.width() || s1.height()!=s2.height()) { 00044 dbg_printf("Size mismatch, %dx%d vs %dx%d\n", 00045 s1.width(), s1.height(), 00046 s2.width(), s2.height()); 00047 return false; 00048 } 00049 for (int j=0; j<s1.height(); j++) { 00050 for (int i=0; i<s1.width(); i++) { 00051 if (s1.cellSummary(i,j)!=s2.cellSummary(i,j)) { 00052 printf("Mismatch on cell %d,%d: '%s' vs '%s'\n", 00053 i,j, 00054 s1.cellSummary(i,j).toString().c_str(), 00055 s2.cellSummary(i,j).toString().c_str()); 00056 return false; 00057 } 00058 } 00059 } 00060 } 00061 00062 return true; 00063 } 00064 00065 00066 bool TextBook::copy(const TextBook& alt, const Property& options) { 00067 bool failure = false; 00068 dbg_printf("Copying book\n"); 00069 TextBook& src = (TextBook &) alt; 00070 vector<string> names = src.getNames(); 00071 vector<string> names0 = getNames(); 00072 00073 string sel = options.get("sheet",PolyValue::makeString("")).asString(); 00074 00075 for (int i=0; i<(int)names.size(); i++) { 00076 string name = names[i]; 00077 if (sel!="") { 00078 dbg_printf("Comparing %s and %s\n", sel.c_str(), name.c_str()); 00079 if (sel!=name) { 00080 continue; 00081 } 00082 } 00083 dbg_printf("Working on %s\n", name.c_str()); 00084 vector<string>::const_iterator it = find(names0.begin(),names0.end(),name); 00085 PolySheet sheet = src.readSheet(name); 00086 src.applyPool(sheet); 00087 string target_name = name; 00088 SheetSchema *schema = sheet.getSchema(); 00089 SchemaSniffer sniffer(sheet,name.c_str()); 00090 //if (schema==NULL) { 00091 //dbg_printf(" - No schema available, sniffing...\n"); 00092 schema = sniffer.suggestSchema(); 00093 //} 00094 if (schema==NULL) { 00095 fprintf(stderr, "Cannot determine sheet schema: %s\n", name.c_str()); 00096 return false; 00097 } 00098 dbg_printf(" - Have a schema with %d columns\n", 00099 schema->getColumnCount()); 00100 dbg_printf(" - Have a table with %d rows\n", 00101 sheet.height()); 00102 dbg_printf(" - Sheet name [%s]\n", 00103 schema->getSheetName().c_str()); 00104 if (coopy_is_verbose()) { 00105 for (int i=0; i<schema->getColumnCount(); i++) { 00106 ColumnInfo info = schema->getColumnInfo(i); 00107 dbg_printf(" - column %s type %s\n", info.getName().c_str(), 00108 info.getColumnType().asSqlite(true).c_str()); 00109 } 00110 } 00111 PolySheet target = readSheet(target_name); 00112 if (!target.isValid()) { 00113 SheetSchema *pschema = schema; 00114 SimpleSheetSchema sss; 00115 if (src.getPool()) { 00116 if (src.fixSchema(*schema,sss)) { 00117 pschema = &sss; 00118 } 00119 } 00120 if (!addSheet(*pschema)) { 00121 fprintf(stderr, "Failed to create sheet %s\n", name.c_str()); 00122 failure = true; 00123 continue; 00124 } 00125 target = readSheet(target_name); 00126 } 00127 if (!target.isValid()) { 00128 fprintf(stderr, "Failed to localize sheet %s\n", name.c_str()); 00129 return false; 00130 } 00131 if (target.width()!=sheet.width()) { 00132 if (!target.hasDimension()) { 00133 target.forceWidth(sheet.width()); 00134 } 00135 } 00136 if (target.width()!=sheet.width()) { 00137 fprintf(stderr, "Column mismatch %s\n", name.c_str()); 00138 fprintf(stderr, "Src width %d, dest width %d\n", 00139 sheet.width(), target.width()); 00140 return false; 00141 } 00142 if (target.height()!=0) { 00143 target.deleteData(); 00144 } 00145 if (target.height()!=0) { 00146 if (target.hasDimension()) { 00147 fprintf(stderr, "Could not remove existing data: %s\n", name.c_str()); 00148 return false; 00149 } 00150 } 00151 int start = 0; 00152 bool ext = target.hasExternalColumnNames()||target.getSchema()!=NULL; 00153 dbg_printf(" - target.hasExternalColumnNames? %d\n", 00154 target.hasExternalColumnNames()); 00155 00156 if (schema->headerHeight()>0) { 00157 if (ext) { 00158 start += schema->headerHeight(); 00159 if (start>0) { 00160 if (target.getSchema()!=NULL) { 00161 target.getSchema()->setHeaderHeight(0); 00162 } 00163 } 00164 } 00165 } 00166 if (!ext) { 00167 if (schema->headerHeight()<=0 && !schema->isGuess()) { 00168 bool named = false; 00169 int matches = 0; 00170 for (int j=0; j<schema->getColumnCount(); j++) { 00171 ColumnInfo info = schema->getColumnInfo(j); 00172 if (info.hasName()) { 00173 named = true; 00174 for (int k=0; k<3&&k<sheet.height(); k++) { 00175 string alt = sheet.cellString(j,k); 00176 if (alt==info.getName()) { 00177 matches++; 00178 break; 00179 } 00180 } 00181 } 00182 } 00183 dbg_printf("inplace column name matches: %d of %d\n", matches, 00184 schema->getColumnCount()); 00185 if (named && (matches==0||matches<schema->getColumnCount()*0.75)) { 00186 Poly<SheetRow> pRow = target.insertRow(); 00187 SheetRow& row = *pRow; 00188 for (int j=0; j<schema->getColumnCount(); j++) { 00189 ColumnInfo info = schema->getColumnInfo(j); 00190 row.setCell(j,SheetCell(info.getName(),false)); 00191 } 00192 row.flush(); 00193 target.addedHeader(); 00194 } 00195 } 00196 } 00197 dbg_printf("Copying rows from %s to %s (%ld -> %ld)\n", 00198 sheet.desc().c_str(), target.desc().c_str(), 00199 (long int)(&sheet.tail()), 00200 (long int)(&target.tail())); 00201 //printf("Origin is %s\n", sheet.toString().c_str()); 00202 //printf("Target is %s\n", target.toString().c_str()); 00203 target.beginTransaction(); 00204 for (int i=start; i<sheet.height(); i++) { 00205 dbg_printf("Row %d (src height %d target height %d)\n", i, 00206 sheet.height(),target.height()); 00207 Poly<SheetRow> pRow = target.insertRow(); 00208 SheetRow& row = *pRow; 00209 for (int j=0; j<sheet.width(); j++) { 00210 row.setCell(j,sheet.getCell(j,i)); 00211 } 00212 row.flush(); 00213 } 00214 target.endTransaction(); 00215 //printf("Origin is after %s\n", sheet.toString().c_str()); 00216 //printf("Target is after %s\n", target.toString().c_str()); 00217 dbg_printf("Final size for %s: src %dx%d target %dx%d\n", 00218 name.c_str(), 00219 sheet.width(), sheet.height(), 00220 target.width(), target.height()); 00221 if (sheet.getSchema()) { 00222 dbg_printf("Schema src: %s\n", sheet.getSchema()->toString().c_str()); 00223 } 00224 if (target.getSchema()) { 00225 dbg_printf("Schema target: %s\n", target.getSchema()->toString().c_str()); 00226 } 00227 } 00228 if (failure) exit(1); 00229 return !failure; 00230 } 00231 00232 00233 PolySheet TextBook::provideSheet(const SheetSchema& schema) { 00234 dbg_printf("provideSheet %s for %s\n", schema.getSheetName().c_str(), 00235 desc().c_str()); 00236 PolySheet result = readSheet(schema.getSheetName()); 00237 if (result.isValid()) { 00238 return result; 00239 } 00240 if (addSheet(schema)) { 00241 return readSheet(schema.getSheetName()); 00242 } 00243 return result; 00244 } 00245 00246 00247 00248 bool TextBook::exists(const char *fname) { 00249 struct stat s; 00250 int result = stat(fname,&s); 00251 return (result==0); 00252 } 00253 00254 00255 bool TextBook::fixSchema(const SheetSchema& in, 00256 SimpleSheetSchema& out) { 00257 SimpleSheetSchema& sss = out; 00258 sss.copy(in); 00259 if (getPool()) { 00260 for (int c=0; c<sss.getColumnCount(); c++) { 00261 ColumnInfo info = sss.getColumnInfo(c); 00262 string col_name = info.getName(); 00263 PoolColumnLink link = getPool()->lookup(sss.getSheetName(),col_name); 00264 if (link.isValid()) { 00265 if (link.isInventor()) { 00266 ColumnType& t = sss.modifyType(c); 00267 t = ColumnType("INTEGER"); 00268 t.autoIncrement = true; 00269 t.autoIncrementSet = true; 00270 t.primaryKey = true; 00271 t.primaryKeySet = true; 00272 } else { 00273 ColumnType& t = sss.modifyType(c); 00274 PoolColumnLink org = getPool()->trace(link); 00275 t = ColumnType("INTEGER"); 00276 t.foreignKeySet = true; 00277 t.foreignTable = org.getTableName(); 00278 t.foreignKey = org.getColumnName(); 00279 } 00280 } 00281 } 00282 return true; 00283 } 00284 return false; 00285 } 00286