COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_core/IndexSniffer.cpp
Go to the documentation of this file.
00001 #include <coopy/IndexSniffer.h>
00002 #include <coopy/Dbg.h>
00003 #include <coopy/EfficientMap.h>
00004 
00005 using namespace coopy::store;
00006 using namespace std;
00007 
00008 void IndexSniffer::sniff() {
00009 
00010   int w = sheet.width();
00011   int h = sheet.height();
00012   int len = w;
00013   guessed = true;
00014 
00015   if (!cflags.bias_ids) {
00016     if (sniffer.hasSubset()) {
00017       vector<int> keys = sniffer.getSubset();
00018       flags.clear();
00019       for (int i=0; i<w; i++) {
00020         flags.push_back(0);
00021       }
00022       for (int i=0; i<(int)keys.size(); i++) {
00023         flags[keys[i]] = 1;
00024       }
00025       guessed = false;
00026       return;
00027     }
00028   }
00029 
00030   SheetSchema *schema = sheet.getSchema();
00031   if (schema!=NULL) {
00032     if (schema->providesPrimaryKeys()) {
00033       flags.clear();
00034       len = w;
00035       bool got_something = false;
00036       for (int i=0; i<w; i++) {
00037         ColumnInfo info = schema->getColumnInfo(i);
00038         flags.push_back(info.isPrimaryKey()?1:0);
00039         got_something = got_something || info.isPrimaryKey();
00040       }
00041       if (got_something) {
00042         guessed = false;
00043         return;
00044       } else {
00045         flags.clear();
00046       }
00047     }
00048   }
00049 
00050   // no useful schema? on to guesswork.
00051   vector<string> sofar;
00052   sofar.resize(h);
00053   for (int i=0; i<w; i++) {
00054     efficient_map<string,int> ct;
00055     int collide = 0;
00056     for (int j=0; j<h; j++) {
00057       bool escaped = false;
00058       string v = sheet.cellString(i,j,escaped);
00059       v += escaped?"*":" ";
00060       sofar[j] += v;
00061       //dbg_printf("checking %d %s\n", j, sofar[j].c_str());
00062       if (ct.find(sofar[j])==ct.end()) {
00063         ct[sofar[j]] = 1;
00064       } else {
00065         collide++;
00066         if (collide==1) {
00067           dbg_printf("first collision is for %s\n", v.c_str());
00068         }
00069       }
00070     }
00071     if (collide==0) {
00072       dbg_printf("no collisions for %d\n", i);
00073       len = i+1;
00074       break;
00075     } else {
00076       dbg_printf("%d collisions for %d\n", collide, i);
00077     }
00078   }
00079   flags.clear();
00080   if (len==0) len = w;
00081   for (int i=0; i<w; i++) {
00082     flags.push_back((i<len)?1:0);
00083   }
00084 }
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines