COOPY » Guide
version 0.6.5
|
00001 #include <coopy/IndexSniffer.h> 00002 #include <coopy/Dbg.h> 00003 #include <coopy/EfficientMap.h> 00004 00005 using namespace coopy::store; 00006 using namespace std; 00007 00008 void IndexSniffer::sniff() { 00009 00010 int w = sheet.width(); 00011 int h = sheet.height(); 00012 int len = w; 00013 guessed = true; 00014 00015 if (!cflags.bias_ids) { 00016 if (sniffer.hasSubset()) { 00017 vector<int> keys = sniffer.getSubset(); 00018 flags.clear(); 00019 for (int i=0; i<w; i++) { 00020 flags.push_back(0); 00021 } 00022 for (int i=0; i<(int)keys.size(); i++) { 00023 flags[keys[i]] = 1; 00024 } 00025 guessed = false; 00026 return; 00027 } 00028 } 00029 00030 SheetSchema *schema = sheet.getSchema(); 00031 if (schema!=NULL) { 00032 if (schema->providesPrimaryKeys()) { 00033 flags.clear(); 00034 len = w; 00035 bool got_something = false; 00036 for (int i=0; i<w; i++) { 00037 ColumnInfo info = schema->getColumnInfo(i); 00038 flags.push_back(info.isPrimaryKey()?1:0); 00039 got_something = got_something || info.isPrimaryKey(); 00040 } 00041 if (got_something) { 00042 guessed = false; 00043 return; 00044 } else { 00045 flags.clear(); 00046 } 00047 } 00048 } 00049 00050 // no useful schema? on to guesswork. 00051 vector<string> sofar; 00052 sofar.resize(h); 00053 for (int i=0; i<w; i++) { 00054 efficient_map<string,int> ct; 00055 int collide = 0; 00056 for (int j=0; j<h; j++) { 00057 bool escaped = false; 00058 string v = sheet.cellString(i,j,escaped); 00059 v += escaped?"*":" "; 00060 sofar[j] += v; 00061 //dbg_printf("checking %d %s\n", j, sofar[j].c_str()); 00062 if (ct.find(sofar[j])==ct.end()) { 00063 ct[sofar[j]] = 1; 00064 } else { 00065 collide++; 00066 if (collide==1) { 00067 dbg_printf("first collision is for %s\n", v.c_str()); 00068 } 00069 } 00070 } 00071 if (collide==0) { 00072 dbg_printf("no collisions for %d\n", i); 00073 len = i+1; 00074 break; 00075 } else { 00076 dbg_printf("%d collisions for %d\n", collide, i); 00077 } 00078 } 00079 flags.clear(); 00080 if (len==0) len = w; 00081 for (int i=0; i<w; i++) { 00082 flags.push_back((i<len)?1:0); 00083 } 00084 }