COOPY » Guide  version 0.6.5
/home/paulfitz/cvs/coopy_scm/coopy/src/libcoopy_core/SheetStyle.cpp
Go to the documentation of this file.
00001 #include <coopy/SheetStyle.h>
00002 
00003 #include <ctype.h>
00004 
00005 #include <string>
00006 #include <map>
00007 
00008 using namespace std;
00009 using namespace coopy::store;
00010 
00011 const SheetStyle SheetStyle::defaultStyle;
00012 
00013 bool SheetStyle::setFromFilename(const char *fname) {
00014   string name = fname;
00015   delim = ",";
00016   if (name.length()>=4) {
00017     string ext = name.substr(name.length()-4);
00018     for (size_t i=0; i<ext.length(); i++) {
00019       ext[i] = tolower(ext[i]);
00020     }
00021     if (ext==".tsv") {
00022       delim = "\t";
00023     } else if (ext==".ssv") {
00024       delim = ";";
00025     } else if (ext==".csv") {
00026       delim = ",";
00027     } else if (ext==".wsv") {
00028       delim = " ";
00029     } else if (ext==".list") {
00030       delim = ",";
00031     } else {
00032       return false;
00033     }
00034   } else {
00035     return false;
00036   }
00037   return true;
00038 }
00039 
00040 void SheetStyle::setFromProperty(const Property& config) {
00041   if (config.check("delimiter")) {
00042     delim = config.get("delimiter").asString();
00043   }
00044   if (config.check("null_token")) {
00045     nullToken = config.get("null_token").asString();
00046   }
00047   if (config.check("have_null")) {
00048     haveNull = config.get("have_null").asInt()!=0;
00049   }
00050   if (config.check("avoid_collision")) {
00051     quoteCollision = config.get("avoid_collision").asInt()!=0;
00052   }
00053   if (config.check("mark_header")) {
00054     markHeader = config.get("mark_header").asInt()!=0;
00055   }  
00056 }
00057 
/**
 * Tallies how often a candidate separator character occurs per row.
 *
 * Usage: call bump() for each occurrence of the character within a row,
 * then add() at the end of the row.  After any number of rows, `best`
 * holds the per-row occurrence count seen on the most rows and `best_ct`
 * holds how many rows had that count.
 */
class SeparatorHistory {
public:
  int ct;                     ///< occurrences seen so far in the current row
  int best_ct;                ///< number of rows that had `best` occurrences
  int best;                   ///< most frequent per-row count, -1 if no rows yet
  std::map<int,int> votes;    ///< per-row count -> number of rows with that count
  char ch;                    ///< the separator character being tallied

  // Deliberately not explicit: arrays of SeparatorHistory are initialized
  // directly from character literals.
  SeparatorHistory(char ch) : ct(0), best_ct(0), best(-1), ch(ch) {
  }

  /// Record one occurrence of the separator in the current row.
  void bump() {
    ct++;
  }

  /// Close the current row: cast a vote for its count and reset the counter.
  void add() {
    // operator[] value-initializes a missing entry to 0, so each row
    // contributes exactly one vote (the first vote is not double-counted).
    const int tally = ++votes[ct];
    if (tally>best_ct) {
      best_ct = tally;
      best = ct;
    }
    ct = 0;
  }
};
00088 
00089 #define SEPARATOR_COMMA 0
00090 #define SEPARATOR_TAB 1
00091 #define SEPARATOR_SEMICOLON 2
00092 #define SEPARATOR_COUNT 3
00093 
00094 void SheetStyle::setFromInspection(const char *buffer, int len) {
00095   SeparatorHistory history[SEPARATOR_COUNT] = { ',', '\t', ';' };
00096   int rows = 0;
00097   bool quoted = false;
00098   bool content = false;
00099   for (int i=0; i<=len; i++) {
00100     char ch = '\n';
00101     if (i<len) {
00102       ch = buffer[i];
00103     }
00104     if (ch=='\"') {
00105       quoted = !quoted;
00106     } else if (!quoted) {
00107       if (ch!='\n'&&ch!='\r') {
00108         content = true;
00109       }
00110       if (ch=='\n') {
00111         rows++;
00112         if (content) {
00113           for (int j=0; j<SEPARATOR_COUNT; j++) {
00114             history[j].add();
00115           }
00116           content = false;
00117         }
00118       } else {
00119         for (int j=0; j<SEPARATOR_COUNT; j++) {
00120           if (ch==history[j].ch) {
00121             history[j].bump();
00122           }
00123         }
00124       }
00125     }
00126   }
00127 
00128   delim = ",";
00129   int best_ct = 0;
00130   for (int j=0; j<SEPARATOR_COUNT; j++) {
00131     if (history[j].best!=0) {
00132       if (history[j].best_ct>best_ct) {
00133         best_ct = history[j].best_ct;
00134         delim = string() + history[j].ch;
00135       }
00136     }
00137   }
00138 }
00139 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines