// Geant4 Cross Reference
1 // Copyright (C) 2010, Guy Barrand. All rights reserved. 2 // See the file tools.license for terms. 3 4 #ifndef tools_rcsv_ntuple 5 #define tools_rcsv_ntuple 6 7 // A simple ntuple class to read at the csv format. 8 // (csv = comma separated value). 9 10 // This reader can be use to read file at the hippodraw format 11 // which is : 12 // - one header line for the ntuple title. 13 // - one csv line for column names. 14 // - data at csv format. 15 16 #include "rntuple" 17 18 #include <istream> 19 #include <sstream> 20 21 #include "vfind" 22 #include "vmanip" 23 #include "words" 24 #include "snums" 25 #include "sto" 26 #include "s2time" 27 #include "chars" 28 #include "strip" 29 #include "cids" 30 #include "ntuple_binding" 31 #include "sout" 32 #include "num2s" 33 //#include "srep" 34 35 #ifdef TOOLS_MEM 36 #include "mem" 37 #endif 38 39 #include <utility> 40 41 namespace tools { 42 namespace rcsv { 43 44 class ntuple : public virtual read::intuple { 45 typedef read::intuple parent; 46 public: //read::intuple 47 virtual void start() { 48 m_reader.clear(); 49 m_reader.seekg(0,std::ios::beg); 50 if(m_hippo) { 51 skip_line(m_reader,m_sz); 52 skip_line(m_reader,m_sz); 53 } 54 } 55 virtual bool next() { 56 if(!m_sep) return false; //not inited. 57 if(m_reader.tellg()>=m_sz) return false; 58 // first time we are at bol but else we are at eol. 59 char c; 60 m_reader.get(c); 61 if(c==LF()){ 62 if(m_reader.tellg()>=m_sz) { 63 //eof. Tell caller to stop looping on ntuple rows. 64 return false; 65 } 66 //eol. Next char read is going to be at bol. 
67 } else { 68 m_reader.putback(c); 69 //bol 70 } 71 // ready for a new row : 72 73 while(skip_comment(m_reader,m_sz)){} 74 if(m_reader.tellg()>=m_sz) return false; 75 76 return _read_line(); 77 } 78 79 virtual read::icol* find_icol(const std::string& a_name){ 80 return find_named<read::icol>(m_cols,a_name); 81 } 82 83 virtual const std::vector<read::icol*>& columns() const {return m_cols;} 84 85 virtual const std::string& title() const {return m_title;} 86 87 virtual bool number_of_entries(tools::uint64 & a_value) const { 88 if(!m_sep) {a_value = 0;return false;} //not inited. 89 ntuple& self = const_cast<ntuple&>(*this); 90 if(m_rows==(-1)) { 91 self.m_rows = 0; 92 self.start(); 93 while(self.next()) {self.m_rows++;} 94 } 95 a_value = (uint64)m_rows; 96 return true; 97 } 98 public: 99 template <class T> 100 class column : public virtual read::icolumn<T> { 101 typedef read::icolumn<T> parent; 102 public: 103 static cid id_class() { 104 static const T s_v = T(); //do that for T = std::string. 
105 return 200+_cid(s_v); 106 } 107 public: //icol 108 virtual void* cast(cid a_class) const { 109 if(void* p = cmp_cast<column>(this,a_class)) {return p;} 110 return parent::cast(a_class); 111 } 112 virtual cid id_cls() const {return id_class();} 113 public: //icol 114 virtual const std::string& name() const {return m_name;} 115 virtual bool fetch_entry() const { 116 if(m_user_var) *m_user_var = m_tmp; 117 return true; 118 } 119 public: //icolumn<T> 120 virtual bool get_entry(T& a_v) const { 121 a_v = m_tmp; 122 return true; 123 } 124 public: 125 column(const std::string& a_name,T* a_user_var = 0) 126 :m_name(a_name) 127 ,m_tmp(T()) 128 ,m_user_var(a_user_var) //not owner 129 {} 130 virtual ~column(){} 131 protected: 132 column(const column& a_from) 133 :read::icol(a_from) 134 ,parent(a_from) 135 ,m_name(a_from.m_name) 136 ,m_tmp(a_from.m_tmp) 137 ,m_user_var(a_from.m_user_var) 138 {} 139 column& operator=(const column& a_from){ 140 m_name = a_from.m_name; 141 m_tmp = a_from.m_tmp; 142 m_user_var = a_from.m_user_var; 143 return *this; 144 } 145 public: 146 // should be used in ntuple _read_line only : 147 void set_value(const T& a_v){m_tmp = a_v;} 148 protected: 149 std::string m_name; 150 T m_tmp; 151 T* m_user_var; 152 }; 153 154 #ifdef TOOLS_MEM 155 public: 156 static const std::string& s_class() { 157 static const std::string s_v("tools::rcsv::ntuple"); 158 return s_v; 159 } 160 #endif 161 public: 162 ntuple(std::istream& a_reader) 163 :m_reader(a_reader) 164 ,m_title() 165 ,m_sep(0) 166 ,m_vec_sep(';') 167 ,m_sz(0) 168 ,m_rows(-1) 169 ,m_hippo(false) 170 { 171 #ifdef TOOLS_MEM 172 mem::increment(s_class().c_str()); 173 #endif 174 } 175 virtual ~ntuple() { 176 safe_clear<read::icol>(m_cols); 177 #ifdef TOOLS_MEM 178 mem::decrement(s_class().c_str()); 179 #endif 180 } 181 protected: 182 ntuple(const ntuple& a_from) 183 :parent(a_from) 184 ,m_reader(a_from.m_reader) 185 ,m_title(a_from.m_title) 186 ,m_sep(a_from.m_sep) 187 ,m_vec_sep(a_from.m_vec_sep) 188 
,m_sz(a_from.m_sz) 189 ,m_rows(-1) 190 ,m_hippo(a_from.m_hippo) 191 { 192 #ifdef TOOLS_MEM 193 mem::increment(s_class().c_str()); 194 #endif 195 } 196 ntuple& operator=(const ntuple& a_from){ 197 m_title = a_from.m_title; 198 m_sep = a_from.m_sep; 199 m_vec_sep = a_from.m_vec_sep; 200 m_hippo = a_from.m_hippo; 201 m_rows = a_from.m_rows; 202 return *this; 203 } 204 public: 205 void set_vec_sep(char a_c) {m_vec_sep = a_c;} 206 void set_sep(char a_c) {m_sep = a_c;} 207 void set_hippo(bool a_hippo) {m_hippo = a_hippo;} 208 209 std::istream& istrm() {return m_reader;} 210 211 /* use file::is_hippo for that. 212 static bool is_hippo(std::ostream& a_out,std::istream& a_reader) { 213 // analyse two first data line. 214 215 a_reader.clear(); 216 a_reader.seekg(0,std::ios::end); 217 std::streampos sz = a_reader.tellg(); 218 a_reader.seekg(0,std::ios::beg); 219 if(!sz) { 220 a_out << "tools::rcsv::ntuple::is_hippo :" 221 << " stream is empty." 222 << std::endl; 223 return false; 224 } //file empty. 225 226 std::string _title; 227 if(!read_line(a_reader,sz,_title)) return false; 228 std::string _s; 229 if(!read_line(a_reader,sz,_s)) return false; 230 if(_s.find('\t')==std::string::npos) return false; 231 232 //std::vector<std::string> labels; 233 //words(s,"\t",false,labels); 234 //return labels.size()?true:false; 235 236 return true; 237 } 238 */ 239 static bool find_sep(std::ostream& a_out, 240 std::istream& a_reader,bool a_hippo, 241 bool a_verbose, 242 char& a_sep){ 243 // analyse first data line to find the char separator. 244 245 a_reader.clear(); 246 a_reader.seekg(0,std::ios::end); 247 std::streampos sz = a_reader.tellg(); 248 a_reader.seekg(0,std::ios::beg); 249 if(!sz) { 250 a_out << "tools::rcsv::ntuple::find_sep :" 251 << " stream is empty." 252 << std::endl; 253 a_sep = 0; 254 return false; 255 } //file empty. 
256 if(a_verbose) a_out << "file size " << sz << std::endl; 257 258 if(a_hippo) { //skip first two lines : 259 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;} 260 if(!skip_line(a_reader,sz)) {a_sep = 0;return false;} 261 } else { 262 while(skip_comment(a_reader,sz)){} 263 } 264 if(a_reader.tellg()>=sz) {a_sep=0;return false;} //no data line. 265 266 // get first data line : 267 std::string sfirst; 268 {char c; 269 while(true) { 270 if(a_reader.tellg()>=sz) break; 271 a_reader.get(c); 272 if((c==CR())||(c==LF())) break; 273 sfirst += c; 274 }} 275 if(sfirst.empty()) { 276 a_out << "tools::rcsv::ntuple::find_set :" 277 << " first datat line is empty." 278 << std::endl; 279 a_sep = 0; 280 return false; 281 } 282 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl; 283 284 //guess sep from first data line : 285 std::istringstream strm(sfirst.c_str()); 286 double d; 287 strm >> d; 288 std::streampos pos = strm.tellg(); 289 if(pos==std::streampos(-1)) { 290 a_out << "tools::rcsv::ntuple::find_sep :" 291 << " first line does not start with a number." 292 << std::endl; 293 a_sep = 0; 294 return false; 295 } //not a number. 296 if(a_verbose) a_out << "first number " << d 297 << " ending at pos " << pos << std::endl; 298 if(pos>=(std::streampos)sfirst.size()) { 299 a_out << "tools::rcsv::ntuple::find_sep :" 300 << " no separator found in first line." 301 << " pos " << pos 302 << " sfirst.size() " << sfirst.size() 303 << std::endl; 304 a_sep = 0; 305 return false; 306 } //no sep. 307 308 strm.get(a_sep); 309 310 return true; 311 } 312 313 public: 314 bool initialize(std::ostream& a_out, 315 char a_sep = 0, //guessed 316 const std::string& a_suffix = "x", //col suffix 317 bool a_verbose = false) { 318 safe_clear<read::icol>(m_cols); 319 m_sep = 0; 320 m_sz = 0; 321 m_rows = -1; 322 323 if(a_suffix.empty()) { 324 a_out << "tools::rcsv::ntuple::initialize : expect a column suffix." 
<< std::endl; 325 return false; 326 } 327 328 m_reader.clear(); 329 m_reader.seekg(0,std::ios::end); 330 m_sz = m_reader.tellg(); 331 m_reader.seekg(0,std::ios::beg); 332 if(!m_sz) { 333 a_out << "tools::rcsv::ntuple::initialize :" 334 << " stream is empty." 335 << std::endl; 336 return false; //file empty. 337 } 338 if(a_verbose) a_out << "file size " << m_sz << std::endl; 339 340 std::vector<std::string> labels; 341 if(m_hippo) { //skip first two lines : 342 std::string _title; 343 if(!read_line(m_reader,m_sz,_title)) { 344 a_out << "tools::rcsv::ntuple::initialize : read_line() failed." << std::endl; 345 m_sz = 0; 346 m_rows = -1; 347 return false; 348 } 349 std::string _s; 350 if(!read_line(m_reader,m_sz,_s)) { 351 a_out << "tools::rcsv::ntuple::initialize : (2) read_line() failed." << std::endl; 352 m_sz = 0; 353 m_rows = -1; 354 return false; 355 } 356 words(_s,"\t",false,labels); //false for glast.tnt that has a trailing \t. 357 } else { 358 while(skip_comment(m_reader,m_sz)){} 359 } 360 if(m_reader.tellg()>=m_sz) { 361 a_out << "tools::rcsv::ntuple::initialize : tellg() >= sz." << std::endl; 362 m_sz = 0; 363 m_rows = -1; 364 return false; 365 } 366 367 // get first data line : 368 std::string sfirst; 369 {{char c; 370 while(true) { 371 if(m_reader.tellg()>=m_sz) break; 372 m_reader.get(c); 373 if((c==CR())||(c==LF())) break; 374 sfirst += c; 375 }} 376 if(sfirst.empty()) { 377 a_out << "tools::rcsv::ntuple::initialize :" 378 << " first datat line is empty." 379 << std::endl; 380 m_sz = 0; 381 m_rows = -1; 382 return false; 383 }} 384 if(a_verbose) a_out << "first data line \"" << sfirst << "\"" << std::endl; 385 386 if(a_sep) { 387 m_sep = a_sep; 388 } else { 389 //guess sep from first data line : 390 std::istringstream strm(sfirst.c_str()); 391 double d; 392 strm >> d; 393 std::streampos pos = strm.tellg(); 394 if(pos==std::streampos(-1)) { 395 a_out << "tools::rcsv::ntuple::initialize :" 396 << " first line does not start with a number." 
397 << std::endl; 398 m_sz = 0; 399 m_rows = -1; 400 return false; 401 } 402 if(a_verbose) a_out << "first number " << d << " ending at pos " << pos << std::endl; 403 if(pos>=(std::streampos)sfirst.size()) { 404 a_out << "tools::rcsv::ntuple::initialize :" 405 << " no separator found in first line." 406 << std::endl; 407 m_sz = 0; 408 m_rows = -1; 409 return false; 410 } 411 strm.get(m_sep); 412 } 413 if(a_verbose) a_out << "sep " << (int)m_sep << std::endl; 414 415 // in case sep is ' ', there is an ambiguity with some leading 416 // space in front of first number. 417 if(m_sep==' ') strip(sfirst,leading,' '); 418 419 std::vector<std::string> ws; 420 {std::string sep; 421 sep += m_sep; 422 words(sfirst,sep,m_hippo?false:true,ws);} 423 424 // look if words are numbers : 425 if(a_verbose) a_out << "words " << ws.size() << std::endl; 426 unsigned int index = 0; 427 std::vector<std::string>::iterator it; 428 for(it=ws.begin();it!=ws.end();++it,index++) { 429 if(a_verbose) a_out << "word " << sout(*it) << "" << std::endl; 430 /* with glast.tnt there is trailing \t that will induce an extra empty column. 431 if((*it).empty()) { 432 // do not accept : 433 // <num><sep><num><sep><sep><num>... 434 // but accept a trailing <sep> (glast.tnt) : 435 // <num><sep><num>....<sep><num><sep> 436 if(index==(ws.size()-1)) { 437 break; 438 } else { 439 a_out << "tools::rcsv::ntuple::initialize :" 440 << " empty word." 441 << std::endl; 442 safe_clear<read::icol>(m_cols); 443 m_sep = 0; 444 m_sz = 0; 445 m_rows = -1; 446 return false; 447 } 448 } 449 */ 450 std::string name = a_suffix; 451 if(!numas<uint64>(m_cols.size(),name)){} 452 if(m_hippo) { 453 if(index>=labels.size()) { 454 a_out << "tools::rcsv::ntuple::initialize :" 455 << " warning : not enough labels." 
456 << std::endl; 457 } else { 458 name = labels[index]; 459 } 460 } 461 double d; 462 if(to<double>(*it,d)) { 463 if(a_verbose) a_out << "number " << d << std::endl; 464 create_column<double>(name); 465 } else { 466 time_t time; 467 if(s2time(*it,time)) { 468 create_column<csv_time>(name); 469 } else { 470 std::vector<double> v; 471 std::string vec_sep;vec_sep += m_vec_sep; 472 if(snums<double>(*it,vec_sep,v)&&v.size()) { 473 create_column< std::vector<double> >(name); 474 } else { 475 create_column<std::string>(name); 476 } 477 } 478 } 479 } 480 size_t num = m_cols.size(); 481 if(!num) { 482 a_out << "tools::rcsv::ntuple::initialize :" 483 << " zero columns." 484 << std::endl; 485 m_sep = 0; 486 m_sz = 0; 487 m_rows = -1; 488 return false; 489 } 490 491 return true; 492 } 493 494 static const std::string& s_cid(cid a_id) { 495 496 #define TOOLS_RCSV_NTUPLE_IF_CID(a__name,a__type) \ 497 if(a_id==column<a__type>::id_class()) {\ 498 static const std::string s_v(#a__name);\ 499 return s_v;\ 500 } 501 502 #define TOOLS_RCSV_NTUPLE_IF_VEC_CID(a__name,a__type) \ 503 if(a_id==column< std::vector<a__type> >::id_class()) {\ 504 static const std::string s_v(#a__name+std::string("[]"));\ 505 return s_v;\ 506 } 507 508 TOOLS_RCSV_NTUPLE_IF_CID(char,char) 509 else TOOLS_RCSV_NTUPLE_IF_CID(short,short) 510 else TOOLS_RCSV_NTUPLE_IF_CID(int,int) 511 else TOOLS_RCSV_NTUPLE_IF_CID(int64,int64) 512 513 else TOOLS_RCSV_NTUPLE_IF_CID(float,float) 514 else TOOLS_RCSV_NTUPLE_IF_CID(double,double) 515 516 else TOOLS_RCSV_NTUPLE_IF_CID(uchar,uchar) 517 else TOOLS_RCSV_NTUPLE_IF_CID(ushort,ushort) 518 else TOOLS_RCSV_NTUPLE_IF_CID(uint,uint32) //WARNING 519 else TOOLS_RCSV_NTUPLE_IF_CID(uint64,uint64) 520 521 else TOOLS_RCSV_NTUPLE_IF_CID(bool,bool) 522 else if(a_id==column<std::string>::id_class()) { 523 static const std::string s_v("string"); 524 return s_v; 525 } 526 527 else if(a_id==column<csv_time>::id_class()) { 528 static const std::string s_v("time"); 529 return s_v; 530 } 531 
532 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(char,char) 533 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(short,short) 534 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(int,int) 535 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(int64,int64) 536 537 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(float,float) 538 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(double,double) 539 540 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uchar,uchar) 541 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(ushort,ushort) 542 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uint,uint32) //WARNING 543 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(uint64,uint64) 544 545 else TOOLS_RCSV_NTUPLE_IF_VEC_CID(bool,bool) 546 else if(a_id==column< std::vector<std::string> >::id_class()) { 547 static const std::string s_v("string[]"); 548 return s_v; 549 } 550 551 #undef TOOLS_RCSV_NTUPLE_IF_CID 552 #undef TOOLS_RCSV_NTUPLE_IF_VEC_CID 553 554 else { 555 static const std::string s_v("unknown"); 556 return s_v; 557 } 558 } 559 560 void dump_columns(std::ostream& a_out) const { 561 if((m_sep>=32)&&(m_sep<=126)) { //printable 562 a_out << "separator is '" << m_sep << "'" << std::endl; 563 } else { 564 a_out << "separator is " << (unsigned int)m_sep << std::endl; 565 } 566 a_out << "number of columns " << m_cols.size() << std::endl; 567 std::vector<read::icol*>::const_iterator it; 568 for(it=m_cols.begin();it!=m_cols.end();++it) { 569 a_out << sout((*it)->name()) 570 << " " << s_cid((*it)->id_cls()) 571 << std::endl; 572 } 573 } 574 public: 575 typedef std::pair<std::string,std::string> col_desc; 576 577 bool initialize(std::ostream& a_out,const ntuple_binding& a_bd = ntuple_binding()) { 578 // it assumes a "commented header". 579 580 safe_clear<read::icol>(m_cols); 581 m_sep = 0; 582 m_sz = 0; 583 m_rows = -1; 584 m_hippo = false; 585 586 m_reader.clear(); 587 m_reader.seekg(0,std::ios::end); 588 m_sz = m_reader.tellg(); 589 m_reader.seekg(0,std::ios::beg); 590 if(!m_sz) { 591 a_out << "tools::rcsv::ntuple::initialize(booking) :" 592 << " stream is empty." 593 << std::endl; 594 return false; //file empty. 
595 } 596 //if(a_verbose) a_out << "file size " << m_sz << std::endl; 597 598 std::string _title; 599 char _sep,_vec_sep; 600 std::vector<col_desc> _cols; 601 if(!read_commented_header(a_out,m_reader,_title,_sep,_vec_sep,_cols)) return false; 602 603 m_sep = _sep; 604 m_title = std::move(_title); 605 606 tools_vforcit(col_desc,_cols,it) { 607 const std::string& type = (*it).first; 608 const std::string& name = (*it).second; 609 610 #define TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(a__name,a__type) \ 611 if(type==(std::string(#a__name)+"[]")) {\ 612 create_column< std::vector<a__type> >(name,a_bd.find_variable< std::vector<a__type> >(name));\ 613 } 614 615 // see cid2s() for string types. 616 617 if(type=="char") create_column<char>(name,a_bd.find_variable<char>(name)); 618 else if(type=="short") create_column<short>(name,a_bd.find_variable<short>(name)); 619 else if(type=="int") create_column<int>(name,a_bd.find_variable<int>(name)); 620 else if(type=="float") create_column<float>(name,a_bd.find_variable<float>(name)); 621 else if(type=="double") create_column<double>(name,a_bd.find_variable<double>(name)); 622 else if(type=="string") create_column<std::string>(name,a_bd.find_variable<std::string>(name)); 623 624 else if(type=="uchar") create_column<unsigned char>(name,a_bd.find_variable<unsigned char>(name)); 625 else if(type=="ushort") create_column<unsigned short>(name,a_bd.find_variable<unsigned short>(name)); 626 else if(type=="uint") create_column<uint32>(name,a_bd.find_variable<uint32>(name)); //WARNING 627 else if(type=="bool") create_column<bool>(name,a_bd.find_variable<bool>(name)); 628 else if(type=="int64") create_column<int64>(name,a_bd.find_variable<int64>(name)); 629 else if(type=="uint64") create_column<uint64>(name,a_bd.find_variable<uint64>(name)); 630 631 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(char,char) 632 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(short,short) 633 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(int,int) 634 else 
TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(float,float) 635 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(double,double) 636 637 else if(type=="string[]") create_column< std::vector<std::string> >(name,a_bd.find_variable< std::vector<std::string> >(name)); 638 639 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uchar,uchar) 640 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(ushort,ushort) 641 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uint,uint32) //WARNING 642 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(bool,bool) 643 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(int64,int64) 644 else TOOLS_RCSV_NTUPLE_CREATE_VEC_COL(uint64,uint64) 645 646 else { 647 a_out << "tools::rcsv::ntuple::initialize(booking) :" 648 << " unhandled column type " << sout(type) 649 << std::endl; 650 safe_clear<read::icol>(m_cols); 651 m_sep = 0; 652 m_sz = 0; 653 m_rows = -1; 654 m_hippo = false; 655 return false; 656 } 657 658 #undef TOOLS_RCSV_NTUPLE_CREATE_VEC_COL 659 660 } 661 662 size_t num = m_cols.size(); 663 if(!num) { 664 a_out << "tools::rcsv::ntuple::initialize(booking) :" 665 << " zero columns." 666 << std::endl; 667 return false; 668 } 669 670 //a_out << "tools::rroot::ntuple::initialize :" 671 // << " number of columns " << num << "." 672 // << std::endl; 673 674 return true; 675 } 676 677 bool initialize_from_commented_header(std::ostream& a_out) { // it assumes a "commented header". 678 std::string _title; 679 char _sep,_vec_sep; 680 std::vector<col_desc> _cols; 681 if(!read_commented_header(a_out,m_reader,_title,_sep,_vec_sep,_cols)) return false; 682 ntuple_binding nbd; 683 {tools_vforcit(col_desc,_cols,it) nbd.add_column_no_var((*it).second);} //user_var is 0. 
684 return initialize(a_out,nbd); 685 } 686 687 bool get_row() const { 688 bool status = true; 689 tools_vforcit(read::icol*,m_cols,it) { 690 if(!(*it)->fetch_entry()) status = false; 691 } 692 return status; 693 } 694 695 protected: 696 bool read_commented_header(std::ostream& a_out,std::istream& a_reader, 697 std::string& a_title,char& a_sep,char& a_vec_sep,std::vector<col_desc>& a_cols) { 698 // analyse first lines starting with '#'. 699 a_title.clear(); 700 a_sep = 0; 701 a_cols.clear(); 702 703 a_reader.clear(); 704 a_reader.seekg(0,std::ios::end); 705 std::streampos sz = a_reader.tellg(); 706 a_reader.seekg(0,std::ios::beg); 707 if(!sz) { 708 a_out << "tools::rcsv::ntuple::read_commented_header :" 709 << " stream is empty." 710 << std::endl; 711 return false; 712 } //file empty. 713 714 715 std::string _class; 716 717 while(true) { 718 if(a_reader.tellg()>=sz) break; 719 //we should be at bol : 720 char c; 721 a_reader.get(c); 722 a_reader.putback(c); 723 if(c!='#') break; //finished, probably a data line now. 724 std::string line; 725 if(!read_line(a_reader,sz,line)) break; //or return false ? 726 727 std::vector<std::string> _words; 728 words(line," ",false,_words); 729 if(!_words.size()) { 730 a_out << "tools::rcsv::ntuple::read_commented_header :" 731 << " syntax error : empty header line." 
732 << std::endl; 733 return false; 734 } 735 if((_words[0]=="#class")) { 736 if(_words.size()!=2) { 737 a_out << "tools::rcsv::ntuple::read_commented_header :" 738 << " syntax error in " << sout(line) 739 << std::endl; 740 return false; 741 } 742 _class = _words[1]; 743 } else if(_words[0]=="#title") { 744 if(_words.size()<1) { 745 a_out << "tools::rcsv::ntuple::read_commented_header :" 746 << " syntax error in " << sout(line) 747 << std::endl; 748 return false; 749 } 750 if(_words.size()==1) { 751 a_title.clear(); 752 } else { 753 std::string::size_type pos = line.find(_words[0]); 754 pos += _words[0].size()+1; 755 a_title = line.substr(pos,line.size()-pos); 756 } 757 } else if((_words[0]=="#separator")) { 758 if(_words.size()!=2) { 759 a_out << "tools::rcsv::ntuple::read_commented_header :" 760 << " syntax error in " << sout(line) 761 << std::endl; 762 return false; 763 } 764 unsigned int uisep; 765 if(!to(_words[1],uisep)) { 766 a_out << "tools::rcsv::ntuple::read_commented_header :" 767 << " syntax error in " << sout(line) 768 << std::endl; 769 return false; 770 } 771 a_sep = (char)uisep; 772 } else if((_words[0]=="#vector_separator")) { 773 if(_words.size()!=2) { 774 a_out << "tools::rcsv::ntuple::read_commented_header :" 775 << " syntax error in " << sout(line) 776 << std::endl; 777 return false; 778 } 779 unsigned int uisep; 780 if(!to(_words[1],uisep)) { 781 a_out << "tools::rcsv::ntuple::read_commented_header :" 782 << " syntax error in " << sout(line) 783 << std::endl; 784 return false; 785 } 786 a_vec_sep = (char)uisep; 787 } else if((_words[0]=="#column")) { 788 if(_words.size()<2) { 789 a_out << "tools::rcsv::ntuple::read_commented_header :" 790 << " syntax error in " << sout(line) 791 << std::endl; 792 return false; 793 } 794 std::string stype = _words[1]; 795 std::string label; 796 if(_words.size()==2) { 797 label.clear(); 798 } else { 799 std::string::size_type pos = line.find(_words[1]); 800 pos += _words[1].size()+1; 801 label = 
line.substr(pos,line.size()-pos); 802 } 803 //a_out << "column " << stype << " " << sout(label) << std::endl; 804 a_cols.push_back(col_desc(stype,label)); 805 } else { 806 a_out << "tools::rcsv::ntuple::read_commented_header :" 807 << " syntax error in " << sout(line) 808 << ", unknown keyword " << sout(_words[0]) 809 << std::endl; 810 //return false; 811 } 812 } 813 814 /* 815 a_out << "class " << _class << std::endl; 816 a_out << "title " << _title << std::endl; 817 a_out << "separator " << _separator << std::endl; 818 */ 819 820 return true; 821 } 822 823 protected: 824 template <class T> 825 column<T>* create_column(const std::string& a_name,T* a_user_var = 0){ 826 if(find_named<read::icol>(m_cols,a_name)) return 0; 827 column<T>* col = new column<T>(a_name,a_user_var); 828 if(!col) return 0; 829 m_cols.push_back(col); 830 return col; 831 } 832 833 protected: 834 static bool read_line(std::istream& a_reader,std::streampos a_sz,std::string& a_s){ 835 a_s.clear(); 836 char c; 837 while(true) { 838 if(a_reader.tellg()>=a_sz) {a_s.clear();return false;} 839 a_reader.get(c); 840 if(c==CR()) continue; 841 if(c==LF()) break; //eol. 842 a_s += c; 843 } 844 return true; 845 } 846 847 static bool skip_line(std::istream& a_reader,std::streampos a_sz){ 848 char c; 849 while(true) { 850 if(a_reader.tellg()>=a_sz) return false; 851 a_reader.get(c); 852 if(c==LF()) break; 853 } 854 return true; 855 } 856 857 static bool skip_comment(std::istream& a_reader,std::streampos a_sz){ 858 //ret true = we had a commented line, false : a data line or nothing. 859 if(a_reader.tellg()>=a_sz) return false; 860 //we should be at bol : 861 char c; 862 a_reader.get(c); 863 if(c=='#') { 864 return skip_line(a_reader,a_sz); 865 //eol. Next char should be bol. 
866 } else { 867 a_reader.putback(c); 868 return false; 869 } 870 } 871 872 template <class T> 873 static bool _read(std::istream& a_reader,std::streampos,char,T& a_v) { 874 a_reader >> a_v; 875 if(a_reader.tellg()==std::streampos(-1)) {a_v = 0;return false;} 876 //std::cout << "debug : _read(double) " << a_v << std::endl; 877 return true; 878 } 879 static bool _read_time(std::istream& a_reader,std::streampos a_sz,char a_sep,time_t& a_v) { 880 std::string _s; 881 char c; 882 while(true){ 883 if(a_reader.tellg()>=a_sz) break; 884 a_reader.get(c); 885 if((c==a_sep)||(c==CR())||(c==LF())) { 886 a_reader.putback(c); 887 break; 888 } 889 _s += c; 890 } 891 if(!s2time(_s,a_v)) return false; 892 return true; 893 } 894 static bool _read(std::istream& a_reader,std::streampos a_sz,char a_sep,std::string& a_v) { 895 a_v.clear(); 896 char c; 897 while(true){ 898 if(a_reader.tellg()>=a_sz) break; 899 a_reader.get(c); 900 if((c==a_sep)||(c==CR())||(c==LF())) { 901 a_reader.putback(c); 902 break; 903 } 904 a_v += c; 905 } 906 return true; 907 } 908 909 static bool _vec_read(std::istream& a_reader,std::streampos a_sz, 910 std::istringstream&,std::vector<std::string>&, 911 char a_sep,const std::string& a_vec_sep, 912 std::vector<std::string>& a_v) { 913 std::string _s; 914 if(!_read(a_reader,a_sz,a_sep,_s)) return false; 915 //replace(_s,"\\"+a_vec_sep,"@@"); 916 words(_s,a_vec_sep,true,a_v); 917 //tools_vforit(std::string,a_v,it) replace(*it,"@@",a_vec_sep); 918 return true; 919 } 920 921 template <class T> 922 static bool _vec_read(std::istream& a_reader,std::streampos a_sz, 923 std::istringstream& a_iss,std::vector<std::string>& a_tmp, 924 char a_sep,const std::string& a_vec_sep, 925 std::vector<T>& a_v) { 926 std::string _s; 927 if(!_read(a_reader,a_sz,a_sep,_s)) return false; 928 if(!snums<T>(_s,a_iss,a_tmp,a_vec_sep,a_v)) return false; 929 return true; 930 } 931 932 protected: 933 bool _read_line() { 934 // have to loop on all columns ! 
935 typedef read::icol icol_t; 936 937 typedef ntuple::column<char> col_char; 938 typedef ntuple::column<short> col_short; 939 typedef ntuple::column<int> col_int; 940 typedef ntuple::column<float> col_float; 941 typedef ntuple::column<double> col_double; 942 typedef std::string string_t; 943 typedef ntuple::column<string_t> col_string_t; 944 945 typedef ntuple::column<uchar> col_uchar; 946 typedef ntuple::column<ushort> col_ushort; 947 typedef ntuple::column<uint32> col_uint32; 948 typedef ntuple::column<bool> col_bool; 949 typedef ntuple::column<int64> col_int64; 950 typedef ntuple::column<uint64> col_uint64; 951 952 typedef ntuple::column<csv_time> col_time; 953 954 typedef ntuple::column< std::vector<char> > col_vec_char; 955 typedef ntuple::column< std::vector<short> > col_vec_short; 956 typedef ntuple::column< std::vector<int32> > col_vec_int; 957 typedef ntuple::column< std::vector<float> > col_vec_float; 958 typedef ntuple::column< std::vector<double> > col_vec_double; 959 typedef ntuple::column< std::vector<std::string> > col_vec_string_t; 960 961 typedef ntuple::column< std::vector<uchar> > col_vec_uchar; 962 typedef ntuple::column< std::vector<ushort> > col_vec_ushort; 963 typedef ntuple::column< std::vector<uint32> > col_vec_uint32; 964 typedef ntuple::column< std::vector<bool> > col_vec_bool; 965 typedef ntuple::column< std::vector<int64> > col_vec_int64; 966 typedef ntuple::column< std::vector<uint64> > col_vec_uint64; 967 968 std::string vec_sep;vec_sep += m_vec_sep; 969 std::istringstream iss; 970 std::vector<std::string> tmp; 971 972 size_t index = 0; 973 size_t num = m_cols.size(); 974 std::vector<icol_t*>::const_iterator it; 975 for(it=m_cols.begin();it!=m_cols.end();++it,index++) { 976 977 #define TOOLS_RCSV_NTUPLE_IF_COL(a__type) \ 978 if(col_##a__type* _col_##a__type = id_cast<icol_t,col_##a__type>(*(*it))) {\ 979 a__type v;\ 980 if(!_read(m_reader,m_sz,m_sep,v)) return false;\ 981 _col_##a__type->set_value(v);\ 982 } 983 984 #define 
TOOLS_RCSV_NTUPLE_IF_VEC_COL(a__type) \ 985 if(col_vec_##a__type* _col_vec_##a__type = id_cast<icol_t,col_vec_##a__type>(*(*it))) {\ 986 std::vector<a__type> v;\ 987 if(!_vec_read(m_reader,m_sz,iss,tmp,m_sep,vec_sep,v)) return false;\ 988 _col_vec_##a__type->set_value(v);\ 989 } 990 991 TOOLS_RCSV_NTUPLE_IF_COL(char) 992 else TOOLS_RCSV_NTUPLE_IF_COL(short) 993 else TOOLS_RCSV_NTUPLE_IF_COL(int) 994 else TOOLS_RCSV_NTUPLE_IF_COL(float) 995 else TOOLS_RCSV_NTUPLE_IF_COL(double) 996 else TOOLS_RCSV_NTUPLE_IF_COL(string_t) 997 998 else TOOLS_RCSV_NTUPLE_IF_COL(uchar) 999 else TOOLS_RCSV_NTUPLE_IF_COL(ushort) 1000 else TOOLS_RCSV_NTUPLE_IF_COL(uint32) 1001 else TOOLS_RCSV_NTUPLE_IF_COL(bool) 1002 else TOOLS_RCSV_NTUPLE_IF_COL(int64) 1003 else TOOLS_RCSV_NTUPLE_IF_COL(uint64) 1004 1005 else if(col_time* _col_time = id_cast<icol_t,col_time>(*(*it))) { 1006 time_t v; 1007 if(!_read_time(m_reader,m_sz,m_sep,v)) return false; 1008 csv_time ct;ct.m_l = long(v); 1009 _col_time->set_value(ct); 1010 } 1011 1012 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(char) 1013 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(short) 1014 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(int) 1015 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(float) 1016 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(double) 1017 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(string_t) 1018 1019 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uchar) 1020 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(ushort) 1021 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uint32) 1022 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(bool) 1023 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(int64) 1024 else TOOLS_RCSV_NTUPLE_IF_VEC_COL(uint64) 1025 1026 #undef TOOLS_RCSV_NTUPLE_IF_COL 1027 #undef TOOLS_RCSV_NTUPLE_IF_VEC_COL 1028 1029 else { 1030 //std::cout << "column cast failed." 
<< std::endl; 1031 return false; 1032 } 1033 1034 if(index==(num-1)) { //read up to LF() 1035 char c; 1036 while(true){ 1037 if(m_reader.tellg()>=m_sz) break; 1038 m_reader.get(c); 1039 if(c==LF()) break; 1040 } 1041 } else { //read sep : 1042 char sep; 1043 m_reader.get(sep); 1044 } 1045 } 1046 return true; 1047 } 1048 protected: 1049 std::istream& m_reader; 1050 std::string m_title; 1051 char m_sep; 1052 char m_vec_sep; 1053 std::vector<read::icol*> m_cols; 1054 std::streampos m_sz; 1055 int m_rows; //to optimize number_of_entries(). 1056 bool m_hippo; 1057 }; 1058 1059 }} 1060 1061 1062 #include <fstream> 1063 1064 namespace tools { 1065 namespace rcsv { 1066 1067 class fntuple : public ntuple { 1068 typedef ntuple parent; 1069 public: 1070 static const std::string& s_class() { 1071 static const std::string s_v("tools::rcsv::fntuple"); 1072 return s_v; 1073 } 1074 public: 1075 fntuple(const std::string& a_file) 1076 :parent(m_freader) 1077 ,m_file(a_file) 1078 {} 1079 virtual ~fntuple() {m_freader.close();} 1080 protected: 1081 fntuple(const fntuple& a_from) 1082 :read::intuple(a_from) 1083 ,parent(a_from) 1084 ,m_file(a_from.m_file) 1085 {} 1086 fntuple& operator=(const fntuple& a_from){ 1087 parent::operator=(a_from); 1088 m_file = a_from.m_file; 1089 return *this; 1090 } 1091 public: 1092 bool open(){ 1093 m_freader.open(m_file.c_str()); 1094 return m_freader.fail()?false:true; 1095 } 1096 bool initialize(std::ostream& a_out, 1097 char a_sep = 0, //guessed 1098 const std::string& a_suffix = "x", //col suffix 1099 bool a_verbose = false) { 1100 if(!m_freader.is_open()) { 1101 m_freader.open(m_file.c_str()); 1102 if(m_freader.fail()) { 1103 a_out << "tools::rcsv::fntuple::initialize :" 1104 << " can't open " << m_file << "." 1105 << std::endl; 1106 return false; 1107 } 1108 } 1109 return parent::initialize(a_out,a_sep,a_suffix,a_verbose); 1110 } 1111 protected: 1112 std::string m_file; 1113 std::ifstream m_freader; 1114 }; 1115 1116 }} 1117 1118 #endif