// @ZBS { // *MODULE_NAME Perl-like String Splitter and Manipulator // *MASTER_FILE 1 // +DESCRIPTION { // A Perl-like string/list class. Perl-like functions such as split are provided // Very handy when you are doing text file processing. // } // +EXAMPLE { // ZStrVar *zStr = zStrSplit( "\s+", "This is a test" ); // printf( "%s", zStr->get(1) ); // } // *PORTABILITY win32 unix // *REQUIRED_FILES zstr.cpp zstr.h // *VERSION 1.0 // +HISTORY { // } // +TODO { // } // *SELF_TEST yes console // *PUBLISH yes // } // OPERATING SYSTEM specific includes: // STDLIB includes: #include "stdlib.h" #include "string.h" #include "stdio.h" #include "assert.h" // MODULE includes: #include "zstr.h" // ZBSLIB includes: #include "zregexp.h" #include "zhashtable.h" void zStrDelete( ZStrVar *head ) { ZStrVar *next = NULL; while( head ) { next = head->next; delete head; head = next; } } ZStrVar::ZStrVar( char *_s, int len ) { s = NULL; next = NULL; set( _s, len ); } ZStrVar::~ZStrVar() { set( NULL ); } void ZStrVar::set( char *_s, int len ) { if( s ) delete s; if( len > 0 ) { // An explicit sub-string snip length is specified s = (char *)malloc( len+1 ); memcpy( s, _s, len ); s[len] = 0; } else if( len == 0 ) { s = NULL; } else { s = strdup( _s ); } } ZStrVar::operator int() { if( !s ) return (int)0; if( s[0]=='0' && s[1]=='x' ) { unsigned int a = strtoul (s, NULL, 16 ); return a; } return strtol( s, NULL, 10 ); } ZStrVar::operator short() { if( !s ) return (short)0; if( s[0]=='0' && s[1]=='x' ) return (short)strtol( s, NULL, 16 ); return (short)strtol( s, NULL, 10 ); } ZStrVar::operator char() { if( !s ) return (char)0; if( s[0]=='0' && s[1]=='x' ) return (char)strtol( s, NULL, 16 ); return (char)strtol( s, NULL, 10 ); } ZStrVar::operator float() { if( !s ) return (float)0; return (float)atof( s ); } ZStrVar::operator double() { if( !s ) return (double)0; return (double)atof( s ); } ZStrVar::operator char *() { if( !s ) return ""; return s; } ZStrVar *ZStrVar::get(int x) { for( ZStrVar *c=this; x && c; c=c->next, x-- ); if(c) return c; return NULL; } char *ZStrVar::getStr(int x) { for( ZStrVar *c=this; x && c; c=c->next, x-- ); if(c && c->s) { return c->s; } return ""; } int ZStrVar::is(int i,char *compare) { char *s = getStr(i); return !strcmp(s,compare); } int ZStrVar::getAsInt(int x) { for( ZStrVar *c=this; x && c; c=c->next, x-- ); if(c) return (int)*c; return 0; } void zStrChomp( char *str ) { if( str ) { int len = strlen( str ); if( len <= 0 ) return; while( --len ) { if( str[len]=='\r' || str[len]=='\n' ) str[len] = 0; else break; } } } ZStrVar *zStrSplit( char *_regExp, char *text ) { ZRegExp regExp( _regExp ); return zStrSplitByRegExpPtr( ®Exp, text ); } ZStrVar *zStrSplitByRegExpPtr( void *_regExp, char *text ) { ZRegExp *regExp = (ZRegExp *)_regExp; if( !text ) { return new ZStrVar( NULL ); } char *c = text; ZStrVar *first = NULL; ZStrVar *last = NULL; while( regExp->test(c) ) { int pos = regExp->getPos(0); int len = regExp->getLen( 0 ); if( !len ) break; ZStrVar *z = new ZStrVar( c, pos ); if( last ) { last->next = z; } last = z; if( !first ) { first = z; } c += pos + len; } // Anything that is left over into a trailing zstr ZStrVar *z = new ZStrVar( c ); if( last ) { last->next = z; } if( !first ) { first = z; } return first; } char *zStrJoin( char *pattern, ZStrVar *head ) { int len = 0; if( !pattern ) pattern = ""; int lenOfPattern = strlen(pattern); for( ZStrVar *z=head; z; z=z->next ) { len += strlen(*z) + lenOfPattern; } char *buf = (char *)malloc( len+1 ); char *c = buf; for( z=head; z; z=z->next ) { strcpy( c, *z ); c += strlen( *z ); if( z->next ) { // Don't add the pattern to the last one strcpy( c, pattern ); c += lenOfPattern; } } return buf; } int zStrCount( ZStrVar *head ) { int count = 0; for( ZStrVar *z=head; z; z=z->next, count++ ); return count; } char *zStrEscapeQuote( char *text ) { int len = strlen( text ); char *_text = (char *)malloc( len * 2 + 1 ); char *s = text; char *d = _text; while( *s ) { if( *s == '\'' || *s == '\"' ) { *d++ = '\\'; } *d++ = *s++; } *d = 0; return _text; } void zStrHashSplit( char *text, class ZHashTable *hashTable ) { assert( hashTable ); int state = -1; char key[1024]; char val[1024]; int keyLen = 0, valLen = 0; int whichQuote = -1; // Which quoting system was used to start this key or value for( char *c=text; ; c++ ) { assert( keyLen < 1024 && valLen < 1024 ); switch( state ) { case -2: // Terminate return; case -1: // Looking for the start of a key switch( *c ) { case '\'': whichQuote = 1; state = 0; break; case '\"': whichQuote = 2; state = 0; break; case 0: state = -2; break; default: if( *c != ' ' ) { whichQuote = 0; state = 0; c--; } } break; case 0: // Building a key if( *c == '\\' && (*(c+1)=='\'' || *(c+1)=='\"') ) { key[keyLen++] = *(c+1); c++; } else if( whichQuote == 1 && *c == '\'' ) { state = 1; } else if( whichQuote == 2 && *c == '\"' ) { state = 1; } else if( whichQuote == 0 && *c == ' ' ) { state = 1; } else if( whichQuote == 0 && *c == '=' ) { state = 1; c--; } else if( *c == 0 ) { state = -2; } else { key[keyLen++] = *c; } break; case 1: // Looking for equals whichQuote = 0; if( *c == 0 ) { state = -2; } else if( *c == '=' ) { state = 2; } break; case 2: // Looking for the start of a value switch( *c ) { case '\'': whichQuote = 1; state = 3; break; case '\"': whichQuote = 2; state = 3; break; case 0: state = -2; break; default: if( *c != ' ' ) { whichQuote = 0; state = 3; c--; } } break; case 3: // Building a value if( *c == '\\' && (*(c+1)=='\'' || *(c+1)=='\"') ) { val[valLen++] = *(c+1); c++; } else if( whichQuote == 1 && *c == '\'' ) { state = 4; } else if( whichQuote == 2 && *c == '\"' ) { state = 4; } else if( whichQuote == 0 && *c == ' ' ) { state = 4; } else if( *c == 0 ) { c--; state = 4; } else { val[valLen++] = *c; } break; case 4: key[keyLen] = 0; val[valLen] = 0; hashTable->putS( key, val ); keyLen = 0; valLen = 0; state = -1; if( *c ) c--; else return; break; } } } #ifdef SELF_TEST int main() { char *test1[] = { "A bunch of,stuff,divided by,commas", "A line with no commas", "A line with one field and an empty trailing field,", ",A line with trailing field", ",,A,B", ",,,", ",", "", NULL }; int test1Lens[] = { 4, 1, 2, 2, 4, 4, 2, 1, 1 }; char *test2[] = { "This is a test", "This is a test", "This is a test ", " ", "", NULL, }; for( int i=0; inext; assert( z && !strcmp( *z, "is" ) ); z=z->next; assert( z && !strcmp( *z, "a" ) ); z=z->next; assert( z && !strcmp( *z, "test" ) ); z=z->next; assert( !z ); break; case 2: assert( z && !strcmp( *z, "This" ) ); z=z->next; assert( z && !strcmp( *z, "is" ) ); z=z->next; assert( z && !strcmp( *z, "a" ) ); z=z->next; assert( z && !strcmp( *z, "test" ) ); z=z->next; assert( z && !strcmp( *z, "" ) ); z=z->next; assert( !z ); break; case 3: assert( z && !strcmp( *z, "" ) ); z=z->next; assert( z && !strcmp( *z, "" ) ); z=z->next; assert( !z ); break; case 4: case 5: assert( z && !strcmp( *z, "" ) ); z=z->next; assert( !z ); break; } } char *test3[] = { "key1=val1 key2=val2", "key1=val1", "", "=", "key1", "key1='val1'", "key1='val1' key2='val2' key3 = 'val3'", "key1='val1' key2=\"val2\" key3 = \"val3\"", "'key1'=val1 \"key2\"=\"val2\" key3 = \"val3\"", }; for( i=0; i