myreg.cpp
#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <map>
#include <string>
#include <vector>
typedef std::string Str;
// One match: capture-group number (0 = whole match) -> matched text.
typedef std::map< int, Str > Mis;
// All matches: 0-based match number (in order of occurrence) -> its groups.
typedef std::map< int, Mis > Mimis;

/**
 * Finds every successive, non-overlapping match of a POSIX extended regular
 * expression in a string and collects the whole match plus its capture groups.
 *
 * @param input_string   Text to search. Only the part up to the first NUL
 *                       byte is visible to regexec().
 * @param pattern        POSIX extended regular expression (REG_EXTENDED).
 * @param result         Output. result[m][g] receives the text of group g of
 *                       the m-th match (g == 0 is the whole match). Groups
 *                       that did not participate in a match get no entry.
 *                       Existing entries with the same keys are overwritten;
 *                       the map is not cleared first.
 * @param back_ref_size  Maximum number of groups (including group 0) to
 *                       report per match. With 0 nothing can be reported and
 *                       the function returns immediately.
 * @return 0 on success (including "no match at all"), 1 if the pattern does
 *         not compile; in that case the regerror() text is written to stderr.
 */
int myreg( Str input_string, Str pattern, Mimis &result, size_t back_ref_size = 42 );

int myreg( Str input_string, Str pattern, Mimis &result, size_t back_ref_size ) {
    regex_t rg;
    // Compile as an extended regular expression; see regcomp(3) for other cflags.
    const int comp_status = regcomp( &rg, pattern.c_str(), REG_EXTENDED );
    if( comp_status ) {
        char buf[1024] = "";
        regerror( comp_status, &rg, buf, sizeof( buf ) );
        fprintf( stderr, "regcomp failed. %s\n", buf );
        // No regfree() here: after a failed regcomp() the contents of rg are
        // undefined, so there is nothing that may safely be released.
        return( 1 );
    }

    // Never ask for more slots than the pattern has groups (+1 for the whole
    // match); slots beyond that would always come back unset anyway.
    const size_t pattern_groups = rg.re_nsub + 1;
    const size_t slots = ( back_ref_size < pattern_groups ) ? back_ref_size : pattern_groups;
    if( slots == 0 ) {
        // Without slot 0 the match position is unknown, so nothing can be
        // recorded and the search cannot advance.
        regfree( &rg );
        return( 0 );
    }

    // One buffer reused for every search; freed automatically on all paths.
    std::vector< regmatch_t > groups( slots );

    const char *const text = input_string.c_str();
    const size_t text_len = input_string.size();
    size_t offset = 0;   // where the next search starts inside input_string
    int match_count = 0;

    while( true ) {
        // Past the first search the start of the buffer is no longer the
        // start of the subject, so '^' must not match there.
        const int eflags = ( offset > 0 ) ? REG_NOTBOL : 0;
        if( regexec( &rg, text + offset, slots, &groups[0], eflags ) ) {
            // Not an error: there are simply no further matches.
            break;
        }

        Mis &captures = result[match_count];
        for( size_t i = 0; i < slots; ++i ) {
            const regoff_t so = groups[i].rm_so;
            const regoff_t eo = groups[i].rm_eo;
            if( so == -1 || eo == -1 ) {
                // This group did not take part in the match; later groups
                // still may have (e.g. "(a)|(b)"), so keep going.
                continue;
            }
            captures[static_cast< int >( i )] = input_string.substr(
                offset + static_cast< size_t >( so ),
                static_cast< size_t >( eo - so ) );
        }

        // Resume right after the whole match. An empty match would not move
        // the cursor, so step one character to guarantee progress.
        size_t advance = static_cast< size_t >( groups[0].rm_eo );
        if( groups[0].rm_so == groups[0].rm_eo ) {
            ++advance;
        }
        offset += advance;
        if( offset >= text_len ) {
            // Subject exhausted (also covers an empty match at the very end).
            break;
        }
        ++match_count;
    }

    regfree( &rg );
    return( 0 );
}
int main( int argc, char **argv ) {
if( argc != 3 ) {
printf( "./myreg input_string pattern\n\n" );
return( 1 );
}
printf( "input_string:\n%s\n", argv[1] );
printf( "\n" );
printf( "pattern:\n%s\n", argv[2] );
printf( "\n" );
Mimis result;
if( myreg( argv[1], argv[2], result ) ) {
return( 1 );
}
printf( "result:\n" );
for( Mimis::iterator it = result.begin(), eit = result.end(); it != eit; ++it ) {
for( Mis::iterator itt = it->second.begin(), eitt = it->second.end(); itt != eitt; ++itt ) {
printf( "%d %d %s\n", it->first, itt->first, itt->second.c_str() );
}
}
return( 0 );
}
It seems that C and C++ offer no quick, ready-made way to use regular expressions.
That's why I decided to use the POSIX regex functions (regcomp, regexec, regerror, regfree).
And here is the code I made.
It retrieves the whole match as well as each back reference (capture group). (Writing all the back-reference handling yourself is tedious, isn't it?)
The execution results are shown below.
[todanano@localhost samba]$ ./myreg 'abc123def' '(abc).*(def)'
input_string:
abc123def

pattern:
(abc).*(def)

result:
0 0 abc123def
0 1 abc
0 2 def
[todanano@localhost samba]$
[todanano@localhost samba]$ ./myreg '192.168.0.1-192.168.0.254' '([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})'
input_string:
192.168.0.1-192.168.0.254

pattern:
([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})

result:
0 0 192.168.0.1
0 1 192
0 2 168
0 3 0
0 4 1
1 0 192.168.0.254
1 1 192
1 2 168
1 3 0
1 4 254
[todanano@localhost samba]$
[todanano@localhost samba]$
Recommended Posts