In trying to understand and use theParser Combinator concept, I've coded up as close an analogue as I could manage within the constraints of the C language.
C doesn't have first-class functions or dynamic code generation, so instead I use a pseudo-object-oriented approach which builds a tree to represent the parser, with function-pointers at each node.
A Parser returns a list of results, so this is implemented with singly-linked lists. But trees and lists are not the focus of this code so the program just uses them without much fuss. Perhaps some operations should be abstracted, like searching to the end of the list.
This program uses twotypedefd pointers. Not trying to hide it or anything, just factoring out lots of extra *s.
What improvements can I make to the overall organization of the program? Or any parts that could be more clear?
pc3.c:
#include <stdio.h>#include <stdlib.h>typedef struct parser *parser;typedef struct result *result;typedef result (*handler)( parser p, char *input );struct parser { handler test; parser a, b; int c;};struct result { result next; int length_matched;};#define new_function_for_(type) \ type new_##type ( struct type input ){ \ type local = calloc( sizeof *local, 1 ); \ return local ? ( *local = input ), local : local; \ }new_function_for_(parser)new_function_for_(result)result parse( parser p, char *input ){ return p->test( p, input ); }result parse_fail( parser p, char *input ){ return NULL; }result parse_succeed( parser p, char *input ){ return new_result( (struct result){ .next = NULL, .length_matched = 0 } );}result parse_term( parser p, char *input ){ return *input == p->c ? new_result( (struct result){ .next = NULL, .length_matched = 1 } ) : NULL;}result parse_alternate( parser p, char *input ){ result res = parse( p->a, input ); if( res ){ result r = res; while( r->next ) r = r->next; r->next = parse( p->b, input ); return res; } else return parse( p->b, input );}void amend_lengths( result r, int length ){ r->length_matched += length; if( r->next ) amend_lengths( r->next, length );}result parse_sequence_next( result r, parser p, char *input ){ if( ! r ) return NULL; result res = parse( p->b, input + r->length_matched ); if( res ){ amend_lengths( res, r->length_matched ); result tail = res; while( tail->next ) tail = tail->next; tail->next = parse_sequence_next( r->next, p, input ); return res; } else return parse_sequence_next( r->next, p, input );}result parse_sequence( parser p, char *input ){ result res = parse( p->a, input ); return parse_sequence_next( res, p, input );}parser fails(){ return new_parser( (struct parser){ .test = parse_fail } ); }parser succeeds(){ return new_parser( (struct parser){ .test = parse_succeed } ); }parser term( int c ){ return new_parser( (struct parser){ .test = parse_term, .c = c } );}parser alternate( parser a, parser b){ return new_parser( (struct parser){ .test = parse_alternate, .a = a, .b = b } );}parser sequence( parser a, parser b){ return new_parser( (struct parser){ .test = parse_sequence, .a = a, .b = b } );}parser many( parser a ){ parser q = new_parser( (struct parser){ .test = parse_sequence, .a = a } ); parser r = new_parser( (struct parser){ .test = parse_alternate, .a = q, .b = succeeds() } ); q->b = r; return r;}parser some( parser a ){ parser q = new_parser( (struct parser){ .test = parse_sequence, .a = a } ); parser r = new_parser( (struct parser){ .test = parse_alternate, .a = q, .b = succeeds() } ); q->b = r; return q;}parser char_class( char *str ){ return *str ? alternate( term( *str ), char_class( str + 1 ) ) : fails();}parser string( char *str ){ return *str ? sequence( term( *str ), string( str + 1 ) ) : succeeds();}void print_results( result res ){ if( res ) printf( "%d ", res->length_matched ), print_results( res->next );}void test( parser p, char *str ){ printf( "%s:", str ); fflush(0); result r = parse( p, str ); print_results( r ); printf( "\n" );}void cases( void (*test)( parser p, char *str ) ){ parser p = string("hello"); test( p, "hello" ); test( p, "hell" ); test( p, "hello w" ); test( p, "xxxxx" ); parser q = many( term( 'x' ) ); test( q, "xy" ); test( q, "xx" ); test( q, "xxxy" ); test( q, "xxxxy" ); test( q, "xxxxx" ); parser r = sequence( many( term( 'x' ) ), string( "xy" ) ); test( r, "xy" ); test( r, "xx" ); test( r, "xxxy" ); test( r, "xxxxy" ); test( r, "xxxxx" );}int main(){ cases( test ); return 0;}Output:
$ ./pc3hello:5 hell:hello w:5 xxxxx:xy:1 0 xx:2 1 0 xxxy:3 2 1 0 xxxxy:4 3 2 1 0 xxxxx:5 4 3 2 1 0 xy:2 xx:xxxy:4 xxxxy:5 xxxxx:Earlier drafts and discussionsin this thread. This program is partly based on a PostScript prototype discussedin this thread.
1 Answer1
Or any parts that could be more clear?
typedef struct parser *parser;Althoughparser instruct parser andtypedef struct parser *parser exists in different name spaces, having one as astruct and the other apointer is far from clear. Suggest:
typedef struct parser *parser_ptr;... or the equivalent.
Type def-ing astruct pointer should be avoided. When neededuse different names if thetypedef is a pointer. The followinglooks wrong.
parser q = new_parser(......q->b = r; // Should not code be a q.b, oh wait, `parser` is a pointer type.// Looks like a block of data is being returned, no wait, its a pointer.result parse(...) { ... }Ref:Is it a good idea to typedef pointers?
Code lack comments. A few comments, at least, would add clarity.
Put test cases andmain() in another files to clearly demarcate the code and the test code.
No need to flusheverything:
printf( "%s:", str );//fflush(0);fflush(stdout);Subtle point. Whenint is used to store a character and subsequent code interacts withchar, the standard C library model is to treat the values asunsigned char. This is important whenchar is signed and values outside0-127 are used. Recallfgets() returns values in theunsigned char, EOF range.
result parse_term( parser p, char *input ){ // return *input == p->c // return (unsigned char) *input == (unsigned char) (p->c)There is testing for anull pointer in many places except for test code. Hmmm.
You mustlog in to answer this question.
Explore related questions
See similar questions with these tags.
