I've attempted to remedy the issues outlined in the answer to my previous question. I've added several hundred blank lines to better illustrate the grouping of functions and generally make things look less dense. I've trimmed all the lines to 80 columns except for one 83-character line in ppnarg.h which was inside the original author's comment, so I chose not to alter that. I've added forward declarations for all the static functions inside the .c files so all the static "helper functions" can be placed below the non-static function that uses them, so the implementation can be presented in a more top-down fashion overall. I've added a description of every API function next to its declaration in the .h file, and comments in the .c files explaining design decisions that are important for understanding the implementation.
omitted for size
# Build and run the pc11 parser-combinator test program.
# Extra compiler flags can be supplied on the command line: make cflags=-O2
CFLAGS= -std=c99 -g -Wall -Wpedantic -Wextra -Wno-unused-function -Wno-unused-parameter -Wno-switch -Wno-return-type -Wunused-variable
CFLAGS+= $(cflags)

# These targets name actions, not files.
.PHONY : test clean count

test : pc11test
	./$<

pc11test : pc11object.o pc11parser.o pc11io.o pc11test.o
	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)

pc11object.o : pc11object.[ch]
pc11parser.o : pc11parser.[ch] pc11object.h
pc11io.o : pc11io.[ch] pc11object.h pc11parser.h
pc11test.o : pc11test.[ch] pc11object.h pc11parser.h pc11io.h

# -f keeps clean from failing when there is nothing to remove; the binary is
# removed under both its plain name and its .exe name.
clean :
	rm -f *.o pc11test pc11test.exe

count :
	wc -l -c -L pc11*[ch] ppnarg.h
	cloc pc11*[ch] ppnarg.h

omitted for size
#define PC11OBJECT_H#include <stdlib.h>#include <stdio.h>#if ! PPNARG_H #include "ppnarg.h"#endif/* Variant subtypes of object, and signatures for function object functions */#define IS_THE_TARGET_OF_THE_HIDDEN_POINTER_ *typedef union object IS_THE_TARGET_OF_THE_HIDDEN_POINTER_ object;typedef object integer;typedef object list;typedef object symbol;typedef object string;typedef object boolean;typedef object suspension;typedef object parser;typedef object operator;typedef operator predicate;typedef operator binoperator;typedef object fSuspension( object env );typedef object fParser( object env, list input );typedef object fOperator( object env, object input );typedef boolean fPredicate( object env, object input );typedef object fBinOperator( object left, object right );typedef enum { INVALID, INT, LIST, SYMBOL, STRING, VOID, SUSPENSION, PARSER, OPERATOR, END_TAGS} tag;enum object_symbol_codes { T, END_OBJECT_SYMBOLS};struct integer { tag t; int i;};struct list { tag t; object first, rest;};struct symbol { tag t; int code; const char *printname; object data;};struct string { tag t; char *str; int disposable;};struct void_ { tag t; void *pointer;};struct suspension { tag t; object env; fSuspension *f; const char *printname;};struct parser { tag t; object env; fParser *f; const char *printname;};struct operator { tag t; object env; fOperator *f; const char *printname;};struct header { int mark; object next; int forward;};union object { tag t; struct integer Int; struct list List; struct symbol Symbol; struct string String; struct void_ Void; struct suspension Suspension; struct parser Parser; struct operator Operator; struct header Header;};/* Global true/false objects. */extern object NIL_; /* .t == INVALID */extern symbol T_;/* Determine if object is non-NULL and non-NIL. Will also convert a boolean T_ or NIL_ to an integer 1 or 0. 
*/static intvalid( object it ){ return it && it->t < END_TAGS && it->t != INVALID;}/* Constructors */integer Int( int i );boolean Boolean( int b );string String( char *str, int disposable );object Void( void *pointer );/* List of one element */list one( object it );/* Join two elements togther. If rest is a list or NIL_, result is a list. */list cons( object first, object rest );/* Join N elements together in a list */#define LIST(...) \ reduce( cons, PP_NARG(__VA_ARGS__), (object[]){ __VA_ARGS__ } )/* Macros capture printnames automatically for these constructors */#define Symbol( n ) \ Symbol_( n, #n, NIL_ )symbol Symbol_( int code, const char *printname, object data );#define Suspension( env, f ) \ Suspension_( env, f, __func__ )suspension Suspension_( object env, fSuspension *f, const char *printname );#define Parser( env, f ) \ Parser_( env, f, __func__ )parser Parser_( object env, fParser *f, const char *printname );#define Operator( env, f ) \ Operator_( env, f, #f )operator Operator_( object env, fOperator *f, const char *printname );/* Printing *//* Print list with dot notation or any object */void print( object a );/* Print list with list notation or any object */void print_list( object a );/* Functions over lists *//* car */object first( list it );/* cdr */list rest( list it );/* Length of list */int length( list ls );/* Force n elements from the front of (lazy?) list */list take( int n, list it );/* Skip ahead n elements in (lazy?) list */list drop( int n, list it );/* Index a (lazy?) list */object nth( int n, list it );/* Apply operator to (lazy?) object */object apply( operator op, object it );/* Produce lazy lists */list infinite( object mother );list chars_from_str( char *str );list chars_from_file( FILE *file );/* Lazy list adapters */list ucs4_from_utf8( list o );list utf8_from_ucs4( list o );/* Maps and folds *//* Transform each element of list with operator; yield new list. 
*/list map( operator op, list it );/* Fold right-to-left over list with f */object collapse( fBinOperator *f, list it );/* Fold right-to-left over array of objects with f */object reduce( fBinOperator *f, int n, object *po );/* Comparisons and Association Lists (Environments) *//* Compare for equality. For symbols, just compare codes. */boolean eq( object a, object b );/* Call eq, but avoid the need to allocate a Symbol object */boolean eq_symbol( int code, object b );/* Return copy of start sharing end */list append( list start, list end );/* Prepend n (key . value) pairs to tail */list env( list tail, int n, ... );/* Return value associated with key */object assoc( object key, list env );/* Call assoc, but avoid the need to allocate a Symbol object */object assoc_symbol( int code, list env );/* Conversions *//* Copy integers and strings into *str. modifies caller supplied pointer */void fill_string( char **str, list it );/* Convert integers and strings from list into a string */string to_string( list ls );/* Dynamically create a symbol object corresponding to printname s. Scans the list of allocations linearly to find a matching printname. Failing that, it allocates a new symbol code from the space [-2,-inf). */symbol symbol_from_string( string s );/* That one lone function without a category to group it in. *//* Report (an analogue of) memory usage. By current measure, an allocation is 64 bytes, ie. 2x 32 byte union objects. 
*/int count_allocations( void );#define _BSD_SOURCE#include "pc11object.h"#include <stdarg.h>#include <string.h>static void print_listn( object a );static int leading_ones( object byte );static int mask_off( object byte, int m );static fSuspension force_first;static fSuspension force_rest;static fSuspension force_apply;fSuspension infinite;static fSuspension force_chars_from_string;static fSuspension force_chars_from_file;static fSuspension force_ucs4_from_utf8;static fSuspension force_utf8_from_ucs4;fBinOperator map;fBinOperator eq;fBinOperator append;fBinOperator assoc;/* Helper macro for constructor functions. */#define OBJECT(...) new_( (union object[]){{ __VA_ARGS__ }} )/* Flags controlling print(). */static int print_innards = 1;static int print_chars = 1;static int print_codes = 0;/* Define simple objects T_ and NIL_, the components of our boolean type. */static union object nil_object = { .t=INVALID };object NIL_ = & nil_object;object T_ = 1 + (union object[]){ {.Header={1}}, {.Symbol={SYMBOL, T, "T", & nil_object}} };/* Allocation function is defined at the end of this file with its file scoped data protected from the vast majority of other functions here. */static object new_( object prototype );integerInt( int i ){ return OBJECT( .Int = { INT, i } );}booleanBoolean( int b ){ return b ? 
T_ : NIL_;}stringString( char *str, int disposable ){ return OBJECT( .String = { STRING, str, disposable } );}objectVoid( void *pointer ){ return OBJECT( .Void = { VOID, pointer } );}listone( object it ){ return cons( it, NIL_ );}listcons( object first, object rest ){ return OBJECT( .List = { LIST, first, rest } );}symbolSymbol_( int code, const char *printname, object data ){ return OBJECT( .Symbol = { SYMBOL, code, printname, data } );}suspensionSuspension_( object env, fSuspension *f, const char *printname ){ return OBJECT( .Suspension = { SUSPENSION, env, f, printname } );}parserParser_( object env, fParser *f, const char *printname ){ return OBJECT( .Parser = { PARSER, env, f, printname } );}operatorOperator_( object env, fOperator *f, const char *printname ){ return OBJECT( .Operator = { OPERATOR, env, f, printname } );}voidprint( object a ){ switch( a ? a->t : 0 ){ default: printf( "() " ); break; case INT: printf( print_chars ? "'%c' " : "%d ", a->Int.i ); break; case LIST: printf( "(" ), print( a->List.first ), printf( "." ), print( a->List.rest ), printf( ")" ); break; case SUSPENSION: printf( "...(%s) ", a->Suspension.printname ); break; case PARSER: printf( "Parser(%s", a->Parser.printname ), (print_innards & ! a[-1].Header.forward) && (printf( ", " ), print( a->Parser.env ),0), printf( ") " ); break; case OPERATOR: printf( "Oper(%s", a->Operator.printname ), printf( ", " ), print( a->Operator.env ), printf( ") " ); break; case STRING: printf( "\"%s\" ", a->String.str ); break; case SYMBOL: if( print_codes ) printf( "%d:%s ", a->Symbol.code, a->Symbol.printname ); else printf( "%s ", a->Symbol.printname ); break; case VOID: printf( "VOID " ); break; }}voidprint_list( object a ){ switch( a ? a->t : 0 ){ default: print( a ); break; case LIST: printf( "(" ), print_list( first( a ) ), print_listn( rest( a ) ), printf( ") " ); break; }}static voidprint_listn( object a ){ if( ! 
valid( a ) ) return; switch( a->t ){ default: print( a ); break; case LIST: print_list( first( a ) ), print_listn( rest( a ) ); break; }}/* force_() executes a suspension function to instantiate and yield a value. It may unwrap many layers of suspended operations to shake off any laziness at the front of a list or resolve a lazy calculation down to its result. In order to simulate the feature of lazy evaluation that a lazy list will manifest its elements "in place", the resulting object from force_() must be overwritten over the representation of the suspension object to provide the illusion that the list magically manifests for all handles to that part of the list. Consequently, force_() is declared static to this file and it is exclusively used in the stereotyped form: *it = *force_( it ); Functions outside of this module requiring the forced execution of a potential suspension must use side effect of take() or drop(). Eg. drop( 1, it ) will transform a suspended calculation into its actual resulting value. If it is a lazy list, this will manifest the list node with a new suspension as the rest(). */static objectforce_( object it ){ if( it->t != SUSPENSION ) return it; return force_( it->Suspension.f( it->Suspension.env ) );}object first( list it ){ if( it->t == SUSPENSION ) return Suspension( it, force_first ); if( it->t != LIST ) return NIL_; return it->List.first;}static object force_first ( object it ){ *it = *force_( it ); return first( it );}object rest( list it ){ if( it->t == SUSPENSION ) return Suspension( it, force_rest ); if( it->t != LIST ) return NIL_; return it->List.rest;}static object force_rest ( object it ){ *it = *force_( it ); return rest( it );}intlength( list ls ){ return valid( ls ) ? valid( first( ls ) ) + length( rest( ls ) ) : 0;}listtake( int n, list it ){ if( n == 0 ) return NIL_; *it = *force_( it ); if( ! 
valid( it ) ) return NIL_; return cons( first( it ), take( n-1, rest( it ) ) );}listdrop( int n, list it ){ if( n == 0 ) return it; *it = *force_( it ); if( ! valid( it ) ) return NIL_; return drop( n-1, rest( it ) );}object nth( int n, list it ){ return first( take( 1, drop( n-1, it ) ) );}objectapply( operator op, object it ){ if( it->t == SUSPENSION ) return Suspension( cons( op, it ), force_apply ); return op->Operator.f( op->Operator.env, it );}static objectforce_apply( list env ){ operator op = first( env ); object it = rest( env ); *it = *force_( it ); return apply( op, it );}listinfinite( object mother ){ return cons( mother, Suspension( mother, infinite ) );}listchars_from_str( char *str ){ if( ! str ) return NIL_; return Suspension( String( str, 0 ), force_chars_from_string );}static listforce_chars_from_string( string s ){ char *str = s->String.str; if( ! *str ) return one( Symbol( EOF ) ); return cons( Int( *str ), Suspension( String( str+1, 0 ), force_chars_from_string ) );}listchars_from_file( FILE *file ){ if( ! file ) return NIL_; return Suspension( Void( file ), force_chars_from_file );}static listforce_chars_from_file( object file ){ FILE *f = file->Void.pointer; int c = fgetc( f ); if( c == EOF ) return one( Symbol( EOF ) ); return cons( Int( c ), Suspension( file, force_chars_from_file ) );}/* UCS4 <=> UTF8 */listucs4_from_utf8( list input ){ if( ! input ) return NIL_; return Suspension( input, force_ucs4_from_utf8 );}listutf8_from_ucs4( list input ){ if( ! 
input ) return NIL_; return Suspension( input, force_utf8_from_ucs4 );}static listforce_ucs4_from_utf8( list input ){ *input = *force_( input ); object byte; byte = first( input ), input = rest( input ); if( !valid(byte) ) return NIL_; if( eq_symbol( EOF, byte ) ) return input; int ones = leading_ones( byte ); int bits = mask_off( byte, ones ); int n = ones; while( n-- > 1 ){ *input = *force_( input ); byte = first( input ), input = rest( input ); if( eq_symbol( EOF, byte ) ) return input; bits = ( bits << 6 ) | ( byte->Int.i & 0x3f ); } if( bits < ((int[]){0,0,0x80,0x800,0x10000,0x110000,0x4000000})[ ones ] ) fprintf( stderr, "Overlength encoding in utf8 char.\n" ); return cons( Int( bits ), Suspension( input, force_ucs4_from_utf8 ) );}static listforce_utf8_from_ucs4( list input ){ *input = *force_( input ); object code = first( input ); if( eq_symbol( EOF, code ) ) return input; int x = code->Int.i; object next = Suspension( drop( 1, input ), force_utf8_from_ucs4 ); if( x <= 0x7f ) return cons( code, next ); if( x <= 0x7ff ) return LIST( Int( (x >> 6) | 0xc0 ), Int( (x & 0x3f) | 0x80 ), next ); if( x <= 0xffff ) return LIST( Int( (x >> 12) | 0xe0 ), Int( ( (x >> 6) & 0x3f ) | 0x80 ), Int( ( x & 0x3f ) | 0x80 ), next ); if( x <= 0x10ffff ) return LIST( Int( (x >> 18) | 0xf0 ), Int( ( (x >> 12) & 0x3f ) | 0x80 ), Int( ( (x >> 6) & 0x3f ) | 0x80 ), Int( ( x & 0x3f ) | 0x80 ), next ); if( x <= 0x3ffffff ) return LIST( Int( (x >> 24) | 0xf8 ), Int( ( (x >> 18) & 0x3f ) | 0x80 ), Int( ( (x >> 12) & 0x3f ) | 0x80 ), Int( ( (x >> 6) & 0x3f ) | 0x80 ), Int( ( x & 0x3f ) | 0x80 ), next ); if( x <= 0x3fffffff ) return LIST( Int( (x >> 30) | 0xfc ), Int( ( (x >> 24) & 0x3f ) | 0x80 ), Int( ( (x >> 18) & 0x3f ) | 0x80 ), Int( ( (x >> 12) & 0x3f ) | 0x80 ), Int( ( (x >> 6) & 0x3f ) | 0x80 ), Int( ( x & 0x3f ) | 0x80 ), next ); fprintf( stderr, "Invalid unicode code point in ucs4 char.\n" ); return next;}static intleading_ones( object byte ){ if( byte->t != INT ) return 0; int 
x = byte->Int.i; return x&0200 ? x&0100 ? x&040 ? x&020 ? x&010 ? x&4 ? 6 : 5 : 4 : 3 : 2 : 1 : 0;}static intmask_off( object byte, int m ){ if( byte->t != INT ) return 0; int x = byte->Int.i; return x & (m ? (1<<(8-m))-1 : -1);}listmap( operator op, list it ){ if( ! valid( it ) ) return it; return cons( apply( op, first( it ) ), map( op, rest( it ) ) );}objectcollapse( fBinOperator *f, list it ){ if( !valid( it ) ) return it; object right = collapse( f, rest( it ) ); if( !valid( right ) ) return first( it ); return f( first( it ), right );}objectreduce( fBinOperator *f, int n, object *po ){ return n==1 ? *po : f( *po, reduce( f, n-1, po+1 ) );}booleaneq( object a, object b ){ return Boolean( !valid( a ) && !valid( b ) ? 1 : !valid( a ) || !valid( b ) ? 0 : a->t != b->t ? 0 : a->t == SYMBOL ? a->Symbol.code == b->Symbol.code : !memcmp( a, b, sizeof *a ) ? 1 : 0 );}booleaneq_symbol( int code, object b ){ return eq( (union object[]){ {.Symbol = {SYMBOL, code, "", 0} } }, b );}listappend( list start, list end ){ if( ! valid( start ) ) return end; return cons( first( start ), append( rest( start ), end ) );}listenv( list tail, int n, ... ){ va_list v; va_start( v, n ); list r = tail; while( n-- ){ object a = va_arg( v, object ); object b = va_arg( v, object ); r = cons( cons( a, b ), r ); } va_end( v ); return r;}objectassoc( object key, list b ){ if( !valid( b ) ) return NIL_; object pair = first( b ); if( valid( eq( key, first( pair ) ) ) ) return rest( pair ); else return assoc( key, rest( b ) );}objectassoc_symbol( int code, list b ){ return assoc( (union object[]){ {.Symbol = {SYMBOL, code, "", 0}} }, b );}static intstring_length( object it ){ switch( it ? it->t : 0 ){ default: return 0; case INT: return 1; case STRING: return strlen( it->String.str ); case LIST: return string_length( first( it ) ) + string_length( rest( it ) ); }}voidfill_string( char **str, list it ){ switch( it ? 
it->t : 0 ){ default: return; case INT: *(*str)++ = it->Int.i; return; case STRING: strcpy( *str, it->String.str ); *str += strlen( it->String.str ); return; case LIST: fill_string( str, first( it ) ); fill_string( str, rest( it ) ); return; }}stringto_string( list ls ){ char *str = calloc( 1 + string_length( ls ), 1 ); string s = OBJECT( .String = { STRING, str, 1 } ); fill_string( &str, ls ); return s;}/* The following functions are isolated to the bottom of this file so that their static variables are protected from all other functions in this file. *//* Allocation of objects */static list allocation_list = NULL;static objectnew_( object prototype ){ object record = calloc( 2, sizeof *record ); if( record ){ record[0] = (union object){ .Header = { 0, allocation_list } }; allocation_list = record; record[1] = *prototype; } return record + 1;}/* Construction of dynamic symbols */static int next_symbol_code = -2;symbolsymbol_from_string( string s ){ list ls = allocation_list; while( ls != NULL && valid( ls + 1 ) ){ if( ls[1].t == SYMBOL && strcmp( ls[1].Symbol.printname, s->String.str ) == 0 ){ return ls + 1; } ls = ls[0].Header.next; } return Symbol_( next_symbol_code--, strdup( s->String.str ), NIL_ );}intcount_allocations( void ){ list ls = allocation_list; int n = 0; while( ls != NULL && valid( ls + 1 ) ){ ++n; ls = ls->Header.next; } return n;}#define PC11PARSER_H#if ! PC11OBJECT_H #include "pc11object.h"#endifenum parser_symbol_codes { VALUE = END_OBJECT_SYMBOLS, OK, FAIL, SATISFY_PRED, EITHER_P, EITHER_Q, SEQUENCE_P, SEQUENCE_Q, SEQUENCE_OP, BIND_P, BIND_OP, INTO_P, INTO_ID, INTO_Q, REGEX_ATOM, PROBE_P, PROBE_MODE, EBNF_SEQ, EBNF_ANY, EBNF_EPSILON, EBNF_MAYBE, EBNF_MANY, END_PARSER_SYMBOLS};/* Parse the input using parser p. */list parse( parser p, list input );/* Check result from parse(). */int is_ok( list result );int not_ok( list result );/* Return OK or FAIL result. 
*/parser succeeds( list result );parser fails( list errormsg );/* Emit debugging output from p. Print on ok iff mode&1; print not ok iff mode&2. */parser probe( parser p, int mode );/* The basic (leaf) parser. */parser satisfy( predicate pred );/* Simple parsers built with satisfy(). */parser alpha( void );parser upper( void );parser lower( void );parser digit( void );parser literal( object example );parser chr( int c );parser str( char *s );parser anyof( char *s );parser noneof( char *s );/* Accept any single element off the input list. */parser item( void );/* Choice ("OR" branches) *//* Combine 2 parsers into a choice. */parser either( parser p, parser q );/* Combine N parsers into a choice. */#define ANY(...) \ reduce( either, \ PP_NARG(__VA_ARGS__), \ (object[]){ __VA_ARGS__ } )/* Sequence ("AND" branches) *//* Combine 2 parsers into a sequence, using op to merge the value portions of results. */parser sequence( parser p, parser q, binoperator op );/* Sequence 2 parsers but drop result from first. */parser xthen( parser x, parser q );/* Sequence 2 parsers but drop result from second. */parser thenx( parser p, parser x );/* Sequence 2 parsers and concatenate results. */parser then( parser p, parser q );/* Sequence N parsers and concatenate results. */#define SEQ(...) \ reduce( then, \ PP_NARG(__VA_ARGS__), \ (object[]){ __VA_ARGS__ } )/* Sequence 2 parsers, but pass result from first as a (id.value) pair in second's env. */parser into( parser p, object id, parser q );/* Repetitions *//* Accept 0 or 1 successful results from p. */parser maybe( parser p );/* Accept 0 or more successful results from p. */parser many( parser p );/* Accept 1 or more successful results from p. */parser some( parser p );/* Transform of values *//* Process succesful result from p by transforming the value portion with op. */parser bind( parser p, operator op );/* Building recursive parsers *//* Create an empty parser, useful for building loops. A forward declaration of a parser. 
*/parser forward( void );/* Compilers *//* Compile a regular expression into a parser. */// E->T ('|' T)*// T->F*// F->A ('*' | '+' | '?')?// A->'.' | '('E')' | C// C->S|L|P// S->'\' ('.' | '|' | '(' | ')' | '[' | ']' | '/' )// L->'[' '^'? ']'? [^]]* ']'// P->Plain charparser regex( char *re );/* Compile a block of EBNF definitions into a list of (symbol.parser) pairs. */// D->N '=' E ';'// N->name// E->T ('|' T)*// T->F*// F->R | N | '[' E ']' | '{' E '}' | '(' E ')' | '/' regex '/'// R->'"' [^"]* '"' | "'" [^']* "'"list ebnf( char *productions, list supplements, list handlers );#include "pc11parser.h"#include <ctype.h>#include <string.h>static fParser success;static fParser fail;static fParser parse_satisfy;static fPredicate is_upper;static fPredicate is_alpha;static fPredicate is_lower;static fPredicate is_digit;static fPredicate is_literal;static fPredicate is_range;static fPredicate is_anyof;static fPredicate is_noneof;static fPredicate always_true;static fParser parse_either;fBinOperator either;static fParser parse_sequence;static fBinOperator concat;fBinOperator then;static fBinOperator left;static fBinOperator right;fBinOperator xthen;fBinOperator thenx;static fParser parse_bind;static fParser parse_into;static fParser parse_probe;static fOperator apply_meta;static fOperator on_dot;static fOperator on_chr;static fOperator on_meta;static fOperator on_class;static fOperator on_term;static fOperator on_expr;static fOperator stringify;static fOperator symbolize;static fOperator encapsulate;static fOperator make_matcher;static fOperator make_sequence;static fOperator make_any;static fOperator make_maybe;static fOperator make_many;static fOperator define_forward;static fOperator compile_bnf;static fOperator compile_rhs;static fOperator define_parser;static fOperator wrap_handler;/* Execute a parser upon an input stream by invoking its function, supplying its env. 
*/listparse( parser p, list input ){ if( !valid( p ) || !valid( input ) || p->t != PARSER ) return cons( Symbol(FAIL), cons( String("parse() validity check failed",0), input ) ); return p->Parser.f( p->Parser.env, input );}/* The result structure from a parser is either ( OK . ( <value> . <remaining input ) ) or ( FAIL . ( <error message> . <remaining input> ) )*/static objectsuccess( object result, list input ){ return cons( Symbol(OK), cons( result, input ) );}static objectfail( object errormsg, list input ){ return cons( Symbol(FAIL), cons( errormsg, input ) );}intis_ok( list result ){ return valid( eq_symbol( OK, first( result ) ) );}intnot_ok( list result ){ return ! is_ok( result );}parsersucceeds( list result ){ return Parser( result, success );}parserfails( list errormsg ){ return Parser( errormsg, fail );}/* For all of the parsers after this point, the associated parse_*() function should be considered the "lambda" or "closure" function for the constructed parser object. C, of course, doesn't have lambdas. Hence these closely associated functions are close by and have related names. These parse_* functions receive an association list of (symbol.value) pairs in their env parameter, and they extract their needed values using assoc_symbol().*//* The satisfy(pred) parser is the basis for all "leaf" parsers. Importantly, it forces the first element off of the (lazy?) input list. Therefore, all other functions that operate upon this result of this parser need not fuss with suspensions at all. */parsersatisfy( predicate pred ){ return Parser( env( NIL_, 1, Symbol(SATISFY_PRED), pred ), parse_satisfy );}static listparse_satisfy( object env, list input ){ predicate pred = assoc_symbol( SATISFY_PRED, env ); drop( 1, input ); object item = first( input ); if( ! valid( item ) ) return fail( String( "empty input", 0 ), input ); return valid( apply( pred, item ) ) ? 
success( item, rest( input ) ) : fail( LIST( String( "predicate not satisfied", 0 ), pred, NIL_ ), input );}parser item( void ){ return satisfy( Operator( NIL_, always_true ) );}booleanalways_true( object v, object it ){ return T_;}parseralpha( void ){ return satisfy( Operator( NIL_, is_alpha ) );}static booleanis_alpha( object v, object it ){ return Boolean( it->t == INT && isalpha( it->Int.i ) );}parserupper( void ){ return satisfy( Operator( NIL_, is_upper ) );}static booleanis_upper( object v, object it ){ return Boolean( it->t == INT && isupper( it->Int.i ) );}parserlower( void ){ return satisfy( Operator( NIL_, is_lower ) );}static booleanis_lower( object v, object it ){ return Boolean( it->t == INT && islower( it->Int.i ) );}parserdigit( void ){ return satisfy( Operator( NIL_, is_digit ) );}static booleanis_digit( object v, object it ){ return Boolean( it->t == INT && isdigit( it->Int.i ) );}parserliteral( object example ){ return satisfy( Operator( example, is_literal ) );}static booleanis_literal( object example, object it ){ return eq( example, it );}parserchr( int c ){ return literal( Int( c ) );}parserstr( char *s ){ return !*s ? succeeds( NIL_ ) : !s[1] ? chr( *s ) : then( chr( *s ), str( s+1 ) );}parserrange( int lo, int hi ){ return satisfy( Operator( cons( Int( lo ), Int( hi ) ), is_range ) );}static booleanis_range( object bounds, object it ){ int lo = first( bounds )->Int.i, hi = rest( bounds )->Int.i; return Boolean( it->t == INT && lo <= it->Int.i && it->Int.i <= hi );}parseranyof( char *s ){ return satisfy( Operator( String( s, 0 ), is_anyof ) );}static booleanis_anyof( object set, object it ){ return Boolean( it->t == INT && strchr( set->String.str, it->Int.i ) );}parsernoneof( char *s ){ return satisfy( Operator( String( s, 0 ), is_noneof ) );}static booleanis_noneof( object set, object it ){ return Boolean( it->t == INT && ! strchr( set->String.str, it->Int.i ) );}/* The choice combinator. Result is success if either p or q succeed. 
Short circuits q if p was successful. Not lazy. */parsereither( parser p, parser q ){ return Parser( env( NIL_, 2, Symbol(EITHER_Q), q, Symbol(EITHER_P), p ), parse_either );}static objectparse_either( object env, list input ){ parser p = assoc_symbol( EITHER_P, env ); object result = parse( p, input ); if( is_ok( result ) ) return result; parser q = assoc_symbol( EITHER_Q, env ); return parse( q, input );}/* Sequence 2 parsers and join the 2 results using a binary operator. By parameterizing this "joining" operator, this parser supports then(), thenx() and xthen() while being completely agnostic as to how joining might or might not be done. */parsersequence( parser p, parser q, binoperator op ){ return Parser( env( NIL_, 3, Symbol(SEQUENCE_OP), op, Symbol(SEQUENCE_Q), q, Symbol(SEQUENCE_P), p ), parse_sequence );}static objectparse_sequence( object env, list input ){ parser p = assoc_symbol( SEQUENCE_P, env ); object p_result = parse( p, input ); if( not_ok( p_result ) ) return p_result; parser q = assoc_symbol( SEQUENCE_Q, env ); list remainder = rest( rest( p_result ) ); object q_result = parse( q, remainder ); if( not_ok( q_result ) ){ object q_error = first( rest( q_result ) ); object q_remainder = rest( rest( q_result ) ); return fail( LIST( q_error, String( "after", 0), first( rest( p_result ) ), NIL_ ), q_remainder ); } binoperator op = assoc_symbol( SEQUENCE_OP, env ); return success( op->Operator.f( first( rest( p_result ) ), first( rest( q_result ) ) ), rest( rest( q_result ) ) );}parserthen( parser p, parser q ){ return sequence( p, q, Operator( NIL_, concat ) );}parserxthen( parser x, parser q ){ return sequence( x, q, Operator( NIL_, right ) );}parserthenx( parser p, parser x ){ return sequence( p, x, Operator( NIL_, left ) );}/* Some hacking and heuristics to massage 2 objects together into a list, taking care if either is already a list */static objectconcat( object l, object r ){ if( ! 
valid( l ) ) return r; if( r->t == LIST && valid( eq_symbol( VALUE, first( first( r ) ) ) ) && ! valid( rest( r ) ) && ! valid( rest( first( r ) ) ) ) return l; switch( l->t ){ case LIST: return cons( first( l ), concat( rest( l ), r ) ); default: return cons( l, r ); }}static objectright( object l, object r ){ return r;}static objectleft( object l, object r ){ return l;}/* Sequence parsers p and q, but define the value portion of the result of p (if successful) as (id.value) in the env of q. */parserinto( parser p, object id, parser q ){ return Parser( env( NIL_, 3, Symbol(INTO_P), p, Symbol(INTO_ID), id, Symbol(INTO_Q), q ), parse_into );}static objectparse_into( object v, list input ){ parser p = assoc_symbol( INTO_P, v ); object p_result = parse( p, input ); if( not_ok( p_result ) ) return p_result; object id = assoc_symbol( INTO_ID, v ); parser q = assoc_symbol( INTO_Q, v ); object q_result = q->Parser.f( env( q->Parser.env, 1, id, first( rest( p_result ) ) ), rest( rest( p_result ) ) ); if( not_ok( q_result ) ){ object q_error = first( rest( q_result ) ); object q_remainder = rest( rest( q_result ) ); return fail( LIST( q_error, String( "after", 0), first( rest( p_result ) ), NIL_ ), q_remainder ); } return q_result;}/* If the parser p succeeds, great! return its result. If not, who cares?! call it a success, but give a nothing value. If this parser is composed using then(), the merging of values will simply ignore this nothing value. It just disappears. If you bind() this parser to an operator, the operator can test if valid( input ) to tell whether p succeeded (and yielded a value) or not (which yielded NIL). */parsermaybe( parser p ){ return either( p, succeeds( NIL_ ) );}/* Uses a forward() to build an infinite sequence of maybe(p). */parsermany( parser p ){ parser q = forward(); *q = *maybe( then( p, q ) ); return q;}parsersome( parser p ){ return then( p, many( p ) );}/* Bind transforms a succesful result from the child parser through the operator. 
The operator's environment is supplemented with the environment passed to bind itself. */parserbind( parser p, operator op ){ return Parser( env( NIL_, 2, Symbol(BIND_P), p, Symbol(BIND_OP), op ), parse_bind );}static objectparse_bind( object env, list input ){ parser p = assoc_symbol( BIND_P, env ); operator op = assoc_symbol( BIND_OP, env ); object result = parse( p, input ); if( not_ok( result ) ) return result; object payload = rest( result ), value = first( payload ), remainder = rest( payload ); return success( apply( (union object[]){{.Operator={ OPERATOR, append(op->Operator.env, env), op->Operator.f, op->Operator.printname }}}, value ), remainder );}/* Construct a forwarding parser to aid building of loops. This parser can be composed with other parsers. Later, the higher level composed parser can be copied over this object to create the point of recursion in the parser graph. Remembers the fact that it was created as a forward by storing a flag in the hidden allocation record for the parser. This flag is not altered by overwriting the parser's normal union object. 
*/parserforward( void ){ parser p = Parser( 0, 0 ); p[-1].Header.forward = 1; return p;}parserprobe( parser p, int mode ){ return Parser( env( NIL_, 2, Symbol(PROBE_MODE), Int( mode ), Symbol(PROBE_P), p ), parse_probe );}static objectparse_probe( object env, object input ){ parser p = assoc_symbol( PROBE_P, env ); int mode = assoc_symbol( PROBE_MODE, env )->Int.i; object result = parse( p, input ); if( is_ok( result ) && mode&1 ) print( result ), puts(""); else if( not_ok( result ) && mode&2 ) print_list( result ), puts(""); return result;}/* Regex compiler */static parser regex_grammar( void );static parser regex_parser;parserregex( char *re ){ if( !regex_parser ) regex_parser = regex_grammar(); object result = parse( regex_parser, chars_from_str( re ) ); if( not_ok( result ) ) return result; return first( rest( result ) );}#define META "*+?"#define SPECIAL META ".|()[]/"static parserregex_grammar( void ){ parser dot = bind( chr('.'), Operator( NIL_, on_dot ) ); parser meta = anyof( META ); parser escape = xthen( chr('\\'), anyof( SPECIAL "\\" ) ); parser class = xthen( chr('['), thenx( SEQ( maybe( chr('^') ), maybe( chr(']') ), many( noneof( "]" ) ) ), chr(']') ) ); parser character = ANY( bind( escape, Operator( NIL_, on_chr ) ), bind( class, Operator( NIL_, on_class ) ), bind( noneof( SPECIAL ), Operator( NIL_, on_chr ) ) ); parser expr = forward(); { parser atom = ANY( dot, xthen( chr('('), thenx( expr, chr(')') ) ), character ); parser factor = into( atom, Symbol(REGEX_ATOM), bind( maybe( meta ), Operator( NIL_, on_meta ) ) ); parser term = bind( many( factor ), Operator( NIL_, on_term ) ); *expr = *bind( then( term, many( xthen( chr('|'), term ) ) ), Operator( NIL_, on_expr ) ); } return expr;}/* syntax directed compilation to parser */static parserapply_meta( parser a, object it ){ switch( it->Int.i ){ default: return a; case '*': return many( a ); case '+': return some( a ); case '?': return maybe( a ); }}static parseron_dot( object v, object it ){ return 
item();}static parseron_chr( object v, object it ){ return literal( it );}static parseron_meta( object v, object it ){ parser atom = assoc_symbol( REGEX_ATOM, v ); if( it->t == LIST && valid( eq_symbol( VALUE, first( first( it ) ) ) ) && ! valid( rest( it ) ) && ! valid( rest( rest( it ) ) ) ) return atom; return apply_meta( atom, it );}static parseron_class( object v, object it ){ if( first( it )->Int.i == '^' ) return satisfy( Operator( to_string( rest( it ) ), is_noneof ) ); return satisfy( Operator( to_string( it ), is_anyof ) );}static parseron_term( object v, object it ){ if( ! valid( it ) ) return NIL_; if( it->t == LIST && ! valid( rest( it ) ) ) it = first( it ); if( it->t == PARSER ) return it; return collapse( then, it );}static parseron_expr( object v, object it ){ if( it->t == LIST && ! valid( rest( it ) ) ) it = first( it ); if( it->t == PARSER ) return it; return collapse( either, it );}/* EBNF compiler */static parser ebnf_grammar( void );/* Compile a block of EBNF definitions into an association list of (symbol.parser) pairs. Accepts an association list of supplemental parsers for any syntactic constructs that are easier to build outside of the EBNF syntax. Accepts an association list of operators to bind the results of any named parser from the EBNF block or the supplements. 
*/listebnf( char *productions, list supplements, list handlers ){ static parser ebnf_parser; if( !ebnf_parser ) ebnf_parser = ebnf_grammar(); object result = parse( ebnf_parser, chars_from_str( productions ) ); if( not_ok( result ) ) return result; object payload = first( rest( result ) ); list defs = append( payload, env( supplements, 1, Symbol(EBNF_EPSILON), succeeds(NIL_) ) ); list forwards = map( Operator( NIL_, define_forward ), defs ); list parsers = map( Operator( forwards, compile_rhs ), defs ); list final = map( Operator( forwards, define_parser ), parsers ); map( Operator( forwards, wrap_handler ), handlers ); return final;}static parserebnf_grammar( void ){ if( !regex_parser ) regex_parser = regex_grammar(); parser spaces = many( anyof( " \t\n" ) ); parser defining_symbol = thenx( chr( '=' ), spaces ); parser choice_symbol = thenx( chr( '|' ), spaces ); parser terminating_symbol = thenx( chr( ';' ), spaces ); parser name = some( either( anyof( "-_" ), alpha() ) ); parser identifier = thenx( name, spaces ); parser terminal = bind( thenx( either( thenx( xthen( chr( '"'), many( noneof("\"") ) ), chr( '"') ), thenx( xthen( chr('\''), many( noneof( "'") ) ), chr('\'') ) ), spaces ), Operator( NIL_, make_matcher ) ); parser symb = bind( identifier, Operator( NIL_, symbolize ) ); parser nonterminal = symb; parser expr = forward(); { parser factor = ANY( terminal, nonterminal, bind( xthen( then( chr( '[' ), spaces ), thenx( expr, then( chr( ']' ), spaces ) ) ), Operator( NIL_, make_maybe ) ), bind( xthen( then( chr( '{' ), spaces ), thenx( expr, then( chr( '}' ), spaces ) ) ), Operator( NIL_, make_many ) ), bind( xthen( then( chr( '(' ), spaces ), thenx( expr, then( chr( ')' ), spaces ) ) ), Operator( NIL_, encapsulate ) ), bind( xthen( chr( '/' ), thenx( regex_parser, chr( '/' ) ) ), Operator( NIL_, encapsulate ) ) ); parser term = bind( many( factor ), Operator( NIL_, make_sequence ) ); *expr = *bind( then( term, many( xthen( choice_symbol, term ) ) ), 
Operator( NIL_, make_any ) ); }; parser definition = bind( then( symb, xthen( defining_symbol, thenx( expr, terminating_symbol ) ) ), Operator( NIL_, encapsulate) ); return some( definition );}/* helpers */static stringstringify( object env, object input ){ return to_string( input );}static symbolsymbolize( object env, object input ){ return symbol_from_string( to_string( input ) );}static listencapsulate( object env, object input ){ return one( input );}/* syntax directed translation to list form */static parsermake_matcher( object env, object input ){ return str( to_string( input )->String.str );}static listmake_sequence( object env, object input ){ if( length( input ) == 0 ) return Symbol(EBNF_EPSILON); if( length( input ) < 2 ) return input; return one( cons( Symbol(EBNF_SEQ), input ) );}static listmake_any( object env, object input ){ if( length( input ) < 2 ) return input; return one( cons( Symbol(EBNF_ANY), input ) );}static listmake_maybe( object env, object input ){ return one( cons( Symbol(EBNF_MAYBE), input ) );}static listmake_many( object env, object input ){ return one( cons( Symbol(EBNF_MANY), input ) );}/* stages of constructing the parsers from list form */static listdefine_forward( object env, object it ){ if( rest( it )->t == PARSER ) return it; return cons( first( it ), forward() );}static parsercompile_bnf( object env, object it ){ operator self = (union object[]){{.Operator={OPERATOR,env,compile_bnf}}}; switch( it->t ){ default: return it; case SYMBOL: { object ob = assoc( it, env ); return valid( ob ) ? 
ob : it; } case LIST: { object f = first( it ); if( valid( eq_symbol( EBNF_SEQ, f ) ) ) return collapse( then, map( self, rest( it ) ) ); if( valid( eq_symbol( EBNF_ANY, f ) ) ) return collapse( either, map( self, rest( it ) ) ); if( valid( eq_symbol( EBNF_MANY, f ) ) ) return many( map( self, rest( it ) ) ); if( valid( eq_symbol( EBNF_MAYBE, f ) ) ) return maybe( map( self, rest( it ) ) ); if( length( it ) == 1 ) return compile_bnf( env, f ); return map( self, it ); } }}static listcompile_rhs( object env, object it ){ if( rest( it )->t == PARSER ) return it; object result = cons( first( it ), map( (union object[]){{.Operator={OPERATOR,env,compile_bnf}}}, rest( it ) ) ); return result;}static listdefine_parser( object env, object it ){ object lhs = assoc( first( it ), env ); if( valid( lhs ) && lhs->t == PARSER && lhs->Parser.f == NULL ){ object rhs = rest( it ); if( rhs->t == LIST ) rhs = first( rhs ); *lhs = *rhs; } return it;}static listwrap_handler( object env, object it ){ object lhs = assoc( first( it ), env ); if( valid( lhs ) && lhs->t == PARSER ){ object op = rest( it ); parser copy = Parser( 0, 0 ); *copy = *lhs; *lhs = *bind( copy, op ); } return it;}#define PC11IO_H#if ! PC11PARSER_H #include "pc11parser.h"#endifenum io_symbol_codes { ARGS = END_PARSER_SYMBOLS, END_IO_SYMBOLS};int pprintf( char const *fmt, ... );int pscanf( char const *fmt, ... );code omitted for size
#define PC11TEST_H#if ! PC11IO_H #include "pc11io.h"#endifint main( void );#include <ctype.h>#include "pc11test.h"enum test_symbol_codes { TEST = END_IO_SYMBOLS, DIGIT, UPPER, NAME, NUMBER, EOL, SP, postal_address, name_part, street_address, street_name, zip_part, END_TEST_SYMBOLS};static int test_basics();static int test_parsers();static int test_regex();static int test_ebnf();static int test_io();int main( void ){ return 0 || test_basics() || test_parsers() || test_regex() || test_ebnf() || test_io() ;}static fOperator to_upper;static integerto_upper( object env, integer it ){ return Int( toupper( it->Int.i ) );}static inttest_basics(){ puts( __func__ ); list ch = chars_from_str( "abcdef" ); print( ch ), puts(""); print_list( ch ), puts(""); integer a = apply( Operator( NIL_, to_upper ), first( ch ) ); print( a ), puts(""); drop( 1, a ); print( a ), puts(""); drop( 6, ch ); print( ch ), puts(""); print_list( ch ), puts(""); drop( 7, ch ); print( ch ), puts(""); print_list( ch ), puts(""); puts(""); list xs = infinite( Int('x') ); print_list( xs ), puts(""); drop( 3, xs ); print_list( xs ), puts(""); puts(""); return 0;}static inttest_parsers(){ puts( __func__ ); list ch = chars_from_str( "a b c d 1 2 3 4" ); parser p = succeeds( Int('*') ); print_list( parse( p, ch ) ), puts(""); parser q = fails( String("Do you want a cookie?",0) ); print_list( parse( q, ch ) ), puts(""); parser r = item(); print_list( parse( r, ch ) ), puts(""); parser s = either( alpha(), item() ); print_list( parse( s, ch ) ), puts(""); parser t = literal( Int('a') ); print_list( parse( t, ch ) ), puts(""); puts(""); return 0;}static inttest_regex(){ puts( __func__ ); parser a = regex( "." ); print_list( a ), puts(""); print_list( parse( a, chars_from_str( "a" ) ) ), puts(""); print_list( parse( a, chars_from_str( "." ) ) ), puts(""); print_list( parse( a, chars_from_str( "\\." ) ) ), puts(""); puts(""); parser b = regex( "\\." 
); print_list( b ), puts(""); print_list( parse( b, chars_from_str( "a" ) ) ), puts(""); print_list( parse( b, chars_from_str( "." ) ) ), puts(""); print_list( parse( b, chars_from_str( "\\." ) ) ), puts(""); puts(""); parser c = regex( "\\\\." ); print_list( c ), puts(""); print_list( parse( c, chars_from_str( "a" ) ) ), puts(""); print_list( parse( c, chars_from_str( "." ) ) ), puts(""); print_list( parse( c, chars_from_str( "\\." ) ) ), puts(""); print_list( parse( c, chars_from_str( "\\a" ) ) ), puts(""); puts(""); parser d = regex( "\\\\\\." ); print_list( d ), puts(""); print_list( parse( d, chars_from_str( "a" ) ) ), puts(""); print_list( parse( d, chars_from_str( "." ) ) ), puts(""); print_list( parse( d, chars_from_str( "\\." ) ) ), puts(""); print_list( parse( d, chars_from_str( "\\a" ) ) ), puts(""); puts(""); parser e = regex( "\\\\|a" ); print_list( e ), puts(""); print_list( parse( e, chars_from_str( "a" ) ) ), puts(""); print_list( parse( e, chars_from_str( "." ) ) ), puts(""); print_list( parse( e, chars_from_str( "\\." ) ) ), puts(""); print_list( parse( e, chars_from_str( "\\a" ) ) ), puts(""); puts(""); parser f = regex( "[abcd]" ); print_list( f ), puts(""); print_list( parse( f, chars_from_str( "a" ) ) ), puts(""); print_list( parse( f, chars_from_str( "." ) ) ), puts(""); puts(""); return 0;}static fOperator stringify;static stringstringify( object env, list it ){ return to_string( it );}static inttest_ebnf(){ puts( __func__ ); Symbol(postal_address); Symbol(name_part); Symbol(street_address); Symbol(street_name); Symbol(zip_part); list parsers = ebnf( "postal_address = name_part street_address zip_part ;\n" "name_part = personal_part SP last_name SP opt_suffix_part EOL\n" " | personal_part SP name_part ;\n" "personal_part = initial '.' | first_name ;\n" "street_address = house_num SP street_name opt_apt_num EOL ;\n" "zip_part = town_name ',' SP state_code SP zip_code EOL ;\n" "opt_suffix_part = 'Sr.' | 'Jr.' 
| roman_numeral | ;\n" "opt_apt_num = [ apt_num ] ;\n" "apt_num = NUMBER ;\n" "town_name = NAME ;\n" "state_code = UPPER UPPER ;\n" "zip_code = DIGIT DIGIT DIGIT DIGIT DIGIT ;\n" "initial = 'Mrs' | 'Mr' | 'Ms' | 'M' ;\n" "roman_numeral = 'I' [ 'V' | 'X' ] { 'I' } ;\n" "first_name = NAME ;\n" "last_name = NAME ;\n" "house_num = NUMBER ;\n" "street_name = NAME ;\n", env( NIL_, 6, Symbol(EOL), chr('\n'), Symbol(DIGIT), digit(), Symbol(UPPER), upper(), Symbol(NUMBER), some( digit() ), Symbol(NAME), some( alpha() ), Symbol(SP), many( anyof( " \t\n" ) ) ), env( NIL_, 2, Symbol(name_part), Operator( NIL_, stringify ), Symbol(street_name), Operator( NIL_, stringify ) ) ); parser start = assoc_symbol( postal_address, parsers ); if( valid( start ) && start->t == LIST ) start = first( start ); print_list( start ), puts("\n"); print_list( parse( start, chars_from_str( "Mr. luser droog I\n" "2357 Streetname\n" "Anytown, ST 00700\n" ) ) ), puts(""); printf( "%d objects\n", count_allocations() ); return 0;}static inttest_io(){ pprintf( "%s:%c-%c\n", "does it work?", '*', '@' ); return 0;}Size:
$ make count
wc -l -c -L pc11*[ch] ppnarg.h
  180  4442    78 pc11io.c
   13   218    36 pc11io.h
  549 13731    77 pc11object.c
  361  5955    77 pc11object.h
  818 20944    80 pc11parser.c
  214  3601    63 pc11parser.h
  202  5453    69 pc11test.c
    6    82    21 pc11test.h
   29  1018    83 ppnarg.h
 2372 55444    83 total
cloc pc11*[ch] ppnarg.h
       9 text files.
       9 unique files.
       0 files ignored.

github.com/AlDanial/cloc v 1.93  T=0.05 s (194.9 files/s, 51356.5 lines/s)
-------------------------------------------------------------------------------
Language                     files          blank        comment           code
-------------------------------------------------------------------------------
C                                4            316             98           1335
C/C++ Header                     5            241             99            283
-------------------------------------------------------------------------------
SUM:                             9            557            197           1618
-------------------------------------------------------------------------------

Any improvements to make to the interface, implementation, or documentation?
- \$\begingroup\$Why so many spaces between parts of a line of code where 1 space will do?
if( it->t == LIST && ! valid( rest( it ) ) ) it = first( it );\$\endgroup\$chux– chux2022-07-01 14:55:15 +00:00CommentedJul 1, 2022 at 14:55 - \$\begingroup\$Just a few conventions I've adopted. Adding 2 spaces inside the parens of
`if` and `while` and `for` statements helps it to look different than a function call (which I write with just one space inside the parens). A separate, but related, convention is to add extra spaces in long or complex expressions to help the operator with the lowest precedence to stand out. I tend to do this with `&&`, `||`, and to the left of `?` and `:`. I find it less noisy than adding extra parens, but the intent is similar: to help the reader to see how the expression will be parsed.\$\endgroup\$luser droog– luser droog2022-07-02 01:08:07 +00:00CommentedJul 2, 2022 at 1:08 - 2\$\begingroup\$luser droog, manual formatting is simply not productive. If you can tailor/write a code formatter to match your style --> great. Otherwise, use an auto-formatter.\$\endgroup\$chux– chux2022-07-02 04:32:16 +00:00CommentedJul 2, 2022 at 4:32
- 1\$\begingroup\$Agreed. Part of the very purpose of this code is to build such a formatter. I felt like I needed something this flexible to even express a style rule like: "make sure the spacing hierarchy matches the precedence hierarchy".\$\endgroup\$luser droog– luser droog2022-07-03 03:51:33 +00:00CommentedJul 3, 2022 at 3:51
- 1\$\begingroup\$The vertical spacing definitely helps!\$\endgroup\$2022-07-03 13:29:53 +00:00CommentedJul 3, 2022 at 13:29
1 Answer1
Makefile improvements
At first glance, the makefile looks OK, but a few improvements can be made. First, you always override `CFLAGS`, although you allow things to be added to it via the environment variable `$cflags`. However, that lower case form is very non-standard, and it's more common to expect `CFLAGS=... make` to work. The usual solution is this:
CFLAGS ?= -g -Wall -Wpedantic -Wextra -Wno-unused-function -Wno-unused-parameter -Wno-switch -Wno-return-type -Wunused-variable
CFLAGS += -std=c99
In the first line, we only set those options if no `CFLAGS` were provided via the environment, and in the second line we unconditionally add any flags required for the build to work.
Second, targets that don't build anything but just run commands should be marked as `.PHONY`, so if you accidentally created a file `test` that has a timestamp newer than `pc11test`, it wouldn't prevent `make test` from working as expected. So add:
.PHONY: test clean count
About forward declarations
I've added forward declarations for all the static functions inside the .c files so all the static "helper functions" can be placed below the non-static function that uses them, so the implementation can be presented in a more top down fashion overall.
Personally I don't think that is very helpful. Now you have both the forward declaration and the actual definition to keep in sync. The files are long enough that you are going to use search functionality anyway to jump to functions.
Documentation
It is great that you are documenting all the functions and also having them grouped in a sensible way. However, I recommend that you write these documentation comments in Doxygen format. Doxygen is a widely used standard for documenting C and C++ code, and the Doxygen tools can then do all kinds of nice stuff for you: apart from generating documentation in navigable PDF and HTML formats, it can also warn you when you forgot to document functions and/or function parameters.
Naming things
Especially in pc11object.h, I am a bit surprised by some of the function names, in particular when the comments above them describe those in terms that don't match the function name itself.
For example, `drop()` has as documentation "Skip ahead n elements". Why not call the function `skip()` then, or alternatively, write "Drop the first n elements" in the comments? There are more examples of this, like `map()` "Transform", `collapse()`/`reduce()` "Fold", `env()` "Prepend", and so on.
Some comments don't make sense at all to a C programmer, like `first()` being documented as "car". If you don't know LISP, you might think "what does this have to do with automobiles?".
Some comments are needlessly complicated, like `append()` being documented as "return copy of start sharing end". But it also raises questions: does this append one list to another like the function name implies, or does it create a new list that is the concatenation of two lists like the comments hint at?
Make sure the function name, while concise, conveys clearly what it is going to do, and make sure the documentation matches. I would go over all the function names and make changes where appropriate. For example, instead of having to remember whether `collapse()` is for lists and `reduce()` is for arrays of objects, why not make them `fold_list()` and `fold_objects()`?
- 1\$\begingroup\$"comments in Doxygen format. Doxygen is the de facto standard for documenting C and C++ code" --> Any support for the de facto assertion?\$\endgroup\$chux– chux2022-07-01 14:57:32 +00:00CommentedJul 1, 2022 at 14:57
- \$\begingroup\$@chux-ReinstateMonica They say so on their website, so it must be true! I can't find any authoritative page on the popularity of each documentation generator for C though, but in my personal experience it is the most commonly used, and just searching for "C documentation generator" or "C code documentation" lists Doxygen or sites referring to Doxygen at the top.\$\endgroup\$G. Sliepen– G. Sliepen2022-07-01 15:26:13 +00:00CommentedJul 1, 2022 at 15:26
- \$\begingroup\$UV for an Appeal to Popular Belief.\$\endgroup\$chux– chux2022-07-01 17:53:56 +00:00CommentedJul 1, 2022 at 17:53
You must log in to answer this question.
Explore related questions
See similar questions with these tags.