- Notifications
You must be signed in to change notification settings - Fork0
Parser-combinators library.
License
NotificationsYou must be signed in to change notification settings
besok/parsit
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
This library provides a very simple and lightweight parser (recursive descent, LL(1)) to combine and express a grammar.
The library uses Logos as a lexical analyzer and tokenizer.
The major incentives for this library were:
- lightweight : very small and does not require a deep dive
- transparency : literally 3 structs with a handful of methods
- speed : good speed (thanks to Logos)
logos = "*"
use logos::Logos;#[derive(Logos,Debug,PartialEq)]enumToken{// Tokens can be literal strings, of any length.#[token("fast")]Fast,#[token(".")]Period,// Or regular expressions.#[regex("[a-zA-Z]+")]Text,// Logos requires one token variant to handle errors,// it can be named anything you wish.#[error]// We can also use this variant to define whitespace,// or any other matches we wish to skip.#[regex(r"[ \t\n\f]+", logos::skip)]Error,}
The library provides a Parsit<'a,T>
instance that encompasses a set of tokens and auxiliary methods.
structParser<'a>{inner:Parsit<'a,Token<'a>>,}
The helpers:
- the macro token!, which simplifies comparing and matching single tokens
- methods
then
,then_zip
and others from Step
- methods
one_or_more
,zero_or_more
from Parsit
fntext(&self,pos:usize) ->Result<Vec<Sentence<'a>>,ParseError<'a>>{self.inner.zero_or_more(pos, |p|self.sentence(p)).into()}
usecrate::parser::Parsit;usecrate::token;usecrate::step::Step;usecrate::parser::EmptyToken;usecrate::error::ParseError;use logos::Logos;#[derive(Logos,Debug,Copy,Clone,PartialEq)]pubenumToken<'a>{#[regex(r"[a-zA-Z-]+")]Word(&'astr),#[token(",")]Comma,#[token(".")]Dot,#[token("!")]Bang,#[token("?")]Question,#[regex(r"[ \t\r\n\u000C\f]+", logos::skip)]Whitespace,#[error]Error,}#[derive(Debug,Copy,Clone,PartialEq)]enumItem<'a>{Word(&'astr),Comma,}#[derive(Debug,Clone,PartialEq)]enumSentence<'a>{Sentence(Vec<Item<'a>>),Question(Vec<Item<'a>>),Exclamation(Vec<Item<'a>>),}structParser<'a>{inner:Parsit<'a,Token<'a>>,}impl<'a>Parser<'a>{fnnew(text:&'astr) ->Parser<'a>{let delegate:Parsit<Token> =Parsit::new(text).unwrap();Parser{inner: delegate}}fnsentence(&self,pos:usize) ->Step<'a,Sentence<'a>>{let items = |p|self.inner.one_or_more(p, |p|self.word(p));let sentence = |p|items(p).then_zip(|p|token!(self.inner.token(p) =>Token::Dot)).take_left().map(Sentence::Sentence);let exclamation = |p|items(p).then_zip(|p|token!(self.inner.token(p) =>Token::Bang)).take_left().map(Sentence::Exclamation);let question = |p|items(p).then_zip(|p|token!(self.inner.token(p) =>Token::Question)).take_left().map(Sentence::Question);sentence(pos).or_from(pos).or(exclamation).or(question).into()}fnword(&self,pos:usize) ->Step<'a,Item<'a>>{token!(self.inner.token(pos) =>Token::Word(v) =>Item::Word(v),Token::Comma =>Item::Comma)}fntext(&self,pos:usize) ->Result<Vec<Sentence<'a>>,ParseError<'a>>{self.inner.zero_or_more(pos, |p|self.sentence(p)).into()}}#[test]fntest(){let parser =Parser::new(r#" I have a strange addiction, It often sets off sparks! I really cannot seem to stop, Using exclamation marks! Anyone heard of the interrobang? The poem is for kids. "#);let result = parser.text(0).unwrap();println!("{:?}", result);}
token
- gives a possibility to pull out the current token
one_or_more
- gives one-or-more semantics
zero_or_more
- gives zero-or-more semantics
validate_eof
- ensures the parser reaches the end of the input
token!
- parses the current token. In general, it is used as follows: token!(p.token(pos) => T::Bang => "!")
wrap!
- implements a simple grammar pattern like left value right
, for instance [1,2,3]
or (a,b)
- can handle the default value like
wrap!(0 => left; value or default; right)
- can handle the option value like
wrap!(0 => left; value ?; right)
- can handle the default value like
seq!
- implements a simple sequence pattern like el sep el ...
, for instance 1,2,3
- can have a
,
at the end, signaling that the separator can be at the end of the seq, like 1,2,3 (,)?
- can have a
or
- gives an alternative within a horizon of one token
or_from
- gives a backtracking option
then
- gives a basic combination with the next rule, omitting the current one
then_zip
- combines the current result and the next one into a pair
then_skip
- parses the next one but drops its result, keeping only the current one
then_or_none
- combines the next one, as an option, with the current one, or returns none otherwise
take_left
- drops the right value from a pair
take_right
- drops the left value from a pair
merge
- merges a value into a list
to_map
- transforms a list of pairs into a map
or_val
- replaces a value with a default value if it is not present
or_none
- replaces a value with none if it is not present
ok
- transforms a value into an option
error
- transforms an error into an option
map
- transforms a value
combine
- combines a value with another value from a given step
validate
- validates a given value and turns it into an error if the validation fails
print
- prints a step
print_with
- prints a step with a given prefix
print_as
- prints a step with a transformation of the value
print_with_as
- prints a step with a transformation of the value and a given prefix
parsit.env
- prints a position and its environment from the source text (with a radius of 3 tokens)
To test a lexer, there are helper methods in crate::parsit::test::lexer_test::*
use logos::Logos;usecrate::parsit::test::lexer_test::*;#[derive(Logos,Debug,PartialEq)]pubenumT<'a>{#[regex(r"[a-zA-Z-]+")]Word(&'astr),#[token(",")]Comma,#[token(".")]Dot,#[token("!")]Bang,#[token("?")]Question,#[regex(r"[ \t\r\n]+", logos::skip)]Whitespace,#[error]Error,}#[test]fntest(){expect::<T>("abc, bcs!",vec![T::Word("abc"),T::Comma,T::Word("bcs"),T::Bang]);expect_succeed::<T>("abc, bcs!");expect_failed::<T>("abc, bcs >> !");expect_failed_with::<T,_>("abc, bcs > !", |e| e.is_bad_token_on(">"));}
To test a parser, there are helper methods in crate::parsit::test::parser_test::*
- expect : expect to parse a given value
- expect_or_env : expect to parse a given value; otherwise it will print an env (parsit.env)
- expect_pos : expect to parse and get a cursor on a given pos
- expect_pos_or_env : expect to parse and get a cursor on a given pos; otherwise it will print an env (parsit.env)
- fail : should fail parsing
- fail_on : should fail parsing on a given position
use logos::Logos;usecrate::parsit::test::parser_test::fail;usecrate::parsit::test::parser_test::parsit;usecrate::parsit::token;usecrate::parsit::parser::Parsit;usecrate::parsit::step::Step;#[derive(Logos,Debug,PartialEq)]pubenumT<'a>{#[regex(r"[a-zA-Z-]+")]Word(&'astr),#[token(",")]Comma,#[token(".")]Dot,#[token("!")]Bang,#[token("?")]Question,#[regex(r"[ \t\r\n]+", logos::skip)]Whitespace,#[error]Error,}#[test]fntest_expect(){let p =parsit("abc!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) =>*v);let step =word(0).then_or_val_zip(bang,"").map(|(a, b)|format!("{}{}", a, b));expect(step,"abc!".to_string());}#[test]fntest_expect_or_env(){let p =parsit("abc!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) =>*v);let step =word(0).then_or_val_zip(bang,"").map(|(a, b)|format!("{}{}", a, b));expect_or_env(p,step,"abc!".to_string());}#[test]fntest_pos(){let p =parsit("abc!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) => v);let step =word(0).then_or_val_zip(bang,"");expect_pos(step,2);// the next position to parse}#[test]fntest_pos_or_env(){let p =parsit("abc!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) => v);let step =word(0).then_or_val_zip(bang,"");expect_pos_or_env(p,step,2);// the next position to parse}#[test]fntest_fail(){let p =parsit("abc?!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) => v);let step =word(0).then_zip(bang);fail(step);}#[test]fntest_fail_on(){let p =parsit("abc?!");let bang = |pos:usize|token!(p.token(pos) =>T::Bang =>"!");let word = |pos:usize|token!(p.token(pos) =>T::Word(v) => v);let step =word(0).then_zip(bang);fail_on(step,1);}
About
Parser-combinators library.
Topics
Resources
License
Uh oh!
There was an error while loading.Please reload this page.
Stars
Watchers
Forks
Releases
No releases published
Packages0
No packages published
Uh oh!
There was an error while loading.Please reload this page.