Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merged BNF and ABNF parsing #171

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -75,8 +75,7 @@ version = "0.5.0"
default-features = false # to disable Rayon for wasm32

[features]
default = ["ABNF", "serde"]
ABNF = []
default = ["serde"]
serde = ["dep:serde", "dep:serde_json"]
unstable = []
tracing = ["dep:tracing", "dep:tracing-subscriber", "dep:tracing-flame"]
4 changes: 2 additions & 2 deletions src/expression.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(clippy::vec_init_then_push)]

use crate::error::Error;
use crate::parsers::{self, BNF};
use crate::parsers;
use crate::term::Term;
use std::fmt;
use std::ops;
@@ -153,7 +153,7 @@ impl FromStr for Expression {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match all_consuming(parsers::expression::<BNF>).parse(s) {
match all_consuming(parsers::expression).parse(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
25 changes: 4 additions & 21 deletions src/grammar.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#![allow(clippy::vec_init_then_push)]

#[cfg(feature = "ABNF")]
use crate::ABNF;
use crate::error::Error;
use crate::expression::Expression;
use crate::parsers::{self, BNF, Format};
use crate::parsers;
use crate::production::Production;
use crate::term::Term;
use rand::{Rng, SeedableRng, rng, rngs::StdRng, seq::IndexedRandom};
@@ -232,8 +230,8 @@ impl Grammar {
}

/// parse a grammar given a format
pub fn parse_from<F: Format>(input: &str) -> Result<Self, self::Error> {
match parsers::grammar_complete::<F>(input) {
pub fn parse_from(input: &str) -> Result<Self, self::Error> {
match parsers::grammar_complete(input) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
@@ -521,23 +519,8 @@ impl fmt::Display for Grammar {

impl str::FromStr for Grammar {
type Err = Error;
#[cfg(feature = "ABNF")]
fn from_str(s: &str) -> Result<Self, Self::Err> {
//try to autodetect the format (in the future we'll use a detector that returns an enum, hence the gratuitous switch case)
match parsers::is_format_standard_bnf(s) {
true => match parsers::grammar_complete::<BNF>(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
},
false => match parsers::grammar_complete::<ABNF>(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
},
}
}
#[cfg(not(feature = "ABNF"))]
fn from_str(s: &str) -> Result<Self, Self::Err> {
match parsers::grammar_complete::<BNF>(s) {
match parsers::grammar_complete(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
4 changes: 0 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -15,8 +15,4 @@ pub use crate::grammar::{Grammar, ParseTree, ParseTreeNode};
pub use crate::production::Production;
pub use crate::term::Term;

#[cfg(feature = "ABNF")]
pub use parsers::ABNF;
pub use parsers::{BNF, Format};

pub(crate) use hashbrown::HashMap;
292 changes: 292 additions & 0 deletions src/parsers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
use crate::expression::Expression;
use crate::grammar::Grammar;
use crate::production::Production;
use crate::term::Term;

use nom::{
IResult, Parser,
branch::alt,
bytes::complete::{tag, take_till, take_until},
character::complete::{self, multispace0, satisfy},
combinator::{all_consuming, eof, not, opt, peek, recognize},
error::ParseError,
multi::many1,
sequence::{delimited, preceded, terminated},
};

/// Parses a nonterminal in either supported spelling and returns it as
/// [`Term::Nonterminal`].
///
/// * Bracketed form, `<name>`: everything between `<` and the next `>` is
///   taken as the name.
/// * Bare form, `name`: the first character must be alphabetic; the name then
///   runs up to (not including) the next whitespace, `)` or `]`.
///
/// Trailing whitespace and `;` comments after the nonterminal are consumed.
///
/// # Errors
///
/// Returns the underlying `nom` error when the input starts with neither `<`
/// nor an alphabetic character, or when a bracketed name is never closed.
fn nonterminal(input: &str) -> IResult<&str, Term> {
    let (input, nt) = if input.starts_with('<') {
        delimited(
            complete::char('<'),
            take_till(|c: char| c == '>'),
            complete::char('>'),
        )
        .parse(input)?
    } else {
        // This only validates the first character; its result is discarded and
        // the whole name (first character included) is taken on the next line.
        satisfy(|c: char| c.is_alphabetic()).parse(input)?;
        take_till(|c: char| c.is_whitespace() || c == ')' || c == ']').parse(input)?
    };

    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, Term::Nonterminal(nt.to_string())))
}

/// Parses the left-hand side of a production and returns its nonterminal.
///
/// Accepts a nonterminal followed by `::=` or `=`. The assignment operator may
/// be followed directly by `|` or `/`, which covers the ABNF incremental
/// alternative form `=/` (RFC 5234, section 3.3); as written, `::=|`, `::=/`
/// and `=|` are accepted as well. Trailing whitespace and `;` comments are
/// consumed.
///
/// # Errors
///
/// Fails when the input does not start with a nonterminal or when no
/// assignment operator follows it.
fn prod_lhs(input: &str) -> IResult<&str, Term> {
    let (input, nt) = nonterminal(input)?;

    let (input, _) = alt((tag("::="), tag("="))).parse(input)?;
    // Incremental alternatives: https://www.rfc-editor.org/rfc/rfc5234.html#section-3.3
    let (input, _) = opt(alt((complete::char('|'), complete::char('/')))).parse(input)?;
    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, nt))
}

/// Parses the right-hand side of a production: one or more expressions
/// separated by `|` or `/` (see `expression_next`).
fn prod_rhs(input: &str) -> IResult<&str, Vec<Expression>> {
    let mut alternatives = xt_list_with_separator(expression, expression_next);
    alternatives.parse(input)
}

///like `nom::many1` but it accepts a secend parser as an element separator
pub fn xt_list_with_separator<I, F, D, E>(
mut parser: F,
mut delimiter: D,
) -> impl Parser<I, Output = Vec<<F as Parser<I>>::Output>, Error = E>
where
I: Clone + nom::Input + Copy,
F: Parser<I, Error = E>,
D: Parser<I, Error = E>,
E: ParseError<I>,
{
move |mut input: I| {
let mut acc = vec![];
loop {
match parser.parse(input) {
Ok((i, o)) => {
acc.push(o);
input = i;
match delimiter.parse(input) {
Ok((i, _)) => {
input = i;
continue;
}
Err(nom::Err::Error(_)) => break,
Err(e) => return Err(e),
}
}
Err(e) => return Err(e),
}
}
Ok((input, acc))
}
}

/// Parses a quoted terminal and returns it as [`Term::Terminal`].
///
/// Both `"double quoted"` and `'single quoted'` literals are accepted. The
/// content runs up to the next occurrence of the same quote character; there
/// is no escape handling, so a literal cannot contain its own quote character.
/// Trailing whitespace and `;` comments are consumed.
///
/// # Errors
///
/// Fails when the input does not start with a quote or when the closing quote
/// is missing.
pub fn terminal(input: &str) -> IResult<&str, Term> {
    let (input, t) = alt((
        delimited(complete::char('"'), take_until("\""), complete::char('"')),
        delimited(complete::char('\''), take_until("'"), complete::char('\'')),
    ))
    .parse(input)?;

    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, Term::Terminal(t.to_string())))
}

/// Skips any mix of whitespace and `;` comments at the start of `input`.
///
/// A comment starts at `;` and runs up to (not including) the next `\r` or
/// `\n`. Whitespace and comments are skipped alternately until a pass consumes
/// nothing more.
///
/// Every sub-parser used here accepts empty matches, so this is not expected
/// to fail. The `char` in the result is a meaningless `'\0'` placeholder; only
/// the remaining input is of interest.
pub fn whitespace_plus_comments(mut input: &str) -> IResult<&str, char> {
    loop {
        let before_pass = input;

        (input, _) = multispace0::<&str, nom::error::Error<&str>>.parse(input)?;

        let comment = preceded(
            complete::char(';'),
            take_till(|c: char| c == '\r' || c == '\n'),
        );
        (input, _) = opt(comment).parse(input)?;

        // Nothing was consumed in this pass, so there is nothing left to skip.
        if input == before_pass {
            return Ok((input, '\0'));
        }
    }
}

/// Parses a single term.
///
/// The alternatives are tried in this order: quoted terminal, nonterminal,
/// parenthesised group `( ... )`, optional group `[ ... ]`.
pub fn term(input: &str) -> IResult<&str, Term> {
    let mut any_term = alt((
        terminal,
        nonterminal,
        anonymous_nonterminal,
        optional_anonymous_nonterminal,
    ));
    any_term.parse(input)
}

/// Parses the separator between two alternatives of a production: `|` or `/`,
/// followed by optional whitespace and `;` comments.
///
/// The returned `&str` is always empty; only the remaining input is of
/// interest.
///
/// # Errors
///
/// Fails when the input does not start with `|` or `/`.
pub fn expression_next(input: &str) -> IResult<&str, &str> {
    let (input, _) = alt((complete::char('|'), complete::char('/'))).parse(input)?;
    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, ""))
}

/// Parses one expression: a sequence of one or more terms.
///
/// A term is only accepted when it is not directly followed by `::=` or `=`.
/// Such a term is left unconsumed, which keeps the left-hand side of a
/// following production from being absorbed into this expression.
pub fn expression(input: &str) -> IResult<&str, Expression> {
    let assignment = alt((tag("::="), tag("=")));
    let term_outside_lhs = terminated(term, not(assignment));

    let (rest, terms) = many1(term_outside_lhs).parse(input)?;

    Ok((rest, Expression::from_parts(terms)))
}

/// Parses one production: a left-hand side followed by its alternatives.
///
/// After the right-hand side, the remaining input must be either empty or the
/// start of another production. That check is lookahead only and consumes
/// nothing.
pub fn production(input: &str) -> IResult<&str, Production> {
    let (after_lhs, lhs) = prod_lhs(input)?;
    let (after_rhs, rhs) = prod_rhs(after_lhs)?;

    // Both branches are wrapped in `peek`, so the input position is unchanged.
    let mut boundary = alt((recognize(peek(eof)), recognize(peek(prod_lhs))));
    let (rest, _) = boundary.parse(after_rhs)?;

    Ok((rest, Production::from_parts(lhs, rhs)))
}

/// Parses a parenthesised group, `( alt1 / alt2 ... )`, and returns its
/// alternatives as [`Term::AnonymousNonterminal`].
///
/// Trailing whitespace and `;` comments after the closing `)` are consumed.
///
/// # Errors
///
/// Fails when the input does not start with `(`, when the content is not a
/// valid list of alternatives, or when the closing `)` is missing.
pub fn anonymous_nonterminal(input: &str) -> IResult<&str, Term> {
    let (input, rhs) =
        delimited(complete::char('('), prod_rhs, complete::char(')')).parse(input)?;

    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, Term::AnonymousNonterminal(rhs)))
}

/// Parses an optional group, `[ alt1 / alt2 ... ]`, and returns it as
/// [`Term::AnonymousNonterminal`].
///
/// The group is made optional by appending one extra alternative that consists
/// of the empty terminal, so `['a' / 'b']` yields the same term as
/// `('a' / 'b' / '')`. Trailing whitespace and `;` comments after the closing
/// `]` are consumed.
///
/// # Errors
///
/// Fails when the input does not start with `[`, when the content is not a
/// valid list of alternatives, or when the closing `]` is missing.
pub fn optional_anonymous_nonterminal(input: &str) -> IResult<&str, Term> {
    let (input, mut rhs) =
        delimited(complete::char('['), prod_rhs, complete::char(']')).parse(input)?;

    // The empty alternative is what makes the group optional.
    rhs.push(Expression::from_parts(vec![Term::Terminal("".to_owned())]));

    // Propagate rather than unwrap so this parser has no panic path.
    let (input, _) = whitespace_plus_comments(input)?;

    Ok((input, Term::AnonymousNonterminal(rhs)))
}

/// Parses a grammar: optional leading whitespace and `;` comments followed by
/// one or more productions.
///
/// Input left over after the last production is returned to the caller; use
/// `grammar_complete` to require that everything is consumed.
///
/// # Errors
///
/// Fails when not even one production can be parsed.
pub fn grammar(input: &str) -> IResult<&str, Grammar> {
    let (input, _) = whitespace_plus_comments(input)?;
    // `many1` already requires the first production to succeed and, with the
    // default `nom::error::Error`, hands back that production's own error
    // unchanged, so the first production is not parsed separately beforehand.
    let (input, prods) = many1(production).parse(input)?;
    Ok((input, Grammar::from_parts(prods)))
}

/// Parses a grammar and additionally requires that the whole input is
/// consumed; any unparsed remainder is reported as an error.
pub fn grammar_complete(input: &str) -> IResult<&str, Grammar> {
    let mut whole_input = all_consuming(grammar);
    whole_input.parse(input)
}

// Unit tests for the merged BNF/ABNF parser. Most cases feed the same rule in
// both notations (`<name> ::= ... | ...` and `name = ... / ...`) and expect an
// identical result.
#[cfg(test)]
pub mod tests {
use super::*;

// A double-quoted literal becomes a `Term::Terminal` holding its content.
#[test]
fn terminal_match() {
let input = "\"hello world\"";
let expected = Term::Terminal("hello world".to_string());

let (_, actual) = terminal(input).unwrap();
assert_eq!(expected, actual);
}

// A parenthesised group can be used in a rule, and every alternative of the
// group is usable when matching input.
#[test]
fn use_anon_nonterminal() {
let grammar = "s = ('a' / 'b') 'c'";
let grammar = grammar.parse::<Grammar>().unwrap();
let inputs = vec!["ac", "bc"];
for input in inputs {
assert!(grammar.parse_input(input).next().is_some());
}
}

// `[ ... ]` parses to the same grammar as the equivalent `( ... / '')` group.
#[test]
fn parse_optional_anon_nonterminal() {
let input = "s = 'c' ['a' / 'b']";
let expected = "s = 'c' ('a' / 'b' / '')";
let input = input.parse::<Grammar>().unwrap();
let twin = expected.parse::<Grammar>().unwrap();
assert_eq!(input, twin)
}
// The incremental alternative operator `=/` is accepted.
// See https://www.rfc-editor.org/rfc/rfc5234.html#section-3.3
#[test]
//https://www.rfc-editor.org/rfc/rfc5234.html#section-3.3
fn parse_incremental_alternatives() {
let grammar = "s = a / a s
a = 'b'
a =/ 'c'";
assert!(grammar.parse::<Grammar>().is_ok());
}
// A grammar using `=/` can match an input mixing both alternatives of `a`.
// There is no explicit assertion: the test fails through `unwrap` when the
// grammar does not parse or no parse tree is produced.
#[test]
fn use_incremental_alternatives() {
let input = "s = a / (a s)
a = 'b'
a =/ 'c'";
let grammar = input.parse::<Grammar>().unwrap();
grammar
.parse_input("bcbccbbcbcbcbbbbbbbbbbbbccc")
.next()
.unwrap();
}
// The bracketed and the bare spelling yield the same nonterminal.
#[test]
fn nonterminal_match() {
let input = "<nonterminal-pattern>";
let input_aug = "nonterminal-pattern";
let expected = Term::Nonterminal("nonterminal-pattern".to_string());

let (_, actual) = nonterminal(input).unwrap();
let (_, actual_aug) = nonterminal(input_aug).unwrap();
assert_eq!(expected, actual);
assert_eq!(expected, actual_aug);
}
// A nonterminal followed by a terminal forms one two-term expression in
// both notations.
#[test]
fn expression_match() {
let input = r#"<nonterminal-pattern> "terminal-pattern""#;
let input_aug = r#"nonterminal-pattern "terminal-pattern""#;
let expected = Expression::from_parts(vec![
Term::Nonterminal("nonterminal-pattern".to_string()),
Term::Terminal("terminal-pattern".to_string()),
]);

let (_, actual) = expression(input).unwrap();
let (_, actual_aug) = expression(input_aug).unwrap();
assert_eq!(expected, actual);
assert_eq!(expected, actual_aug);
}
// A full production with two alternatives parses identically in both
// notations. The inputs are raw strings, so the trailing `\r\n` is four
// literal characters rather than a line break; they follow a `;` and are
// therefore skipped as part of a comment.
#[test]
fn production_match() {
let input = r#"<nonterminal-pattern> ::= <nonterminal-pattern> "terminal-pattern" | "terminal-pattern";\r\n"#;
let input_aug = r#"nonterminal-pattern = nonterminal-pattern "terminal-pattern" / "terminal-pattern";\r\n"#;
let expected = Production::from_parts(
Term::Nonterminal("nonterminal-pattern".to_string()),
vec![
Expression::from_parts(vec![
Term::Nonterminal("nonterminal-pattern".to_string()),
Term::Terminal("terminal-pattern".to_string()),
]),
Expression::from_parts(vec![Term::Terminal("terminal-pattern".to_string())]),
],
);

let (_, actual) = production(input).unwrap();
let (_, actual_aug) = production(input_aug).unwrap();
assert_eq!(expected, actual);
assert_eq!(expected, actual_aug);
}
// Same inputs as `production_match`, parsed as a one-production grammar.
#[test]
fn grammar_match() {
let input = r#"<nonterminal-pattern> ::= <nonterminal-pattern> "terminal-pattern" | "terminal-pattern";\r\n"#;
let input_aug = r#"nonterminal-pattern = nonterminal-pattern "terminal-pattern" / "terminal-pattern";\r\n"#;
let expected = Grammar::from_parts(vec![Production::from_parts(
Term::Nonterminal("nonterminal-pattern".to_string()),
vec![
Expression::from_parts(vec![
Term::Nonterminal("nonterminal-pattern".to_string()),
Term::Terminal("terminal-pattern".to_string()),
]),
Expression::from_parts(vec![Term::Terminal("terminal-pattern".to_string())]),
],
)]);

let (_, actual) = grammar(input).unwrap();
let (_, actual_aug) = grammar(input_aug).unwrap();
assert_eq!(expected, actual);
assert_eq!(expected, actual_aug);
}
}
84 changes: 0 additions & 84 deletions src/parsers/augmented.rs

This file was deleted.

79 changes: 0 additions & 79 deletions src/parsers/bnf.rs

This file was deleted.

217 changes: 0 additions & 217 deletions src/parsers/mod.rs

This file was deleted.

35 changes: 0 additions & 35 deletions src/parsers/nom_xt.rs

This file was deleted.

4 changes: 2 additions & 2 deletions src/production.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@

use crate::error::Error;
use crate::expression::Expression;
use crate::parsers::{self, BNF};
use crate::parsers;
use crate::term::Term;
use std::fmt;

@@ -112,7 +112,7 @@ impl fmt::Display for Production {
impl FromStr for Production {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match all_consuming(parsers::production::<BNF>).parse(s) {
match all_consuming(parsers::production).parse(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
4 changes: 2 additions & 2 deletions src/term.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
use crate::Production;
use crate::error::Error;
use crate::expression::Expression;
use crate::parsers::{self, BNF};
use crate::parsers;
use std::fmt;
use std::ops;
use std::str::FromStr;
@@ -47,7 +47,7 @@ macro_rules! term {
impl FromStr for Term {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match all_consuming(parsers::term::<BNF>).parse(s) {
match all_consuming(parsers::term).parse(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
4 changes: 0 additions & 4 deletions tests/grammar.rs
Original file line number Diff line number Diff line change
@@ -21,7 +21,6 @@ impl From<String> for MetaBNF {
}
}

#[cfg(feature = "ABNF")]
impl From<String> for MetaABNF {
fn from(abnf: String) -> Self {
MetaABNF { abnf }
@@ -36,7 +35,6 @@ const ABNF_FOR_BNF: &str = std::include_str!("./fixtures/bnf.abnf");
static GRAMMAR_FOR_BNF: LazyLock<Grammar> =
LazyLock::new(|| BNF_FOR_BNF.parse().expect("Failed to parse BNF for BNF"));

#[cfg(feature = "ABNF")]
static GRAMMAR_FOR_ABNF: LazyLock<Grammar> = LazyLock::new(|| {
let grammar_abnf = ABNF_FOR_BNF.parse().expect("Failed to parse ABNF for BNF");

@@ -71,7 +69,6 @@ impl Arbitrary for MetaBNF {
}
}

#[cfg(feature = "ABNF")]
impl Arbitrary for MetaABNF {
fn arbitrary(r#gen: &mut Gen) -> Self {
generate_grammar_with_gen(r#gen, &GRAMMAR_FOR_ABNF)
@@ -98,7 +95,6 @@ fn prop_abnf_grammar_from_str(meta: MetaABNF) -> TestResult {
TestResult::from_bool(meta_grammar.is_ok())
}

#[cfg(feature = "ABNF")]
#[test]
fn test_generated_grammars_abnf() {
QuickCheck::new()