Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pluralize #82

Merged
merged 9 commits into from
Aug 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changes/82.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Add a new module for a PLURALIZE tag.
For a noun or a list of nouns, it will match both the singular and the plural form of each word.

Implemented by:
Roland M. Mueller (https://github.com/rolandmueller)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rita-dsl"
version = "0.5.8"
version = "0.5.9"
description = "DSL for building language rules"
authors = [
"Šarūnas Navickas <[email protected]>"
Expand Down
2 changes: 1 addition & 1 deletion rita/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

logger = logging.getLogger(__name__)

__version__ = (0, 5, 8, os.getenv("VERSION_PATCH"))
__version__ = (0, 5, 9, os.getenv("VERSION_PATCH"))


def get_version():
Expand Down
41 changes: 41 additions & 0 deletions rita/modules/pluralize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import sys
import logging
try:
    import inflect
except ImportError:
    # The module cannot do anything without inflect: log the failure
    # (logging.exception also records the traceback) and terminate.
    # The trailing space in the first literal matters: adjacent string
    # literals are joined verbatim, so without it the two sentences run
    # together in the logged message.
    logging.exception(
        "Pluralize module requires 'inflect' package to be installed. "
        "Install it and try again"
    )
    sys.exit(1)

from rita.macros import resolve_value
from rita.utils import flatten


def pluralizing(initial_list):
    """
    Return the given nouns followed by the plural form of each of them.

    The plurals are produced by ``inflect`` and appended after the
    original entries, in the same order as the input.
    """
    engine = inflect.engine()
    plural_forms = list(map(engine.plural, initial_list))
    return initial_list + plural_forms


def PLURALIZE(*args, config, op=None):
    """
    Match the singular or the plural form of a noun or of a list of nouns.

    Usage for a single word, e.g.:
        PLURALIZE("car")
    Usage for lists, e.g.:
        vehicles = {"car", "bicycle", "ship"}
        PLURALIZE(vehicles)

    Will work even for regex or if the lemmatizer of spaCy is making an error.
    Has dependency to the Python inflect package https://pypi.org/project/inflect/

    Only the first positional argument decides which form is used. When it
    is a list, all arguments are flattened and each entry goes through
    ``resolve_value`` with the given ``config``; otherwise the first
    argument is taken as the single word, as given.

    Returns a tuple ``("any_of", words, op)`` where ``words`` holds the
    initial nouns followed by their plurals and ``op`` is passed through
    unchanged.
    """
    # isinstance instead of an exact type comparison, so that list
    # subclasses are treated as lists as well.
    if isinstance(args[0], list):
        initial_list = [
            resolve_value(arg, config=config)
            for arg in flatten(args)
        ]
    else:
        # Single-word form: the word is used as-is, without resolve_value.
        initial_list = [args[0]]
    return "any_of", pluralizing(initial_list), op
2 changes: 1 addition & 1 deletion rita/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def load_macro(name, config):
pass

def lazy_load(*args, **kwargs):
print(config.modules)
logger.info(config.modules)
for mod in config.modules:
try:
fn = getattr(mod, name)
Expand Down
22 changes: 22 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,25 @@ def test_inlist_word_based(engine):
results = parser(text)
print(results)
assert len(results) == 0


@pytest.mark.parametrize('engine', [standalone_engine, spacy_engine])
def test_pluralize(engine):
    # A rule built with PLURALIZE must hit both the singular and the
    # plural vehicle nouns that follow a number.
    rules = """
!IMPORT("rita.modules.pluralize")

vehicles={"car", "motorbike", "bicycle", "ship", "plane"}
{NUM, PLURALIZE(vehicles)}->MARK("VEHICLES")
"""
    parser = engine(rules)

    text = """
There were 7 cars, 2 motorbikes, 1 ship, 1 bicycle and 9 planes
"""

    # Keep only the matched spans that were labelled by the rule above.
    results = {
        matched
        for matched, label in parser(text)
        if label == "VEHICLES"
    }
    print(results)

    expected = {"7 cars", "2 motorbikes", "1 ship", "1 bicycle", "9 planes"}
    assert len(results) == 5
    assert expected == results
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ isolated_build = True
deps =
ply: ply==3.11
spacy
inflect
pytest
pytest-mock
pytest-benchmark
Expand All @@ -20,6 +21,7 @@ deps =
codecov
ply: ply==3.11
spacy
inflect
pytest
pytest-benchmark
pytest-cov
Expand Down