The idea is to convert the query to a Python expression and then evaluate it.
We'll support the following syntax:
word1 AND word2 - word1 and word2
word1 word2 - word1 and word2
word1 OR word2 - word1 or word2
NOT word - not containing word
The code is very simple:
def is_operator(token):
    """Return True if *token* is a boolean operator or a parenthesis.

    The comparison is case-sensitive and expects lower-case operators
    ("and", "or", "not"); callers lower-case the query before tokenizing.
    Anything else (search words, already-evaluated booleans) is not an
    operator.
    """
    # A set literal avoids building a throwaway list on every call.
    return token in {"and", "not", "or", "(", ")"}
def should_insert_and(expr, token):
    """Decide whether an implicit "and" belongs between *expr* and *token*.

    expr  -- list of items emitted so far: lower-case operator strings
             ("and", "or", "not", "(", ")") and booleans for search words.
    token -- the next lower-case query token, not yet appended to expr.

    Returns True when the previous item ends an operand (a word result or
    a closing parenthesis) and *token* starts a new one (a word, "not" or
    an opening parenthesis); returns False otherwise, including when
    *expr* is empty.
    """
    if not expr:
        # Nothing precedes the first token, so there is nothing to join.
        return False

    # After "and", "or", "not" or "(" an operand is still expected, so the
    # previous item does not end an operand. A ")" or a word result does.
    previous_ends_operand = expr[-1] not in {"and", "or", "not", "("}

    # "and", "or" and ")" continue or close the current expression; every
    # other token (a word, "not", "(") begins a new operand.
    token_starts_operand = token not in {"and", "or", ")"}

    return previous_ends_operand and token_starts_operand
def match(query, text):
    """Return True if *text* satisfies the boolean search *query*.

    Both arguments are lower-cased and split on whitespace, so operators
    and parentheses in the query must be separated by spaces. Each search
    word is replaced by whether it occurs in the text, an "and" is added
    wherever should_insert_and() asks for one, and the resulting boolean
    expression is evaluated.

    An empty (or whitespace-only) query imposes no constraint and matches
    any text. The words "and", "or" and "not" are always treated as
    operators, so they cannot be searched for. A malformed query (for
    example one ending in "or") makes the evaluation raise SyntaxError.
    """
    tokens = query.lower().split()
    if not tokens:
        # eval("") would raise SyntaxError; treat "no terms" as a match.
        return True

    words = set(text.lower().split())
    expr = []
    for token in tokens:
        if should_insert_and(expr, token):
            expr.append("and")
        # Operators pass through verbatim; words collapse to True/False.
        expr.append(token if is_operator(token) else (token in words))

    py_expr = " ".join(map(str, expr))
    # NOTE: eval is acceptable here only because py_expr is built solely
    # from the fixed operator tokens and the literals "True"/"False";
    # no user-supplied text ever reaches it.
    return eval(py_expr)
def test():
    """Run a few sanity checks on match(); raises AssertionError on failure."""
    # (query, text, expected result, failure message)
    cases = [
        ("a", "a", True, "a --- a"),
        ("a", "", False, " --- a"),
        ("a AND b", "a c b", True, "a c b --- a AND b"),
        ("a AND b", "a c", False, "a c --- a AND b"),
        ("NOT ( a OR b )", "z", True, "z --- NOT ( a OR b )"),
        ("a OR b", "b", True, "b --- a OR b"),
    ]
    for query, text, expected, message in cases:
        outcome = bool(match(query, text))
        assert outcome is expected, message
Notes:
1. We don't do any fancy tokenization of the text or the query (both are simply split on whitespace), but in most cases this should be enough.
2. We place an AND where it's missing.
No comments:
Post a Comment