Fix gnf_converter.py script (#616)
This commit is contained in:
parent
a02b90be44
commit
9e382c4177
@ -28,10 +28,6 @@ def convert_to_gnf(grammar, start):
|
|||||||
if DEBUG:
|
if DEBUG:
|
||||||
with open('debug_mixed.json', 'w+') as fd:
|
with open('debug_mixed.json', 'w+') as fd:
|
||||||
json.dump(grammar, fd)
|
json.dump(grammar, fd)
|
||||||
grammar = break_rules(grammar) # eliminate rules with more than two non-terminals
|
|
||||||
if DEBUG:
|
|
||||||
with open('debug_break.json', 'w+') as fd:
|
|
||||||
json.dump(grammar, fd)
|
|
||||||
grammar = gnf(grammar)
|
grammar = gnf(grammar)
|
||||||
|
|
||||||
# Dump GNF form of the grammar with only reachable rules
|
# Dump GNF form of the grammar with only reachable rules
|
||||||
@ -45,6 +41,53 @@ def convert_to_gnf(grammar, start):
|
|||||||
grammar["Start"] = [start]
|
grammar["Start"] = [start]
|
||||||
return grammar
|
return grammar
|
||||||
|
|
||||||
|
def remove_left_recursion(grammar):
    """Remove direct left recursion from the grammar rules.

    This algorithm is adopted from
    https://www.geeksforgeeks.org/introduction-of-parsing-ambiguity-and-parsers-set-1/
    Note that the current implementation does not guarantee completeness
    and will not remove indirect recursions similar to
    { "A": ["BC"], "B": ["AD"] }.  Therefore, we need to call this
    function each time the rules are updated.

    :param grammar: dict mapping LHS nonterminal -> list of rule strings.
    :return: defaultdict(list) with direct left recursion rewritten.
    """
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    no_left_recursion = False
    while not no_left_recursion:
        for lhs, rules in old_grammar.items():
            left_recursion = []
            others = []
            for rule in rules:
                tokens = gettokens(rule)
                if tokens[0] == lhs:
                    left_recursion.append(tokens)
                else:
                    others.append(tokens)
            if left_recursion:
                # Classic rewrite:  A -> A a | b   becomes
                #   A  -> b A'
                #   A' -> a A' | ' '   (epsilon)
                new_rule = get_nonterminal()
                for r in others:
                    r.append(new_rule)
                left_recursion = [r[1:] + [new_rule] for r in left_recursion]
                left_recursion.append(["' '"])
                new_grammar[lhs] = [' '.join(rule) for rule in others]
                new_grammar[new_rule] = [' '.join(rule) for rule in left_recursion]
            else:
                new_grammar[lhs] = [' '.join(rule) for rule in others]
        # Re-scan for any remaining direct left recursion; another pass is
        # needed only if some rule still starts with its own LHS.
        no_left_recursion = True
        for lhs, rules in old_grammar.items():
            for rule in rules:
                tokens = gettokens(rule)
                if tokens[0] == lhs:
                    # BUG FIX: this previously assigned `left_recursion = False`,
                    # clobbering the local list and never clearing the exit flag,
                    # so the re-scan could never trigger another pass.
                    no_left_recursion = False
                    break
            else:
                continue
            break
        if not no_left_recursion:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar
|
||||||
|
|
||||||
def get_reachable(grammar, start):
|
def get_reachable(grammar, start):
|
||||||
'''
|
'''
|
||||||
Returns a grammar without dead rules
|
Returns a grammar without dead rules
|
||||||
@ -78,6 +121,7 @@ def gnf(grammar):
|
|||||||
new_grammar = defaultdict(list)
|
new_grammar = defaultdict(list)
|
||||||
isgnf = False
|
isgnf = False
|
||||||
while not isgnf:
|
while not isgnf:
|
||||||
|
old_grammar = remove_left_recursion(old_grammar)
|
||||||
for lhs, rules in old_grammar.items():
|
for lhs, rules in old_grammar.items():
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
tokens = gettokens(rule)
|
tokens = gettokens(rule)
|
||||||
@ -85,6 +129,7 @@ def gnf(grammar):
|
|||||||
new_grammar[lhs].append(rule)
|
new_grammar[lhs].append(rule)
|
||||||
continue
|
continue
|
||||||
startoken = tokens[0]
|
startoken = tokens[0]
|
||||||
|
assert(startoken != lhs)
|
||||||
endrule = tokens[1:]
|
endrule = tokens[1:]
|
||||||
if not isTerminal(startoken):
|
if not isTerminal(startoken):
|
||||||
newrules = []
|
newrules = []
|
||||||
@ -184,22 +229,17 @@ def remove_mixed(grammar):
|
|||||||
new_grammar = defaultdict(list)
|
new_grammar = defaultdict(list)
|
||||||
for lhs, rules in grammar.items():
|
for lhs, rules in grammar.items():
|
||||||
for rhs in rules:
|
for rhs in rules:
|
||||||
# tokens = rhs.split(' ')
|
|
||||||
regen_rule = []
|
|
||||||
# print('---------------------')
|
|
||||||
# print(rhs)
|
|
||||||
tokens = gettokens(rhs)
|
tokens = gettokens(rhs)
|
||||||
if len(gettokens(rhs)) == 1:
|
if len(tokens) == 1:
|
||||||
new_grammar[lhs].append(rhs)
|
new_grammar[lhs].append(rhs)
|
||||||
continue
|
continue
|
||||||
for token in tokens:
|
regen_rule = [tokens[0]]
|
||||||
|
for token in tokens[1:]:
|
||||||
# print(token, isTerminal(token), regen_rule)
|
# print(token, isTerminal(token), regen_rule)
|
||||||
# Identify if there is a terminal in the RHS
|
# Identify if there is a terminal in the RHS
|
||||||
if isTerminal(token):
|
if isTerminal(token):
|
||||||
# Check if a corresponding nonterminal already exists
|
# Check if a corresponding nonterminal already exists
|
||||||
# nonterminal = terminal_exist(token, new_grammar)
|
nonterminal = terminal_exist(token, new_grammar)
|
||||||
nonterminal = None
|
|
||||||
# TODO(andrea) disabled ATM, further investigation using the Ruby grammar needed
|
|
||||||
if nonterminal:
|
if nonterminal:
|
||||||
regen_rule.append(nonterminal)
|
regen_rule.append(nonterminal)
|
||||||
else:
|
else:
|
||||||
@ -211,60 +251,17 @@ def remove_mixed(grammar):
|
|||||||
new_grammar[lhs].append(' '.join(regen_rule))
|
new_grammar[lhs].append(' '.join(regen_rule))
|
||||||
return new_grammar
|
return new_grammar
|
||||||
|
|
||||||
def break_rules(grammar):
    """Split productions with more than two tokens into binary rules.

    Repeatedly factors the prefix (all tokens but the last) of any long
    rule into a dedicated nonterminal, reusing an existing nonterminal
    when one already produces the same prefix, until no rule has more
    than two tokens.

    :param grammar: dict mapping LHS nonterminal -> list of rule strings.
    :return: defaultdict(list) where every non-terminal rule has <= 2 tokens.
    """
    old_grammar = copy.deepcopy(grammar)
    new_grammar = defaultdict(list)
    done = False
    while not done:
        for lhs, rules in old_grammar.items():
            for rhs in rules:
                tokens = gettokens(rhs)
                if len(tokens) <= 2 or isTerminal(rhs):
                    # Already binary (or a terminal rule): keep verbatim.
                    new_grammar[lhs].append(rhs)
                    continue
                # Factor everything but the last token into its own rule.
                prefix = ' '.join(tokens[:-1])
                nonterminal = terminal_exist(prefix, new_grammar)
                if not nonterminal:
                    nonterminal = get_nonterminal()
                    new_grammar[nonterminal].append(prefix)
                new_grammar[lhs].append(' '.join([nonterminal, tokens[-1]]))
        # One splitting pass shortens rules by a single token, so loop
        # until a full scan finds nothing left to break.
        done = True
        for rules in new_grammar.values():
            if any(len(gettokens(r)) > 2 and not isTerminal(r) for r in rules):
                done = False
                break
        if not done:
            old_grammar = copy.deepcopy(new_grammar)
            new_grammar = defaultdict(list)
    return new_grammar
|
|
||||||
|
|
||||||
def strip_chars(rule):
    """Trim surrounding whitespace (newlines, tabs, spaces) from a rule string."""
    return rule.strip(" \t\n")
|
||||||
|
|
||||||
def get_nonterminal():
    """Mint a fresh nonterminal name, unique per invocation.

    Bumps the module-level COUNT counter and derives the name from it,
    so every call yields a name not handed out before.
    """
    global COUNT
    COUNT = COUNT + 1
    return "GeneratedTermVar%d" % COUNT
|
||||||
|
|
||||||
def terminal_exist(token, grammar):
    """Find a nonterminal whose productions already contain `token`.

    Only single-character tokens are eligible for reuse; anything longer
    is never matched.  Returns the matching LHS name, or None when no
    reusable nonterminal exists.
    """
    if len(token) != 1:
        return None
    for lhs, productions in grammar.items():
        if token in productions:
            return lhs
    return None
|
||||||
|
|
||||||
|
4
utils/gramatron/grammars/test1.json
Normal file
4
utils/gramatron/grammars/test1.json
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"B": ["'a'", "'b'"],
|
||||||
|
"A": ["A 'a'", "'a'"]
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user