80 lines
3.4 KiB
Python
80 lines
3.4 KiB
Python
import re
|
|
from guessit.patterns import sep, build_or_pattern
|
|
from guessit.patterns.numeral import parse_numeral
|
|
|
|
range_separators = ['-', 'to', 'a']
|
|
discrete_separators = ['&', 'and', 'et']
|
|
excluded_separators = ['.'] # Dot cannot serve as a discrete_separator
|
|
|
|
discrete_sep = sep
|
|
for range_separator in range_separators:
|
|
discrete_sep = discrete_sep.replace(range_separator, '')
|
|
for excluded_separator in excluded_separators:
|
|
discrete_sep = discrete_sep.replace(excluded_separator, '')
|
|
discrete_separators.append(discrete_sep)
|
|
all_separators = list(range_separators)
|
|
all_separators.extend(discrete_separators)
|
|
|
|
range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
|
|
discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
|
|
all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
|
|
|
|
|
|
def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
|
|
discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
|
|
discrete_elements = [x.strip() for x in discrete_elements]
|
|
|
|
proper_discrete_elements = []
|
|
i = 0
|
|
while i < len(discrete_elements):
|
|
if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
|
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
|
|
i += 3
|
|
else:
|
|
match = range_separators_re.search(discrete_elements[i])
|
|
if match and match.start() == 0:
|
|
proper_discrete_elements[i - 1] += discrete_elements[i]
|
|
elif match and match.end() == len(discrete_elements[i]):
|
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
|
|
else:
|
|
proper_discrete_elements.append(discrete_elements[i])
|
|
i += 1
|
|
|
|
discrete_elements = proper_discrete_elements
|
|
|
|
ret = []
|
|
|
|
for discrete_element in discrete_elements:
|
|
range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element))
|
|
range_values = [x.strip() for x in range_values]
|
|
if len(range_values) > 1:
|
|
for x in range(0, len(range_values) - 1):
|
|
start_range_ep = parse_numeral(range_values[x])
|
|
end_range_ep = parse_numeral(range_values[x+1])
|
|
for range_ep in range(start_range_ep, end_range_ep + 1):
|
|
if range_ep not in ret:
|
|
ret.append(range_ep)
|
|
else:
|
|
discrete_value = parse_numeral(discrete_element)
|
|
if discrete_value not in ret:
|
|
ret.append(discrete_value)
|
|
|
|
if len(ret) > 1:
|
|
if not allow_discrete:
|
|
valid_ret = list()
|
|
# replace discrete elements by ranges
|
|
valid_ret.append(ret[0])
|
|
for i in range(0, len(ret) - 1):
|
|
previous = valid_ret[len(valid_ret) - 1]
|
|
if ret[i+1] < previous:
|
|
pass
|
|
else:
|
|
valid_ret.append(ret[i+1])
|
|
ret = valid_ret
|
|
if fill_gaps:
|
|
ret = list(range(min(ret), max(ret) + 1))
|
|
if len(ret) > 1:
|
|
return {None: ret[0], property_list_name: ret}
|
|
if len(ret) > 0:
|
|
return ret[0]
|
|
return None |