Source code for regulations.generator.title_parsing

# vim: set fileencoding=utf-8
import re

from regulations.generator import node_types

# A section title comprises
#   - one or more §
#   - a section label like 11.45 or 11.45-50
#   - one or more space or - separators
#   - a section subject
SECTION_TITLE_REGEX = re.compile(u'^§+ ([-.\w]*)[\s-]*(.*)', re.UNICODE)


[docs]def appendix_supplement(data): """Handle items pointing to an appendix or supplement""" node_type = node_types.type_from_label(data['index']) if len(data['index']) == 2 and node_type in (node_types.APPENDIX, node_types.INTERP): element = {} if node_type == node_types.INTERP: element['is_supplement'] = True else: element['is_appendix'] = True segments = try_split(data['title']) if segments: element['label'], element['sub_label'] = segments[:2] elif '[' in data['title']: position = data['title'].find('[') element['label'] = data['title'][:position].strip() element['sub_label'] = data['title'][position:] else: element['label'] = data['title'] element['section_id'] = '-'.join(data['index']) return element
[docs]def try_split(text, chars=(u'—', '-')): """Utility method for splitting a string by one of multiple chars""" for c in chars: segments = text.split(c) if len(segments) > 1: return [s.strip() for s in segments]
[docs]def section(data): """ Parse out parts of a section title. """ if len(data['index']) == 2 and data['index'][1][0].isdigit(): element = {} element['is_section'] = True element['section_id'] = '-'.join(data['index']) if u"§§ " == data['title'][:3]: element['is_section_span'] = True else: element['is_section_span'] = False match = SECTION_TITLE_REGEX.match(data['title']) element['label'] = match.group(1) element['sub_label'] = match.group(2) return element