Source code for regulations.generator.title_parsing

# vim: set fileencoding=utf-8
import re

from regulations.generator import node_types

# A section title comprises
#   - one or more § followed by a single space
#   - a section label like 11.45 or 11.45-50 (captured as group 1)
#   - zero or more whitespace or - separators
#   - a section subject (captured as group 2; may be empty)
# The pattern is assembled from adjacent string literals so that the regex
# escapes (\w, \s) sit in raw strings -- inside a plain literal they are
# invalid string escapes, which Python warns about -- while the non-ASCII §
# keeps its original u'' literal. The resulting pattern text is unchanged.
SECTION_TITLE_REGEX = re.compile(
    u'^§+ ' r'([-.\w]*)' r'[\s-]*' r'(.*)',
    re.UNICODE)


def appendix_supplement(data):
    """Handle items pointing to an appendix or supplement.

    ``data`` must provide ``data['index']`` (the label parts, which are
    joined with ``-`` to build the section id) and ``data['title']``.

    Returns ``None`` unless the index has exactly two parts and
    ``node_types.type_from_label`` classifies it as an appendix or an
    interpretation. Otherwise returns a dict with:

    - ``is_supplement`` (interpretations) or ``is_appendix`` set to ``True``
    - ``label`` and, when the title can be divided, ``sub_label``
    - ``section_id``: the index parts joined with ``-``
    """
    node_type = node_types.type_from_label(data['index'])
    if len(data['index']) != 2 or node_type not in (node_types.APPENDIX,
                                                    node_types.INTERP):
        return None

    element = {}
    if node_type == node_types.INTERP:
        element['is_supplement'] = True
    else:
        element['is_appendix'] = True

    title = data['title']
    segments = try_split(title)
    if segments:
        # Only the first two pieces are used; any further pieces produced
        # by the split are dropped.
        element['label'], element['sub_label'] = segments[:2]
    elif '[' in title:
        # No dash separator: treat a bracketed suffix as the sub label,
        # keeping the opening bracket with it.
        position = title.find('[')
        element['label'] = title[:position].strip()
        element['sub_label'] = title[position:]
    else:
        # Nothing to divide on: the whole title is the label and no
        # 'sub_label' key is set.
        element['label'] = title

    element['section_id'] = '-'.join(data['index'])
    return element


def try_split(text, chars=(u'—', '-')):
    """Utility method for splitting a string by one of multiple chars.

    The separators in ``chars`` are tried in order. The first one that
    occurs in ``text`` (splitting on it yields more than one piece) wins:
    ``text`` is split on every occurrence of that separator and each piece
    is returned with its surrounding whitespace stripped.

    Returns the list of stripped pieces, or ``None`` when none of the
    separators occurs in ``text``.
    """
    for separator in chars:
        pieces = text.split(separator)
        if len(pieces) > 1:
            return [piece.strip() for piece in pieces]
    return None


def section(data):
    """Parse out parts of a section title.

    ``data`` must provide ``data['index']`` (the label parts) and
    ``data['title']``.

    Returns ``None`` unless the index has exactly two parts and the second
    part starts with a digit. Otherwise returns a dict with:

    - ``is_section``: always ``True``
    - ``section_id``: the index parts joined with ``-``
    - ``is_section_span``: ``True`` when the title starts with ``§§ ``
    - ``label`` / ``sub_label``: the two groups captured by
      ``SECTION_TITLE_REGEX``; when the title does not match that pattern
      the whole title becomes the label and the sub label is empty.
    """
    index = data['index']
    # Slicing instead of indexing so an empty second part is simply "not a
    # section" rather than an IndexError.
    if len(index) != 2 or not index[1][:1].isdigit():
        return None

    title = data['title']
    element = {
        'is_section': True,
        'section_id': '-'.join(index),
        'is_section_span': title[:3] == u"§§ ",
    }

    match = SECTION_TITLE_REGEX.match(title)
    if match:
        element['label'], element['sub_label'] = match.groups()
    else:
        # Title lacks the leading "§ " marker: degrade gracefully instead
        # of failing with an AttributeError on the missing match object.
        element['label'] = title
        element['sub_label'] = u''
    return element