Source code for regulations.generator.layers.location_replace

class LocationReplace(object):
    """Apply location-based layer replacements to text and XML nodes.

    Occurrences of a search string are numbered in the order they are found,
    and only the occurrences whose number is listed in ``locations`` are
    replaced. Text between ``<`` and ``>`` is skipped when looking for
    occurrences, so only the original (non-markup) text is taken into account
    when doing a replacement.

    State kept on the instance:

    * ``offset_starter`` -- number given to the first occurrence found by the
      next ``update_offsets`` call.
    * ``offset_counters`` -- occurrence numbers produced by the most recent
      ``update_offsets`` call (``None`` before the first call).
    * ``offsets`` -- dict mapping each of those numbers to its
      ``(start, end)`` character span (``None`` before the first call).
    """

    def __init__(self):
        self.offset_starter = 0
        self.offset_counters = None
        self.offsets = None

    @staticmethod
    def find_all_offsets(pattern, text, offset=0):
        """Return the ``(start, end)`` span of every ``pattern`` in ``text``.

        Regular expressions are deliberately avoided as they are a tad slow.
        The search resumes one character after each hit, so overlapping
        occurrences are all reported.

        :param pattern: substring to look for.
        :param text: string to search.
        :param offset: value added to both ends of every span; used when
            ``text`` is a slice of a larger string.
        :returns: list of ``(start, end)`` tuples, ``end`` exclusive.
        """
        matches = []
        pattern_len = len(pattern)
        position = text.find(pattern)
        while position != -1:
            matches.append(
                (position + offset, position + pattern_len + offset))
            position = text.find(pattern, position + 1)
        return matches

    def update_offsets(self, original, text):
        """Recompute where ``original`` occurs in ``text``, skipping markup.

        Offsets change every time we replace the text, since we add more
        characters, so they are rebuilt from scratch on each call. The
        results are stored in ``self.offset_counters`` and ``self.offsets``;
        numbering starts at ``self.offset_starter``.

        A ``<`` without a closing ``>`` is treated as plain text: scanning for
        tags stops there and the remainder is searched as one segment.

        :param original: substring whose occurrences are being located.
        :param text: string that may contain ``<...>`` markup.
        """
        list_offsets = []
        segment_start = 0
        while True:
            lt = text.find('<', segment_start)
            if lt == -1:
                break
            gt = text.find('>', lt)
            if gt == -1:
                # Unterminated tag: without this guard the scan would either
                # restart from the beginning (duplicating spans) or never
                # terminate when the text ends in '<'.
                break
            list_offsets.extend(LocationReplace.find_all_offsets(
                original, text[segment_start:lt], segment_start))
            segment_start = gt + 1
        # Whatever follows the last complete tag (or the whole text if there
        # were no complete tags).
        list_offsets.extend(LocationReplace.find_all_offsets(
            original, text[segment_start:], segment_start))

        self.offset_counters = list(range(
            self.offset_starter, self.offset_starter + len(list_offsets)))
        self.offsets = dict(zip(self.offset_counters, list_offsets))

    def update_offset_starter(self):
        """Advance ``offset_starter`` past the occurrences already seen.

        As we're navigating the XML node, we need to keep track of how many
        offsets we've already seen. Nothing changes when the last
        ``update_offsets`` call found no occurrence (or was never made).
        """
        if self.offset_counters:
            self.offset_starter = self.offset_counters[-1] + 1

    def location_replace_text(self, text, original, replacement, locations):
        """Replace the selected occurrences of ``original`` in ``text``.

        :param text: string to rewrite; may contain ``<...>`` markup, which
            is never matched.
        :param original: substring to replace.
        :param replacement: string substituted for each selected occurrence.
        :param locations: collection of occurrence numbers to replace.
        :returns: the rewritten string.
        """
        self.update_offsets(original, text)

        text_segments = []
        text_begin = 0
        for location in sorted(self.offsets):
            if location not in locations:
                continue
            start, end = self.offsets[location]
            # unrelated text
            text_segments.append(text[text_begin:start])
            # s/original/replacement
            text_segments.append(replacement)
            text_begin = end
        # tail of unrelated text
        text_segments.append(text[text_begin:])

        # offset_starter is shared between segments of xml nodes (in
        # location_replace, below)
        if original not in replacement:
            # NOTE(review): this bump only survives when no occurrence was
            # found in ``text``; otherwise update_offset_starter() below
            # overwrites it. Confirm that is the intent.
            self.offset_starter += len(locations)
        self.update_offset_starter()
        return "".join(text_segments)

    def location_replace(self, xml_node, original, replacement, locations):
        """Replace the selected occurrences of ``original`` within a node.

        The node's ``text``, then each child (recursively), then the node's
        ``tail`` are rewritten in place, sharing one running occurrence count.

        @todo: This doesn't appear to be used anymore?

        :param xml_node: element exposing ``text``, ``tail`` and iteration
            over its children.
        :param original: substring to replace.
        :param replacement: string substituted for each selected occurrence.
        :param locations: collection of occurrence numbers to replace.
        """
        if xml_node.text:
            xml_node.text = self.location_replace_text(
                xml_node.text, original, replacement, locations)

        # Iterate the element directly: getchildren() is deprecated and was
        # removed from xml.etree.ElementTree in Python 3.9.
        for child in xml_node:
            self.location_replace(child, original, replacement, locations)

        if xml_node.tail:
            xml_node.tail = self.location_replace_text(
                xml_node.tail, original, replacement, locations)