from lxml import etree
from pymods import MODSRecord, OAIRecord
from pymods.constants import NAMESPACES
def parse(source, parser=None):
    """
    Parse an XML source into an lxml tree.

    Thin wrapper that delegates directly to ``etree.parse``.

    :param source: XML input handed unchanged to ``etree.parse``
    :param parser: optional parser forwarded to ``etree.parse``; ``None`` is passed through as-is
    :return: the object returned by ``etree.parse``
    """
    tree = etree.parse(source, parser=parser)
    return tree
class Reader(etree.XMLParser):
    """
    Iterator over the elements of a parsed XML document that match a given tag.
    """

    def __init__(self, file_location, iter_elem, parser=None):
        """
        Parse the document and prepare an iterator over the matching elements.

        :param file_location: XML encoded file
        :param iter_elem: element tag handed to the tree's ``iter()``; each match is one record
        :param parser: a custom etree.XMLParser (required for custom etree.ElementBase subclasses)
        """
        super(Reader, self).__init__()
        # parse() itself defaults ``parser`` to None, so the argument can be
        # forwarded unconditionally.
        document = parse(file_location, parser=parser)
        self.iterator = document.iter(iter_elem)

    def __iter__(self):
        """Return the reader itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next matching element from the underlying tree iterator."""
        return next(self.iterator)
class MODSReader(Reader):
    """
    Reader specialised for the MODSRecord class. Iterates on mods:mods elements.
    """

    def __init__(self, file_location):
        """
        Build a parser whose default element class is MODSRecord and iterate on mods:mods elements.

        :param file_location: XML source passed on to Reader
        """
        # Register MODSRecord as the element class the parser instantiates.
        record_lookup = etree.ElementDefaultClassLookup(element=MODSRecord)
        record_parser = etree.XMLParser()
        record_parser.set_element_class_lookup(record_lookup)

        # Tag to iterate on: the value stored under NAMESPACES['mods'] followed by 'mods'
        # (presumably a Clark-notation namespace prefix -- confirm in pymods.constants).
        record_tag = '{0}mods'.format(NAMESPACES['mods'])
        super(MODSReader, self).__init__(file_location, record_tag, parser=record_parser)
class OAIReader(Reader):
    """
    Reader specialised for the OAIRecord class. Iterates over record elements in any namespace (repox or oai-pmh).
    """

    def __init__(self, file_location):
        """
        Build a parser whose default element class is OAIRecord and iterate over record elements in any namespace.

        :param file_location: XML source passed on to Reader
        """
        # Register OAIRecord as the element class the parser instantiates.
        record_lookup = etree.ElementDefaultClassLookup(element=OAIRecord)
        record_parser = etree.XMLParser()
        record_parser.set_element_class_lookup(record_lookup)

        # '{*}' is the namespace wildcard, so 'record' elements match regardless of namespace.
        record_tag = '{*}record'
        super(OAIReader, self).__init__(file_location, record_tag, parser=record_parser)