372 lines
12 KiB
Python
372 lines
12 KiB
Python
|
#!/usr/bin/python3
|
||
|
|
||
|
"""
|
||
|
Copyright (c) 2022, Ian Santopietro
|
||
|
All rights reserved.
|
||
|
|
||
|
This file is part of RepoLib.
|
||
|
|
||
|
RepoLib is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU Lesser General Public License as published by
|
||
|
the Free Software Foundation, either version 3 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
RepoLib is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public License
|
||
|
along with RepoLib. If not, see <https://www.gnu.org/licenses/>.
|
||
|
"""
|
||
|
|
||
|
import logging
|
||
|
|
||
|
from . import util
|
||
|
|
||
|
log = logging.getLogger(__name__)
|
||
|
|
||
|
class DebParseError(util.RepoError):
|
||
|
""" Exceptions related to parsing deb lines."""
|
||
|
|
||
|
def __init__(self, *args, code=1, **kwargs):
|
||
|
"""Exceptions related to parsing deb lines.
|
||
|
|
||
|
Arguments:
|
||
|
code (:obj:`int`, optional, default=1): Exception error code.
|
||
|
"""
|
||
|
super().__init__(*args, **kwargs)
|
||
|
self.code = code
|
||
|
|
||
|
def debsplit(line:str) -> list:
|
||
|
""" Improved string.split() with support for things like [] options.
|
||
|
|
||
|
Adapted from python-apt
|
||
|
|
||
|
Arguments:
|
||
|
line(str): The line to split up.
|
||
|
"""
|
||
|
line = line.strip()
|
||
|
line_list = line.split()
|
||
|
for i in line_list:
|
||
|
if util.url_validator(i):
|
||
|
line_list[line_list.index(i)] = decode_brackets(i)
|
||
|
line = ' '.join(line_list)
|
||
|
pieces:list = []
|
||
|
tmp:str = ""
|
||
|
# we are inside a [..] block
|
||
|
p_found = False
|
||
|
for char in line:
|
||
|
if char == '[':
|
||
|
p_found = True
|
||
|
tmp += char
|
||
|
elif char == ']':
|
||
|
p_found = False
|
||
|
tmp += char
|
||
|
elif char.isspace() and not p_found:
|
||
|
pieces.append(tmp)
|
||
|
tmp = ''
|
||
|
continue
|
||
|
else:
|
||
|
tmp += char
|
||
|
# append last piece
|
||
|
if len(tmp) > 0:
|
||
|
pieces.append(tmp)
|
||
|
return pieces
|
||
|
|
||
|
def encode_brackets(word:str) -> str:
|
||
|
""" Encodes any [ and ] brackets into URL-safe form
|
||
|
|
||
|
Technically we should never be recieving these, and there are other things
|
||
|
which should technically be encoded as well. However, square brackets
|
||
|
actively break the URL parsing, and must be strictly avoided.
|
||
|
|
||
|
Arguments:
|
||
|
word (str): the string to encode brackets in.
|
||
|
|
||
|
Returns:
|
||
|
`str`: the encoded string.
|
||
|
"""
|
||
|
word = word.replace('[', '%5B')
|
||
|
word = word.replace(']', '%5D')
|
||
|
return word
|
||
|
|
||
|
def decode_brackets(word:str) -> str:
|
||
|
""" Un-encodes [ and ] from the input
|
||
|
|
||
|
Since our downstream libraries should also be encoding these correctly, it
|
||
|
is better to retain these as the user entered, as that ensures they can
|
||
|
recognize it properly.
|
||
|
|
||
|
Arguments:
|
||
|
word (str): The string to decode.
|
||
|
|
||
|
Returns:
|
||
|
`str`: the decoded string.
|
||
|
"""
|
||
|
word = word.replace('%5B', '[')
|
||
|
word = word.replace('%5D', ']')
|
||
|
return word
|
||
|
|
||
|
def parse_name_ident(tail:str) -> tuple:
|
||
|
""" Find a Repolib name within the given comment string.
|
||
|
|
||
|
The name should be headed with "X-Repolib-Name:" and is not space terminated.
|
||
|
The ident should be headed with "X-Repolib-ID:" and is space terminated.
|
||
|
|
||
|
Either field ends at the end of a line, or at a subsequent definition of a
|
||
|
different field, or at a subsequent ' #' substring. Additionally, the ident
|
||
|
field ends with a subsequent space.
|
||
|
|
||
|
Arguments:
|
||
|
tail (str): The comment to search within.
|
||
|
|
||
|
Returns: tuple(name, ident, comment):
|
||
|
name (str): The detected name, or None
|
||
|
ident (str): The detected ident, or None
|
||
|
comment (str): The string with the name and ident removed
|
||
|
"""
|
||
|
tail = util.strip_hashes(tail)
|
||
|
|
||
|
# Used for sanity checking later
|
||
|
has_name = 'X-Repolib-Name' in tail
|
||
|
log.debug('Line name found: %s', has_name)
|
||
|
has_ident = 'X-Repolib-ID' in tail
|
||
|
log.debug('Line ident found: %s', has_ident)
|
||
|
|
||
|
parts: list = tail.split()
|
||
|
name_found = False
|
||
|
ident_found = False
|
||
|
name:str = ''
|
||
|
ident:str = ''
|
||
|
comment:str = ''
|
||
|
for item in parts:
|
||
|
log.debug("Checking line item: %s", item)
|
||
|
item_is_name = item.strip('#').strip().startswith('X-Repolib-Name')
|
||
|
item_is_ident = item.strip('#').strip().startswith('X-Repolib-ID')
|
||
|
|
||
|
if '#' in item and not item_is_name and not item_is_ident:
|
||
|
name_found = False
|
||
|
ident_found = False
|
||
|
|
||
|
elif item_is_name:
|
||
|
name_found = True
|
||
|
ident_found = False
|
||
|
continue
|
||
|
|
||
|
elif item_is_ident:
|
||
|
name_found = False
|
||
|
ident_found = True
|
||
|
continue
|
||
|
|
||
|
if name_found and not item_is_name:
|
||
|
name += f'{item} '
|
||
|
continue
|
||
|
|
||
|
elif ident_found and not item_is_ident:
|
||
|
ident += f'{item}'
|
||
|
ident_found = False
|
||
|
continue
|
||
|
|
||
|
elif not name_found and not ident_found:
|
||
|
c = item.strip('#')
|
||
|
comment += f'{c} '
|
||
|
|
||
|
name = name.strip()
|
||
|
ident = ident.strip()
|
||
|
comment = comment.strip()
|
||
|
|
||
|
if not name:
|
||
|
if ident:
|
||
|
name = ident
|
||
|
|
||
|
# Final sanity checking
|
||
|
if has_name and not name:
|
||
|
raise DebParseError(
|
||
|
f'Could not parse repository name from comment {comment}. Make sure '
|
||
|
'you have a space between the colon and the Name'
|
||
|
)
|
||
|
if has_ident and not ident:
|
||
|
raise DebParseError(
|
||
|
f'Could not parse repository ident from comment {comment}. Make sure '
|
||
|
'you have a space between the colon and the Ident'
|
||
|
)
|
||
|
|
||
|
return name, ident, comment
|
||
|
|
||
|
|
||
|
class ParseDeb:
|
||
|
""" Parsing for source entries.
|
||
|
|
||
|
Contains parsing helpers for one-line format sources.
|
||
|
"""
|
||
|
|
||
|
def __init__(self, debug:bool = False) -> None:
|
||
|
"""
|
||
|
Arguments:
|
||
|
debug (bool): In debug mode, the structured data is always returned
|
||
|
at the end, instead of checking for sanity (default: `False`)
|
||
|
"""
|
||
|
self.debug = debug
|
||
|
self.last_line: str = ''
|
||
|
self.last_line_valid: bool = False
|
||
|
self.curr_line: str = ''
|
||
|
self.curr_line_valid: bool = False
|
||
|
|
||
|
def parse_options(self, opt:str) -> dict:
|
||
|
""" Parses a string of options into a dictionary that repolib can use.
|
||
|
|
||
|
Arguments:
|
||
|
opt(str): The string with options returned from the line parser.
|
||
|
|
||
|
Returns:
|
||
|
`dict`: The dictionary of options with key:val pairs (may be {})
|
||
|
"""
|
||
|
opt = opt.strip()
|
||
|
opt = opt[1:-1].strip() # Remove enclosing brackets
|
||
|
options = opt.split()
|
||
|
|
||
|
parsed_options:dict = {}
|
||
|
|
||
|
for opt in options:
|
||
|
pre_key, values = opt.split('=')
|
||
|
values = values.split(',')
|
||
|
value:str = ' '.join(values)
|
||
|
try:
|
||
|
key:str = util.options_inmap[pre_key]
|
||
|
except KeyError:
|
||
|
raise DebParseError(
|
||
|
f'Could not parse line {self.curr_line}: option {opt} is '
|
||
|
'not a valid debian repository option or is unsupported.'
|
||
|
)
|
||
|
parsed_options[key] = value
|
||
|
|
||
|
return parsed_options
|
||
|
|
||
|
|
||
|
def parse_line(self, line:str) -> dict:
|
||
|
""" Parse a deb line into its individual parts.
|
||
|
|
||
|
Adapted from python-apt
|
||
|
|
||
|
Arguments:
|
||
|
line (str): The line input to parse
|
||
|
|
||
|
Returns:
|
||
|
(dict): a dict containing the requisite data.
|
||
|
"""
|
||
|
self.last_line = self.curr_line
|
||
|
self.last_line_valid = self.curr_line_valid
|
||
|
self.curr_line = line.strip()
|
||
|
parts:list = []
|
||
|
|
||
|
line_is_comment = self.curr_line == '#'
|
||
|
line_is_empty = self.curr_line == ''
|
||
|
if line_is_comment or line_is_empty:
|
||
|
raise DebParseError(f'Current line "{self.curr_line}" is empty')
|
||
|
|
||
|
line_parsed: dict = {}
|
||
|
line_parsed['enabled'] = True
|
||
|
line_parsed['name'] = ''
|
||
|
line_parsed['ident'] = ''
|
||
|
line_parsed['comments'] = []
|
||
|
line_parsed['repo_type'] = ''
|
||
|
line_parsed['uri'] = ''
|
||
|
line_parsed['suite'] = ''
|
||
|
line_parsed['components'] = []
|
||
|
line_parsed['options'] = {}
|
||
|
|
||
|
if line.startswith('#'):
|
||
|
line_parsed['enabled'] = False
|
||
|
line = util.strip_hashes(line)
|
||
|
parts = line.split()
|
||
|
if not parts[0] in ('deb', 'deb-src'):
|
||
|
raise DebParseError(f'Current line "{self.curr_line}" is invalid')
|
||
|
|
||
|
comments_index = line.find('#')
|
||
|
if comments_index > 0:
|
||
|
raw_comments:str = line[comments_index + 1:].strip()
|
||
|
(
|
||
|
line_parsed['name'],
|
||
|
line_parsed['ident'],
|
||
|
comments
|
||
|
) = parse_name_ident(raw_comments)
|
||
|
line_parsed['comments'].append(comments)
|
||
|
line = line[:comments_index]
|
||
|
|
||
|
parts = debsplit(line)
|
||
|
if len(parts) < 3: # We need at least a type, a URL, and a component
|
||
|
raise DebParseError(
|
||
|
f'The line "{self.curr_line}" does not have enough pieces to be'
|
||
|
'valid'
|
||
|
)
|
||
|
# Determine the type of the repo
|
||
|
repo_type:str = parts.pop(0)
|
||
|
if repo_type in ['deb', 'deb-src']:
|
||
|
line_parsed['repo_type'] = util.SourceType(repo_type)
|
||
|
else:
|
||
|
raise DebParseError(f'The line "{self.curr_line}" is of invalid type.')
|
||
|
|
||
|
# Determine the properties of our repo line
|
||
|
uri_index:int = 0
|
||
|
is_cdrom: bool = False
|
||
|
## The URI index is the vital piece of information we need to parse the
|
||
|
## deb line, as it's position determines what other components are
|
||
|
## present and where they are. This determines the location of the URI
|
||
|
## regardless of where it's at.
|
||
|
for part in parts:
|
||
|
if part.startswith('['):
|
||
|
if 'cdrom' in part:
|
||
|
is_cdrom = True
|
||
|
uri_index = parts.index(part)
|
||
|
else:
|
||
|
uri_index = 1
|
||
|
|
||
|
if is_cdrom:
|
||
|
# This could maybe change if the parser now differentiates between
|
||
|
# CDROM URIs and option lists
|
||
|
raise DebParseError('Repolib cannot currently accept CDROM Sources')
|
||
|
|
||
|
if uri_index != 0:
|
||
|
line_parsed['options'] = self.parse_options(parts.pop(0))
|
||
|
|
||
|
if len(line_parsed) < 2: # Should have at minimum a URI and a suite/path
|
||
|
raise DebParseError(
|
||
|
f'The line "{self.curr_line}" does not have enough pieces to be'
|
||
|
'valid'
|
||
|
)
|
||
|
|
||
|
line_uri = parts.pop(0)
|
||
|
if util.url_validator(line_uri):
|
||
|
line_parsed['uri'] = line_uri
|
||
|
|
||
|
else:
|
||
|
raise DebParseError(
|
||
|
f'The line "{self.curr_line}" has invalid URI: {line_uri}'
|
||
|
)
|
||
|
|
||
|
line_parsed['suite'] = parts.pop(0)
|
||
|
|
||
|
line_components:list = []
|
||
|
for comp in parts:
|
||
|
line_parsed['components'].append(comp)
|
||
|
|
||
|
|
||
|
has_type = line_parsed['repo_type']
|
||
|
has_uri = line_parsed['uri']
|
||
|
has_suite = line_parsed['suite']
|
||
|
|
||
|
if has_type and has_uri and has_suite:
|
||
|
# if we have these three minimum components, we can proceed and the
|
||
|
# line is valid. Otherwise, error out.
|
||
|
return line_parsed.copy()
|
||
|
|
||
|
if self.debug:
|
||
|
return line_parsed.copy()
|
||
|
|
||
|
raise DebParseError(
|
||
|
f'The line {self.curr_line} could not be parsed due to an '
|
||
|
'unknown error (Probably missing the repo type, URI, or a '
|
||
|
'suite/path).'
|
||
|
)
|