forked from varia/varia.website
225 lines
6.4 KiB
Python
225 lines
6.4 KiB
Python
import logging
|
|
import mimetypes
|
|
import os
|
|
import pathlib
|
|
from typing import Callable, Iterable, Optional, Tuple
|
|
|
|
from pip._internal.models.candidate import InstallationCandidate
|
|
from pip._internal.models.link import Link
|
|
from pip._internal.utils.urls import path_to_url, url_to_path
|
|
from pip._internal.vcs import is_url
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Iterable of installation candidates, as returned by ``page_candidates``.
FoundCandidates = Iterable[InstallationCandidate]
# Iterable of links, as returned by ``file_links``.
FoundLinks = Iterable[Link]
# Callable that receives a link and returns the candidates found for it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that receives a link and returns a bool.
PageValidator = Callable[[Link], bool]
|
|
|
|
|
|
class LinkSource:
    """Interface shared by the link sources defined in this module.

    Every member raises ``NotImplementedError`` here; subclasses are expected
    to override all three.
    """

    @property
    def link(self) -> Optional[Link]:
        """Return the link backing this source, or ``None`` if it has none."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Return candidates obtained by parsing an archive listing HTML file."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Return links for archives that were specified directly."""
        raise NotImplementedError()
|
|
|
|
|
|
def _is_html_file(file_url: str) -> bool:
|
|
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
|
|
|
|
|
|
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    The entries of the directory are inspected and split into two groups:

    * ``page_candidates``: candidates obtained from each HTML file in the
      directory.
    * ``file_links``: a link for every other entry in the directory.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        # Resolve the real path once, at construction time.
        resolved = os.path.realpath(path)
        self._path = pathlib.Path(resolved)

    @property
    def link(self) -> Optional[Link]:
        """Always ``None`` for this source."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates found on each HTML file in the directory."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if _is_html_file(entry_url):
                yield from self._candidates_from_page(Link(entry_url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each directory entry that is not an HTML file."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if not _is_html_file(entry_url):
                yield Link(entry_url)
|
|
|
|
|
|
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. Depending on whether the link
    is guessed to be an HTML file, exactly one of these is non-empty:

    * ``page_candidates``: candidates listed on the HTML file.
    * ``file_links``: the link itself, when it is not an HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link, but only if it is an HTML file."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself, but only if it is not an HTML file."""
        if not _is_html_file(self._link.url):
            yield self._link
|
|
|
|
|
|
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: candidates listed on the page, provided the page
      validator accepts the link.
    * ``file_links``: the link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link if the page validator accepts it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself."""
        yield self._link
|
|
|
|
|
|
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """Return the link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield every candidate that ``candidates_from_page`` finds."""
        for candidate in self._candidates_from_page(self._link):
            yield candidate

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: this source provides no file links."""
        no_links: FoundLinks = ()
        return no_links
|
|
|
|
|
|
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Turn a user-supplied location into a ``(url, source)`` pair.

    :param location: An existing local path, a ``file:`` URL, or any other
        string accepted by ``is_url``.
    :param candidates_from_page: Passed on to whichever source is built.
    :param page_validator: Passed on to ``_RemoteFileSource`` only.
    :param expand_dir: For a directory, ``True`` selects
        ``_FlatDirectorySource`` and ``False`` selects
        ``_IndexDirectorySource``.
    :param cache_link_parsing: Forwarded to every ``Link`` created here.
    :returns: ``(None, None)`` when the location is neither an existing path
        nor a URL; ``(url, None)`` when it maps to a local path that is
        neither a file nor a directory; otherwise ``(url, source)``.
        A warning is logged in both of the ``None``-source cases.
    """
    local_path: Optional[str] = None
    resolved_url: Optional[str] = None

    if os.path.exists(location):
        # An existing local path: keep it and derive its URL.
        resolved_url = path_to_url(location)
        local_path = location
    elif location.startswith("file:"):
        # A file: URL: keep it and derive the local path.
        resolved_url = location
        local_path = url_to_path(location)
    elif is_url(location):
        # Any other URL has no local path.
        resolved_url = location

    if resolved_url is None:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    source: LinkSource

    if local_path is None:
        source = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
        )
        return (resolved_url, source)

    if os.path.isdir(local_path):
        if not expand_dir:
            source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
            )
        else:
            source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=local_path,
            )
        return (resolved_url, source)

    if os.path.isfile(local_path):
        source = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
        )
        return (resolved_url, source)

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (resolved_url, None)
|