add files
This commit is contained in:
parent
a27f14f036
commit
fb52d49f74
1664 changed files with 749794 additions and 0 deletions
|
|
@ -0,0 +1,168 @@
|
|||
"""
|
||||
Soup Sieve.
|
||||
|
||||
A CSS selector filter for BeautifulSoup4.
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Isaac Muse
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||
from . import css_parser as cp
|
||||
from . import css_match as cm
|
||||
from . import css_types as ct
|
||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||
import bs4 # type: ignore[import-untyped]
|
||||
from typing import Any, Iterator, Iterable
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = (
    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
    'closest', 'compile', 'filter', 'iselect',
    'match', 'select', 'select_one'
)

# Expose the `SoupSieve` class from `css_match` at module level; `compile`
# uses it to recognize patterns that are already compiled.
SoupSieve = cm.SoupSieve
|
||||
|
||||
|
||||
def compile(  # noqa: A001
    pattern: str,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> cm.SoupSieve:
    """
    Compile a CSS pattern, or hand back an already compiled one.

    A `SoupSieve` instance is returned unchanged, but only when no `flags`,
    `namespaces`, or `custom` argument accompanies it; otherwise a
    `ValueError` is raised. Any other pattern is forwarded to the cached
    compiler, with `namespaces` and `custom` wrapped in their immutable
    container types when they are provided.
    """

    if not isinstance(pattern, SoupSieve):
        wrapped_namespaces = namespaces if namespaces is None else ct.Namespaces(namespaces)
        wrapped_custom = custom if custom is None else ct.CustomSelectors(custom)
        return cp._cached_css_compile(pattern, wrapped_namespaces, wrapped_custom, flags)

    # The pattern is already compiled: extra compile options cannot be applied to it.
    if flags:
        raise ValueError("Cannot process 'flags' argument on a compiled selector list")
    if namespaces is not None:
        raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
    if custom is not None:
        raise ValueError("Cannot process 'custom' argument on a compiled selector list")

    return pattern
|
||||
|
||||
|
||||
def purge() -> None:
    """Purge cached patterns by delegating to the CSS parser's cache purge."""

    cp._purge_cache()
|
||||
|
||||
|
||||
def closest(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bs4.Tag:
    """
    Match closest ancestor.

    Compiles `select` (honoring `namespaces`, `flags`, and `custom`) and
    returns the result of the compiled selector's `closest` for `tag`.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
|
||||
|
||||
|
||||
def match(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` (honoring `namespaces`, `flags`, and `custom`) and
    returns the result of the compiled selector's `match` for `tag`.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
|
||||
|
||||
|
||||
def filter(  # noqa: A001
    select: str,
    iterable: Iterable[bs4.Tag],
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> list[bs4.Tag]:
    """
    Filter list of nodes.

    Compiles `select` (honoring `namespaces`, `flags`, and `custom`) and
    returns the result of the compiled selector's `filter` for `iterable`.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
|
||||
|
||||
|
||||
def select_one(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> bs4.Tag:
    """
    Select a single tag.

    Compiles `select` (honoring `namespaces`, `flags`, and `custom`) and
    returns the result of the compiled selector's `select_one` for `tag`.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
|
||||
|
||||
|
||||
def select(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> list[bs4.Tag]:
    """
    Select the specified tags.

    Compiles `select` (honoring `namespaces`, `flags`, and `custom`) and
    returns the result of the compiled selector's `select` for `tag`,
    passing `limit` through unchanged.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
|
||||
|
||||
|
||||
def iselect(
    select: str,
    tag: bs4.Tag,
    namespaces: dict[str, str] | None = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: dict[str, str] | None = None,
    **kwargs: Any
) -> Iterator[bs4.Tag]:
    """
    Iterate the specified tags.

    Generator: `select` is compiled (honoring `namespaces`, `flags`, and
    `custom`) when iteration starts, and the items of the compiled
    selector's `iselect` for `tag` are yielded, with `limit` passed through
    unchanged.
    """

    # `custom` must be forwarded explicitly: it is a named keyword here and
    # therefore never ends up in `kwargs`.
    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
|
||||
|
||||
|
||||
def escape(ident: str) -> str:
    """Escape identifier by delegating to the CSS parser's `escape`."""

    return cp.escape(ident)
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
"""Meta related things."""
|
||||
from __future__ import annotations
|
||||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
# Verbose pattern for the version strings accepted by `parse_version`:
# `major[.minor[.micro]]`, then an optional `a`/`b`/`rc` pre-release with its
# number, then optional `.postN`, then optional `.devN`.
RE_VER = re.compile(
    r'''(?x)
    (?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
    (?:(?P<type>a|b|rc)(?P<pre>\d+))?
    (?:\.post(?P<post>\d+))?
    (?:\.dev(?P<dev>\d+))?
    '''
)

# Release type -> pre-release tag used when rendering the canonical version string.
REL_MAP = {
    ".dev": "",
    ".dev-alpha": "a",
    ".dev-beta": "b",
    ".dev-candidate": "rc",
    "alpha": "a",
    "beta": "b",
    "candidate": "rc",
    "final": ""
}

# Release type -> development status string returned by `Version._get_dev_status`.
DEV_STATUS = {
    ".dev": "2 - Pre-Alpha",
    ".dev-alpha": "2 - Pre-Alpha",
    ".dev-beta": "2 - Pre-Alpha",
    ".dev-candidate": "2 - Pre-Alpha",
    "alpha": "3 - Alpha",
    "beta": "4 - Beta",
    "candidate": "4 - Beta",
    "final": "5 - Production/Stable"
}

# Pre-release tag captured by `RE_VER` -> release type name.
PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}
|
||||
|
||||
|
||||
class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
    """
    Get the version (PEP 440).

    A biased approach to the PEP 440 semantic version.

    Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
      (major, minor, micro, release type, pre-release build, post-release build, development release build)
    Release types are named in such a way that they are comparable with ease.
    Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
    development status for setup files.

    How it works (currently):

    - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
    - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
      The dot is used to ensure all development specifiers are sorted before `alpha`.
      You can specify a `dev` number for development builds, but do not have to as implicit development releases
      are allowed.
    - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
      allow implicit prereleases.
    - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
      are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
      noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
      does not allow implicit post releases.
    - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.

    Acceptable version releases:

    ```
    Version(1, 0, 0, "final")                    1.0
    Version(1, 2, 0, "final")                    1.2
    Version(1, 2, 3, "final")                    1.2.3
    Version(1, 2, 0, "alpha", pre=4)             1.2a4
    Version(1, 2, 0, "beta", pre=4)              1.2b4
    Version(1, 2, 0, "candidate", pre=4)         1.2rc4
    Version(1, 2, 0, ".dev-alpha", pre=4)        1.2a4.dev0
    Version(1, 2, 0, "final", post=1)            1.2.post1
    Version(1, 2, 3, ".dev")                     1.2.3.dev0
    Version(1, 2, 3, ".dev", dev=1)              1.2.3.dev1
    ```

    """

    def __new__(
        cls,
        major: int, minor: int, micro: int, release: str = "final",
        pre: int = 0, post: int = 0, dev: int = 0
    ) -> Version:
        """
        Validate version info.

        Raises `ValueError` when a numeric part is not a non-negative integer,
        when `release` is not a known release type, or when `pre`, `post`, and
        `dev` are combined in a way the chosen release type does not allow.
        """

        # Ensure all numeric parts (including `dev`) are non-negative integers.
        for value in (major, minor, micro, pre, post, dev):
            if not (isinstance(value, int) and value >= 0):
                raise ValueError("All version parts except 'release' should be integers.")

        if release not in REL_MAP:
            raise ValueError(f"'{release}' is not a valid release type.")

        # Ensure valid pre-release (we do not allow implicit pre-releases).
        # The string comparison selects `alpha`, `beta`, and `candidate`.
        if ".dev-candidate" < release < "final":
            if pre == 0:
                raise ValueError("Implicit pre-releases not allowed.")
            elif dev:
                raise ValueError("Version is not a development release.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure valid development or development/pre release
        # (every `.dev*` release type sorts before `alpha`).
        elif release < "alpha":
            if release > ".dev" and pre == 0:
                raise ValueError("Implicit pre-release not allowed.")
            elif post:
                raise ValueError("Post-releases are not allowed with pre-releases.")

        # Ensure a valid normal release
        else:
            if pre:
                raise ValueError("Version is not a pre-release.")
            elif dev:
                raise ValueError("Version is not a development release.")

        return super().__new__(cls, major, minor, micro, release, pre, post, dev)

    def _is_pre(self) -> bool:
        """Is prerelease."""

        return bool(self.pre > 0)

    def _is_dev(self) -> bool:
        """Is development."""

        return bool(self.release < "alpha")

    def _is_post(self) -> bool:
        """Is post."""

        return bool(self.post > 0)

    def _get_dev_status(self) -> str:  # pragma: no cover
        """Get development status string."""

        return DEV_STATUS[self.release]

    def _get_canonical(self) -> str:
        """Get the canonical output string."""

        # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
        # A zero `micro` is omitted from the output.
        if self.micro == 0:
            ver = f"{self.major}.{self.minor}"
        else:
            ver = f"{self.major}.{self.minor}.{self.micro}"
        if self._is_pre():
            ver += f'{REL_MAP[self.release]}{self.pre}'
        if self._is_post():
            ver += f".post{self.post}"
        if self._is_dev():
            ver += f".dev{self.dev}"

        return ver
|
||||
|
||||
|
||||
def parse_version(ver: str) -> Version:
    """
    Parse version into a comparable Version tuple.

    Raises `ValueError` when `ver` does not match `RE_VER` at its start.
    """

    found = RE_VER.match(ver)
    if found is None:
        raise ValueError(f"'{ver}' is not a valid version")

    parts = found.groupdict()

    # Missing minor/micro segments default to zero.
    major = int(parts['major'])
    minor = int(parts['minor'] or 0)
    micro = int(parts['micro'] or 0)

    # Pre-release tag and number, if any.
    release = "final"
    pre = 0
    if parts['type']:
        release = PRE_REL_MAP[parts['type']]
        pre = int(parts['pre'])

    # A `.devN` suffix turns the release type into its development variant.
    dev = 0
    if parts['dev']:
        dev = int(parts['dev'])
        release = '.dev-' + release if pre else '.dev'

    post = int(parts['post'] or 0)

    return Version(major, minor, micro, release, pre, post, dev)
|
||||
|
||||
|
||||
# Structured version of this package, and its canonical string form.
__version_info__ = Version(2, 6, 0, "final")
__version__ = __version_info__._get_canonical()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,407 @@
|
|||
"""CSS selector structure items."""
|
||||
from __future__ import annotations
|
||||
import copyreg
|
||||
from .pretty import pretty
|
||||
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = (
    'Selector',
    'SelectorNull',
    'SelectorTag',
    'SelectorAttribute',
    'SelectorContains',
    'SelectorNth',
    'SelectorLang',
    'SelectorList',
    'Namespaces',
    'CustomSelectors'
)

# Selector flag bits; every value is a distinct power of two, so they can be
# combined in a single integer (presumably the `flags` field of `Selector` —
# confirm against the parser and matcher).
SEL_EMPTY = 0x1
SEL_ROOT = 0x2
SEL_DEFAULT = 0x4
SEL_INDETERMINATE = 0x8
SEL_SCOPE = 0x10
SEL_DIR_LTR = 0x20
SEL_DIR_RTL = 0x40
SEL_IN_RANGE = 0x80
SEL_OUT_OF_RANGE = 0x100
SEL_DEFINED = 0x200
SEL_PLACEHOLDER_SHOWN = 0x400
|
||||
|
||||
|
||||
class Immutable:
|
||||
"""Immutable."""
|
||||
|
||||
__slots__: tuple[str, ...] = ('_hash',)
|
||||
|
||||
_hash: int
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
temp = []
|
||||
for k, v in kwargs.items():
|
||||
temp.append(type(v))
|
||||
temp.append(v)
|
||||
super().__setattr__(k, v)
|
||||
super().__setattr__('_hash', hash(tuple(temp)))
|
||||
|
||||
@classmethod
|
||||
def __base__(cls) -> type[Immutable]:
|
||||
"""Get base class."""
|
||||
|
||||
return cls
|
||||
|
||||
def __eq__(self, other: Any) -> bool:
|
||||
"""Equal."""
|
||||
|
||||
return (
|
||||
isinstance(other, self.__base__()) and
|
||||
all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
|
||||
)
|
||||
|
||||
def __ne__(self, other: Any) -> bool:
|
||||
"""Equal."""
|
||||
|
||||
return (
|
||||
not isinstance(other, self.__base__()) or
|
||||
any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
|
||||
)
|
||||
|
||||
def __hash__(self) -> int:
|
||||
"""Hash."""
|
||||
|
||||
return self._hash
|
||||
|
||||
def __setattr__(self, name: str, value: Any) -> None:
|
||||
"""Prevent mutability."""
|
||||
|
||||
raise AttributeError(f"'{self.__class__.__name__}' is immutable")
|
||||
|
||||
def __repr__(self) -> str: # pragma: no cover
|
||||
"""Representation."""
|
||||
|
||||
r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
|
||||
return f"{self.__class__.__name__}({r})"
|
||||
|
||||
__str__ = __repr__
|
||||
|
||||
def pretty(self) -> None: # pragma: no cover
|
||||
"""Pretty print."""
|
||||
|
||||
print(pretty(self))
|
||||
|
||||
|
||||
class ImmutableDict(Mapping[Any, Any]):
    """
    Hashable, immutable dictionary.

    Built from a `dict` or an iterable of `(key, value)` pairs. The contents
    are copied into a private dict and the hash is computed once, from the
    sorted items together with the types of their keys and values.
    """

    def __init__(
        self,
        arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
    ) -> None:
        """
        Initialize.

        Raises `TypeError` when validation rejects `arg`.
        """

        # A one-shot iterator (e.g. a generator) would be exhausted by
        # `_validate`, leaving nothing for `dict()` below, so snapshot it first.
        if isinstance(arg, Iterator):
            arg = list(arg)

        self._validate(arg)
        self._d = dict(arg)
        self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))

    def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
        """Validate arguments: values (and, for pair iterables, keys) must be hashable."""

        if isinstance(arg, dict):
            # Dict keys are hashable by construction, so only the values need checking.
            if not all(isinstance(v, Hashable) for v in arg.values()):
                raise TypeError(f'{self.__class__.__name__} values must be hashable')
        elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
            raise TypeError(f'{self.__class__.__name__} values must be hashable')

    def __iter__(self) -> Iterator[Any]:
        """Iterator."""

        return iter(self._d)

    def __len__(self) -> int:
        """Length."""

        return len(self._d)

    def __getitem__(self, key: Any) -> Any:
        """Get item: `namespace['key']`."""

        return self._d[key]

    def __hash__(self) -> int:
        """Hash."""

        return self._hash

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return f"{self._d!r}"

    __str__ = __repr__
|
||||
|
||||
|
||||
class Namespaces(ImmutableDict):
    """Namespaces: an immutable mapping whose keys and values must all be strings."""

    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """
        Validate arguments.

        Raises `TypeError` unless every key and every value is a `str`,
        whether `arg` is a dict or an iterable of pairs.
        """

        pairs = arg.items() if isinstance(arg, dict) else arg
        if not all(isinstance(k, str) and isinstance(v, str) for k, v in pairs):
            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
|
||||
|
||||
|
||||
class CustomSelectors(ImmutableDict):
    """Custom selectors: an immutable mapping whose keys and values must all be strings."""

    def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize."""

        super().__init__(arg)

    def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
        """
        Validate arguments.

        Raises `TypeError` unless every key and every value is a `str`,
        whether `arg` is a dict or an iterable of pairs.
        """

        pairs = arg.items() if isinstance(arg, dict) else arg
        if not all(isinstance(k, str) and isinstance(v, str) for k, v in pairs):
            raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
|
||||
|
||||
|
||||
class Selector(Immutable):
    """
    Selector.

    Immutable record of the parts of one selector. All fields are stored
    through `Immutable.__init__`, which also derives the hash from them in
    the keyword order used below.
    """

    # `_hash` must remain the final slot: `Immutable.__repr__` and `_pickle`
    # both drop the last entry of `__slots__`.
    __slots__ = (
        'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )

    tag: SelectorTag | None
    ids: tuple[str, ...]
    classes: tuple[str, ...]
    attributes: tuple[SelectorAttribute, ...]
    nth: tuple[SelectorNth, ...]
    selectors: tuple[SelectorList, ...]
    relation: SelectorList
    rel_type: str | None
    contains: tuple[SelectorContains, ...]
    lang: tuple[SelectorLang, ...]
    flags: int

    def __init__(
        self,
        tag: SelectorTag | None,
        ids: tuple[str, ...],
        classes: tuple[str, ...],
        attributes: tuple[SelectorAttribute, ...],
        nth: tuple[SelectorNth, ...],
        selectors: tuple[SelectorList, ...],
        relation: SelectorList,
        rel_type: str | None,
        contains: tuple[SelectorContains, ...],
        lang: tuple[SelectorLang, ...],
        flags: int
    ) -> None:
        """Initialize by storing every argument as the field of the same name."""

        super().__init__(
            tag=tag,
            ids=ids,
            classes=classes,
            attributes=attributes,
            nth=nth,
            selectors=selectors,
            relation=relation,
            rel_type=rel_type,
            contains=contains,
            lang=lang,
            flags=flags
        )
|
||||
|
||||
|
||||
class SelectorNull(Immutable):
    """Null Selector: an `Immutable` with no fields of its own."""

    def __init__(self) -> None:
        """Initialize with no fields."""

        super().__init__()
|
||||
|
||||
|
||||
class SelectorTag(Immutable):
    """Selector tag: an immutable pair of tag `name` and optional `prefix`."""

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("name", "prefix", "_hash")

    name: str
    prefix: str | None

    def __init__(self, name: str, prefix: str | None) -> None:
        """Initialize by storing `name` and `prefix`."""

        super().__init__(name=name, prefix=prefix)
|
||||
|
||||
|
||||
class SelectorAttribute(Immutable):
    """
    Selector attribute rule.

    Immutable record of an attribute name, its prefix, and two optional
    compiled patterns (`pattern` and `xml_type_pattern`).
    """

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")

    attribute: str
    prefix: str
    pattern: Pattern[str] | None
    xml_type_pattern: Pattern[str] | None

    def __init__(
        self,
        attribute: str,
        prefix: str,
        pattern: Pattern[str] | None,
        xml_type_pattern: Pattern[str] | None
    ) -> None:
        """Initialize by storing every argument as the field of the same name."""

        super().__init__(
            attribute=attribute,
            prefix=prefix,
            pattern=pattern,
            xml_type_pattern=xml_type_pattern
        )
|
||||
|
||||
|
||||
class SelectorContains(Immutable):
    """Selector contains rule: an immutable tuple of `text` strings plus an `own` flag."""

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("text", "own", "_hash")

    text: tuple[str, ...]
    own: bool

    def __init__(self, text: Iterable[str], own: bool) -> None:
        """Initialize; `text` is converted to a tuple so that it is hashable."""

        super().__init__(text=tuple(text), own=own)
|
||||
|
||||
|
||||
class SelectorNth(Immutable):
    """
    Selector nth type.

    Immutable record of the fields `a`, `n`, `b`, `of_type`, `last`, and an
    attached `selectors` list.
    """

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")

    a: int
    n: bool
    b: int
    of_type: bool
    last: bool
    selectors: SelectorList

    def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
        """Initialize by storing every argument as the field of the same name."""

        super().__init__(
            a=a,
            n=n,
            b=b,
            of_type=of_type,
            last=last,
            selectors=selectors
        )
|
||||
|
||||
|
||||
class SelectorLang(Immutable):
    """
    Selector language rules.

    Immutable wrapper around a tuple of language strings; iteration, `len`,
    and indexing are forwarded to that tuple.
    """

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("languages", "_hash",)

    languages: tuple[str, ...]

    def __init__(self, languages: Iterable[str]) -> None:
        """Initialize; `languages` is converted to a tuple so that it is hashable."""

        super().__init__(languages=tuple(languages))

    def __iter__(self) -> Iterator[str]:
        """Iterate over the stored languages."""

        return iter(self.languages)

    def __len__(self) -> int:  # pragma: no cover
        """Number of stored languages."""

        return len(self.languages)

    def __getitem__(self, index: int) -> str:  # pragma: no cover
        """Get the language at `index`."""

        return self.languages[index]
|
||||
|
||||
|
||||
class SelectorList(Immutable):
    """
    Selector list.

    Immutable wrapper around a tuple of selectors plus the `is_not` and
    `is_html` flags; iteration, `len`, and indexing are forwarded to the
    tuple.
    """

    # `_hash` stays last; the base class and `_pickle` drop the final slot.
    __slots__ = ("selectors", "is_not", "is_html", "_hash")

    selectors: tuple[Selector | SelectorNull, ...]
    is_not: bool
    is_html: bool

    def __init__(
        self,
        selectors: Iterable[Selector | SelectorNull] | None = None,
        is_not: bool = False,
        is_html: bool = False
    ) -> None:
        """Initialize; a missing `selectors` argument becomes an empty tuple."""

        super().__init__(
            selectors=tuple(selectors) if selectors is not None else (),
            is_not=is_not,
            is_html=is_html
        )

    def __iter__(self) -> Iterator[Selector | SelectorNull]:
        """Iterate over the stored selectors."""

        return iter(self.selectors)

    def __len__(self) -> int:
        """Number of stored selectors."""

        return len(self.selectors)

    def __getitem__(self, index: int) -> Selector | SelectorNull:
        """Get the selector at `index`."""

        return self.selectors[index]
|
||||
|
||||
|
||||
def _pickle(p: Any) -> Any:
    """Reduce `p` to `(factory, args)`: its base class and every slot value but the last."""

    factory = p.__base__()
    # The final slot is skipped; the classes registered below keep `_hash` there.
    args = tuple(getattr(p, slot) for slot in p.__slots__[:-1])
    return factory, args


def pickle_register(obj: Any) -> None:
    """Register `_pickle` with `copyreg` as the reduction function for `obj`."""

    copyreg.pickle(obj, _pickle)
|
||||
|
||||
|
||||
# Register the reduction function for every selector type defined in this module.
pickle_register(Selector)
pickle_register(SelectorNull)
pickle_register(SelectorTag)
pickle_register(SelectorAttribute)
pickle_register(SelectorContains)
pickle_register(SelectorNth)
pickle_register(SelectorLang)
pickle_register(SelectorList)
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
"""
|
||||
Format a pretty string of a `SoupSieve` object for easy debugging.
|
||||
|
||||
This won't necessarily support all types and such, and definitely
|
||||
not support custom outputs.
|
||||
|
||||
It is mainly geared towards our types as the `SelectorList`
|
||||
object is a beast to look at without some indentation and newlines.
|
||||
The format and various output types is fairly known (though it
|
||||
hasn't been tested extensively to make sure we aren't missing corners).
|
||||
|
||||
Example:
|
||||
-------
|
||||
```
|
||||
>>> import soupsieve as sv
|
||||
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
|
||||
SelectorList(
|
||||
selectors=(
|
||||
Selector(
|
||||
tag=SelectorTag(
|
||||
name='that',
|
||||
prefix=None),
|
||||
ids=(),
|
||||
classes=(
|
||||
'class',
|
||||
),
|
||||
attributes=(
|
||||
SelectorAttribute(
|
||||
attribute='name',
|
||||
prefix='',
|
||||
pattern=re.compile(
|
||||
'^value$'),
|
||||
xml_type_pattern=None),
|
||||
),
|
||||
nth=(),
|
||||
selectors=(),
|
||||
relation=SelectorList(
|
||||
selectors=(
|
||||
Selector(
|
||||
tag=SelectorTag(
|
||||
name='this',
|
||||
prefix=None),
|
||||
ids=(),
|
||||
classes=(),
|
||||
attributes=(),
|
||||
nth=(),
|
||||
selectors=(),
|
||||
relation=SelectorList(
|
||||
selectors=(),
|
||||
is_not=False,
|
||||
is_html=False),
|
||||
rel_type='>',
|
||||
contains=(),
|
||||
lang=(),
|
||||
flags=0),
|
||||
),
|
||||
is_not=False,
|
||||
is_html=False),
|
||||
rel_type=None,
|
||||
contains=(),
|
||||
lang=(),
|
||||
flags=0),
|
||||
),
|
||||
is_not=False,
|
||||
is_html=False)
|
||||
```
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
# Token patterns recognized by `pretty`. Names and keywords need at least two
# characters to match; anything no pattern recognizes is copied verbatim.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# Tried in insertion order at each position; the first pattern that matches wins.
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR
}


def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    Tokenizes `str(obj)` and re-emits it with a newline and four more spaces
    of indentation after every opening call/bracket, and a newline after
    every comma. Text that no token pattern recognizes is copied through
    unchanged, one character at a time.
    """

    sel = str(obj)
    index = 0
    end = len(sel) - 1
    indent = 0
    output = []

    while index <= end:
        for name, regex in TOKENS.items():
            m = regex.match(sel, index)
            if not m:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append(f'{m.group(0)}\n{" " * indent}')
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append(f'{m.group(1)}\n{" " * indent}')
            elif name in ('dsep',):
                output.append(f'{m.group(1)} ')
            break
        else:
            # Nothing matched (e.g. a one-letter name, `.` or `-`). Emit the
            # character as-is and move on; without this the loop never advances.
            output.append(sel[index])
            index += 1

    return ''.join(output)
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
"""Utility."""
|
||||
from __future__ import annotations
|
||||
from functools import wraps, lru_cache
|
||||
import warnings
|
||||
import re
|
||||
from typing import Callable, Any
|
||||
|
||||
# Flag bit exported as `DEBUG`.
DEBUG = 0x00001

# Matches one line break (`\r\n`, `\n`, or `\r`) or the end of the string;
# used by `get_pattern_context` to walk a pattern line by line.
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
|
||||
|
||||
UC_A = ord('A')
UC_Z = ord('Z')


@lru_cache(maxsize=512)
def lower(string: str) -> str:
    """
    Lowercase the ASCII letters `A`-`Z` of `string`.

    Every other character, including non-ASCII uppercase letters, is left
    untouched. Results are memoized for up to 512 distinct inputs.
    """

    # Adding 32 to an ASCII uppercase code point gives its lowercase form.
    return ''.join(
        chr(ord(char) + 32) if UC_A <= ord(char) <= UC_Z else char
        for char in string
    )
|
||||
|
||||
|
||||
class SelectorSyntaxError(Exception):
    """
    Syntax error in a CSS selector.

    When both the pattern and the error index are supplied, the `line`,
    `col`, and `context` attributes describe the error position and the
    message is extended with the line number and context; otherwise all
    three attributes are `None`.
    """

    def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
        """Record the error position, if known, and build the final message."""

        self.line = self.col = self.context = None

        located = pattern is not None and index is not None
        if located:
            # Format pattern to show line and column position
            context, line, col = get_pattern_context(pattern, index)
            self.context, self.line, self.col = context, line, col
            msg = f'{msg}\n line {self.line}:\n{self.context}'

        super().__init__(msg)
|
||||
|
||||
|
||||
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]:  # pragma: no cover
    """
    Build a decorator that emits a `DeprecationWarning` on every call.

    The warning text is the wrapped callable's name followed by `message`.

    Usage:

        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """

    def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def _deprecated_func(*args: Any, **kwargs: Any) -> Any:
            # Warn first, then run the wrapped callable unchanged.
            notice = f"'{func.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)

        return _deprecated_func

    return _wrapper
|
||||
|
||||
|
||||
def warn_deprecated(message: str, stacklevel: int = 2) -> None:  # pragma: no cover
    """Emit `message` as a `DeprecationWarning` at the given stack level."""

    warnings.warn(message, DeprecationWarning, stacklevel)
|
||||
|
||||
|
||||
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
    """
    Get the pattern context.

    Walks `pattern` line by line and returns `(context, line, col)`:
    `context` is the rendered pattern with a `^` marker line placed under the
    line containing `index`, `line` is that line's 1-based number, and `col`
    is the 1-based column of `index` within it.
    """

    pieces: list[str] = []
    line_start = 0
    line_number = 1
    col = 1
    line = 1

    # Split pattern by newline and handle the text before the newline
    for brk in RE_PATTERN_LINE_SPLIT.finditer(pattern):
        line_text = pattern[line_start:brk.start(0)]

        if not brk.group(0) and not pieces:
            # The first match is already the end of the string: no line prefix is used.
            prefix = ''
            offset = -1
            col = index - line_start + 1
        elif line_start <= index < brk.end(0):
            # The error index falls on this line: flag it.
            prefix = '--> '
            offset = 2 if index > brk.start(0) else 3
            col = index - line_start + 1
        else:
            prefix = ' '
            offset = None

        if pieces:
            # Regardless of whether we are presented with `\r\n`, `\r`, or `\n`,
            # we will render the output with just `\n`. We will still log the column
            # correctly though.
            pieces.append('\n')
        pieces.append(f'{prefix}{line_text}')

        if offset is not None:
            pieces.append('\n')
            pieces.append(' ' * (col + offset) + '^')
            line = line_number

        line_number += 1
        line_start = brk.end(0)

    return ''.join(pieces), line, col
|
||||
Loading…
Add table
Add a link
Reference in a new issue