# -*- encoding: utf-8 -*-
# Copyright (c) 2021 Stephen Bunn <stephen@bunn.io>
# GPLv3 License <https://choosealicense.com/licenses/gpl-3.0/>
"""Contains definitions of content types used throughout the project.
.. py:class:: Url
A basic wrapper around a furl_ URL to keep things consistent between plugins and the
internals of the package without declaring a direct dependency on a third-party.
"""
import abc
import mimetypes
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from pydantic import BaseModel, Field
from ..hasher import HashType
from .types import Url
[docs]class Checksum(BaseModel):
"""Describes a checksum that should be used for content validation.
Parameters:
type (~hasher.HashType):
The type of checksum hash is being defined.
hash (str):
The value of the checksum hash being defined.
data (~typing.Dict, optional):
Model parameter dictionary provided by pydantic.
You should *likely* never use this property unless you need a keyword
argument for a dictionary payload to construct the model.
"""
type: HashType = Field(
title="Type",
description="The type of the checksum.",
)
hash: str = Field(
title="Hash",
description="The hex digest of the checksum.",
)
[docs]class Resource(abc.ABC, BaseModel):
"""The base resource class that resource types must inherit from.
.. important::
This class is abstract and used as an typing interface for the
:class:`~megu.models.content.Content` model.
Concrete implementations of this abstract class such as
:class:`~megu.models.http.HttpResource` must be provided to content in order
for the application to understand how to fetch the content.
Parameters:
data (~typing.Dict, optional):
You should never use this parameter.
Since this is an abstract class, you should never be instantiating it.
"""
@abc.abstractproperty
def fingerprint(self) -> str: # pragma: no cover
"""Get the unique identifier of an resource.
Raises:
NotImplementedError:
Subclasses must implement this property.
Returns:
str:
A string fingerprint of the resource.
"""
raise NotImplementedError(
f"{self.__class__.__qualname__!s} must implement fingerprint property"
)
[docs]class Content(BaseModel):
"""Describes some extracted content that can be downloaded.
Parameters:
id (str):
The plugin-defined content-unique identifier for the content.
name (str):
The human-readable name to describe the content.
url (str):
The absolute URL from where the plugin extracted the content.
This URL string gets translated into a :class:`~megu.models.types.Url`
instance.
quality (float):
The plugin-defined arbitrary quality of the content.
size (int):
The size in bytes the content will take up on the local filesystem.
type (str):
The appropriate mimetype of the content.
resources (List[~megu.models.content.Resource]):
The resources required to fetch and download the extracted content.
meta (~megu.models.content.Meta):
The structured metadata of the extracted content.
checksums (List[~megu.models.content.Checksum]):
A list of checksums that can be used to verify the downloaded content.
extra (Dict[str, ~typing.Any):
The unstructured metadata of the extracted content.
data (~typing.Dict, optional):
Model parameter dictionary provided by pydantic.
You should *likely* never use this property unless you need a keyword
argument for a dictionary payload to construct the model.
"""
[docs] class Config:
"""Configuration for Content model validation."""
arbitrary_types_allowed = True
id: str = Field(
title="ID",
descripion="The unique identifier of the content.",
min_length=1,
)
name: str = Field(
title="Name",
description="A human-readable name to describe the content.",
min_length=1,
)
url: Url = Field(
title="URL",
description="The source URL the content was extracted from.",
)
quality: float = Field(
title="Quality",
description="The quality ranking of the content from the same URL.",
ge=0,
)
size: int = Field(
title="Size",
description="The size of the content in bytes.",
gt=0,
)
type: str = Field(
title="Type",
description="The appropriate mimetype for the content.",
min_length=1,
)
extension: Optional[str] = Field(
title="Extension",
description="The appropriate file extension for the content.",
regex=r"^\..+$",
)
resources: List[Resource] = Field(
title="Resources",
description="The resources to fetch to recreate the remote content locally.",
min_items=1,
)
meta: Meta = Field(
default_factory=Meta,
title="Meta",
description="Meta container for traditional content metadata.",
)
checksums: List[Checksum] = Field(
default_factory=list,
title="Checksums",
description="Checksum list if the fetched content can be validated.",
)
extra: Dict[str, Any] = Field(
default_factory=dict,
title="Extra",
description="Container for miscellaneous content properties.",
)
@property
def ext(self) -> str:
"""File extension for the content.
Returns:
str:
The best suitable file extension for the content.
May be a blank string if a extension cannot be determined.
"""
if self.extension is not None:
return self.extension
return mimetypes.guess_extension(self.type) or ""
@property
def filename(self) -> str:
"""Filename for the content.
Returns:
str:
The appropriate filename for the content.
"""
return f"{self.id!s}{self.ext!s}"
[docs]class Manifest(BaseModel):
"""Describes the downloaded artifacts ready to be merged.
Parameters:
content (~models.content.Content):
The content instance that was download.
artifacts (List[Tuple[~models.content.Resource, ~pathlib.Path]]):
A tuple containing (resource, path) of content resources that were
downloaded to the local filesystem.
data (~typing.Dict, optional):
Model parameter dictionary provided by pydantic.
You should *likely* never use this property unless you need a keyword
argument for a dictionary payload to construct the model.
"""
content: Content = Field(
title="Content",
description="The content responsible for the downloaded manifest.",
)
artifacts: List[Tuple[Resource, Path]] = Field(
title="Artifacts",
description="The list of pairs of downloaded resources and the artifact path.",
min_items=1,
)