Książka ma {0} stron(y).
" + TRANSLATOR_TEMPLATE = "Tłumacz: {0}
" + PUBLISH_DATE_TEMPLATE = "Data pierwszego wydania: {0}
" + PUBLISH_DATE_PL_TEMPLATE = ( + "Data pierwszego wydania w Polsce: {0}
" + ) + + def __init__(self, root: HtmlElement, metadata: Metadata) -> None: + self.root = root + self.metadata = metadata + + def parse_search_results(self) -> List[MetaRecord]: + matches = [] + results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH) + for result in results: + title = self._parse_xpath_node( + root=result, + xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" + f"{LubimyCzytac.TITLE_TEXT_PATH}", + ) + + book_url = self._parse_xpath_node( + root=result, + xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" + f"{LubimyCzytac.URL_PATH}", + ) + authors = self._parse_xpath_node( + root=result, + xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" + f"{LubimyCzytac.AUTHORS_PATH}", + take_first=False, + ) + if not all([title, book_url, authors]): + continue + matches.append( + MetaRecord( + id=book_url.replace(f"/ksiazka/", "").split("/")[0], + title=title, + authors=[strip_accents(author) for author in authors], + url=LubimyCzytac.BASE_URL + book_url, + source=MetaSourceInfo( + id=self.metadata.__id__, + description=self.metadata.__name__, + link=LubimyCzytac.BASE_URL, + ), + ) + ) + return matches + + def parse_single_book( + self, match: MetaRecord, generic_cover: str, locale: str + ) -> MetaRecord: + try: + response = requests.get(match.url) + response.raise_for_status() + except Exception as e: + log.warning(e) + return None + self.root = fromstring(response.text) + match.cover = self._parse_cover(generic_cover=generic_cover) + match.description = self._parse_description() + match.languages = self._parse_languages(locale=locale) + match.publisher = self._parse_publisher() + match.publishedDate = self._parse_from_summary(attribute_name="datePublished") + match.rating = self._parse_rating() + match.series, match.series_index = self._parse_series() + match.tags = self._parse_tags() + match.identifiers = { + "isbn": self._parse_isbn(), + "lubimyczytac": match.id, + } + return match + + def _parse_xpath_node( + self, + xpath: str, + root: HtmlElement = None, + take_first: bool = True, + strip_element: bool = True, + ) -> Optional[Union[str, List[str]]]: + root = root if root is not None else self.root + node = root.xpath(xpath) + if not node: + return None + return ( + (node[0].strip() if strip_element else node[0]) + if take_first + else [x.strip() for x in node] + ) + + def _parse_cover(self, generic_cover) -> Optional[str]: + return ( + self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True) + or generic_cover + ) + + def _parse_publisher(self) -> Optional[str]: + return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True) + + def _parse_languages(self, locale: str) -> List[str]: + languages = list() + lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True) + if lang: + if "polski" in lang: + languages.append("pol") + if "angielski" in lang: + languages.append("eng") + return [get_language_name(locale, language) for language in languages] + + def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]: + series_index = 0 + series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True) + if series: + if "tom " in series: + series_name, series_info = series.split(" (tom ", 1) + series_info = series_info.replace(" ", "").replace(")", "") + # Check if book is not a bundle, i.e. chapter 1-3 + if "-" in series_info: + series_info = series_info.split("-", 1)[0] + if series_info.replace(".", "").isdigit() is True: + series_index = get_int_or_float(series_info) + return series_name, series_index + return None, None + + def _parse_tags(self) -> List[str]: + tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False) + if tags: + return [ + strip_accents(w.replace(", itd.", " itd.")) + for w in tags + if isinstance(w, str) + ] + return None + + def _parse_from_summary(self, attribute_name: str) -> Optional[str]: + value = None + summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY) + if summary_text: + data = json.loads(summary_text) + value = data.get(attribute_name) + return value.strip() if value is not None else value + + def _parse_rating(self) -> Optional[str]: + rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING) + return round(float(rating.replace(",", ".")) / 2) if rating else rating + + def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]: + options = { + "first_publish": LubimyCzytac.FIRST_PUBLISH_DATE, + "first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL, + } + date = self._parse_xpath_node(xpath=options.get(xpath)) + return parser.parse(date) if date else None + + def _parse_isbn(self) -> Optional[str]: + return self._parse_xpath_node(xpath=LubimyCzytac.ISBN) + + def _parse_description(self) -> str: + description = "" + description_node = self._parse_xpath_node( + xpath=LubimyCzytac.DESCRIPTION, strip_element=False + ) + if description_node is not None: + for source in self.root.xpath('//p[@class="source"]'): + source.getparent().remove(source) + description = tostring(description_node, method="html") + description = sanitize_comments_html(description) + + else: + description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE) + if description_node is not None: + description = description_node + description = sanitize_comments_html(description) + description = self._add_extra_info_to_description(description=description) + return description + + def _add_extra_info_to_description(self, description: str) -> str: + pages = self._parse_from_summary(attribute_name="numberOfPages") + if pages: + description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages) + + first_publish_date = self._parse_date() + if first_publish_date: + description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format( + first_publish_date.strftime("%d.%m.%Y") + ) + + first_publish_date_pl = self._parse_date(xpath="first_publish_pl") + if first_publish_date_pl: + description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format( + first_publish_date_pl.strftime("%d.%m.%Y") + ) + translator = self._parse_xpath_node(xpath=LubimyCzytac.TRANSLATOR) + if translator: + description += LubimyCzytacParser.TRANSLATOR_TEMPLATE.format(translator) + + + return description diff --git a/root/app/calibre-web/cps/metadata_provider/scholar.py b/root/app/calibre-web/cps/metadata_provider/scholar.py new file mode 100644 index 0000000..b3bfbb7 --- /dev/null +++ b/root/app/calibre-web/cps/metadata_provider/scholar.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web) +# Copyright (C) 2021 OzzieIsaacs +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see
{{ _('Description:') }}
+ {{ entry.comments[0].text|safe }} +