Files
calibre/recipes/argument.recipe
2026-05-04 23:52:03 +05:30

56 lines
2.1 KiB
Python

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class argumentRecipe(BasicNewsRecipe):
    """Calibre recipe that builds its article index from casopisargument.cz.

    The index is not taken from a feed: ``parse_index`` downloads each page
    listed in ``sections`` and collects one link per ``<article>`` element.
    """

    __author__ = 'bubak'
    title = u'!argument'
    description = 'casopisargument.cz'
    oldest_article = 1
    max_articles_per_feed = 20
    # (section title, index page URL) pairs walked by parse_index().
    sections = [
        (u'Argument', u'http://casopisargument.cz')
    ]
    language = 'cs'
    cover_url = 'http://casopisargument.cz/wp-content/uploads/2016/12/logo-argument.png'
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    remove_tags_before = dict(name='h1', attrs={'class': ['entry-title']})
    remove_tags_after = dict(name='footer', attrs={'class': ['entry-footer']})
    # NOTE(review): the 'AAA' prefix on the div id looks like it may have been
    # added to disable this rule -- confirm against the live page markup.
    remove_tags = [
        dict(name='img'),
        dict(name='div', attrs={'id': ['AAAsocial-bookmark']}),
    ]
    # Replace each <a ...>text</a> whose content holds no nested tag with its
    # bare text, i.e. strip plain hyperlinks from the article body.
    preprocess_regexps = [
        (re.compile(r'<a\s[^>]*>([^<]*)</a>', re.DOTALL | re.IGNORECASE),
         lambda match: match.group(1)),
    ]

    def parse_index(self):
        """Scrape every page in ``sections`` and return the article index.

        Returns:
            A list of ``(section_title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date`` (the last two are always empty).

        Raises:
            ValueError: if a section page contains no ``<article>`` elements.
        """
        # Matches any link that has at least one character of text; compiled
        # once instead of once per <article>.
        has_text = re.compile(r'.')
        res = []
        # Shared across sections so a title is only ever listed once.
        seen_titles = set()
        for section_title, section_url in self.sections:
            self.log('\tTrying section ' + section_title)
            soup = self.index_to_soup(section_url)
            content = soup.findAll('article')
            # findAll() yields an empty list, never None, when nothing
            # matches, so emptiness is what has to be tested here.
            if not content:
                raise ValueError('Could not find category content')
            articles = []
            for c in content:
                link = c.find('a', text=has_text)
                if not link or not link.get('href'):
                    continue
                url = link['href']
                # strip() also trims one-character and multi-line titles,
                # which the previous anchored regex left untouched.
                title = link.text.strip()
                if not title:
                    # Whitespace-only link text is not a usable headline.
                    continue
                if url.startswith('/'):
                    url = 'http://casopisargument.cz' + url
                if title in seen_titles:
                    self.log('\tAlready seen: ', title)
                    continue
                seen_titles.add(title)
                self.log('\tFound article:', title, 'at', url)
                articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
            res.append((section_title, articles))
        return res