Update The Week

2026-05-08 10:32:28 +00:00 · 2026-04-06 18:13:59 +05:30
parent 2dc9de23bd
commit 40bfe696f8
2 changed files with 53 additions and 20 deletions
@@ -2,6 +2,8 @@
 # vim:fileencoding=utf-8
 import json
 import re
+import urllib.parse
+from datetime import datetime, timedelta

 from calibre.web.feeds.news import BasicNewsRecipe, classes

@@ -36,11 +38,19 @@ class LiveMint(BasicNewsRecipe):
            self.oldest_article = float(d)

    def get_cover_url(self):
-        soup = self.index_to_soup('https://epaper.livemint.com/')
-        cov = soup.findAll('img', attrs={'src': lambda x: x and x.startswith('/EPAPERIMAGES')})
-        for x in cov:
-            if 'MINT_FRONT_1' in x['src']:
-                return 'https://epaper.livemint.com' + x['src'].replace('-S', '')
+        """Return the front-page image URL from the Mint e-paper page listing.
+
+        Requests the ``GetAllpages`` listing for edition 1 using today's date
+        (``dd/mm/YYYY``, URL-quoted), or the preceding Friday's date when run
+        on a Saturday or Sunday. The first entry whose ``SectionName``
+        contains ``Mint_Front_`` or ``Mint_Cover`` supplies the result via its
+        ``MrImageUrl`` field. Falls through (returning ``None``) when no entry
+        matches.
+        """
+        # Read the clock once so the weekday test and the formatted date
+        # cannot disagree if the call straddles midnight.
+        edition_day = datetime.today()
+        if edition_day.weekday() in (5, 6):
+            # weekday() is 5 on Saturday and 6 on Sunday; subtracting
+            # (weekday - 4) days lands on Friday in both cases.
+            edition_day -= timedelta(days=edition_day.weekday() - 4)
+        # raw=True makes index_to_soup hand back the undecoded response,
+        # which is parsed as JSON below rather than as HTML.
+        payload = self.index_to_soup(
+            'https://epaper.livemint.com/Home/GetAllpages?editionid=1&editiondate='
+            + urllib.parse.quote(edition_day.strftime('%d/%m/%Y'), safe=''),
+            raw=True,
+        )
+        for page in json.loads(payload):
+            section = page['SectionName']
+            if 'Mint_Front_' in section or 'Mint_Cover' in section:
+                return page['MrImageUrl']

    extra_css = '''
        img {margin:0 auto;}
@@ -58,7 +68,12 @@ class LiveMint(BasicNewsRecipe):
            name='article',
            attrs={'id': lambda x: x and x.startswith(('article_', 'box_'))},
        ),
-        dict(attrs={'class': lambda x: x and x.startswith(('storyPage_storyBox__', 'storyBoxSchema'))}),
+        dict(
+            attrs={
+                'class': lambda x: x
+                and x.startswith(('storyPage_storyBox__', 'storyBoxSchema'))
+            }
+        ),
        classes('contentSec'),
    ]

@@ -67,13 +82,11 @@ class LiveMint(BasicNewsRecipe):
        dict(
            attrs={
                'class': lambda x: x
-                and x.startswith(
-                    (
-                        'storyPage_alsoRead__',
-                        'storyPage_firstPublishDate__',
-                        'storyPage_bcrumb__',
-                    )
-                )
+                and x.startswith((
+                    'storyPage_alsoRead__',
+                    'storyPage_firstPublishDate__',
+                    'storyPage_bcrumb__',
+                ))
            }
        ),
        dict(attrs={'id': ['faqSection', 'seoText', 'ellipsisId', 'gift_redeemed_box ']}),
@@ -108,7 +121,9 @@ class LiveMint(BasicNewsRecipe):
    def preprocess_raw_html(self, raw, *a):
        # remove empty p tags
        raw = re.sub(
-            r'(<p>\s*)(<[^(\/|a|i|b|em|strong)])', r'\g<2>', re.sub(
+            r'(<p>\s*)(<[^(\/|a|i|b|em|strong)])',
+            r'\g<2>',
+            re.sub(
                r'(<p>\s*&nbsp;\s*<\/p>)|(<p>\s*<\/p>)|(<p\s*\S+>&nbsp;\s*<\/p>)', '', raw
            ),
        )
@@ -6,10 +6,11 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes


 class TheWeek(BasicNewsRecipe):
-    title = u'The Week'
+    title = 'The Week'
    description = (
        'The Week is the best selling general interest English news magazine. The magazine covers politics, entertainment,'
-        ' social issues, trends, technology, lifestyle and everything else you should be knowing. Best downloaded on Mondays.')
+        ' social issues, trends, technology, lifestyle and everything else you should be knowing. Best downloaded on Mondays.'
+    )
    language = 'en_IN'
    __author__ = 'unkn0wn'
    encoding = 'utf-8'
@@ -34,7 +35,7 @@ class TheWeek(BasicNewsRecipe):
    recipe_specific_options = {
        'date': {
            'short': 'The date of the edition to download (YYYY.MM.DD format)',
-            'long': 'For example, 2024.06.30'
+            'long': 'For example, 2024.06.30',
        }
    }

@@ -44,7 +45,9 @@ class TheWeek(BasicNewsRecipe):
            soup = self.index_to_soup(
                'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
            )
-            return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
+            return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())[
+                'src'
+            ]

    def parse_index(self):
        issue = 'https://www.theweek.in/theweek.html'
@@ -70,6 +73,21 @@ class TheWeek(BasicNewsRecipe):
        return [('Articles', ans)]

    def preprocess_html(self, soup):
-        for img in soup.findAll('img', attrs={'data-src-web': True}):
-            img['src'] = img['data-src-web']
+        """Give every image a usable ``src`` before conversion.
+
+        For each ``<picture>``: if it has both a ``<source>`` and an
+        ``<img>``, the image's ``src`` is set to the first source's
+        ``srcset`` text up to and including the first ``.jpg``; all
+        ``<source>`` children are then removed. Afterwards, any ``<img>``
+        carrying ``data-src-mobile`` (preferred) or ``data-src-web`` has its
+        ``src`` replaced by that value. Returns the same soup object.
+        """
+        for pic in soup.findAll('picture'):
+            source = pic.find('source')
+            img = pic.find('img') if source is not None else None
+            if img is not None:
+                # Only rewrite src when the srcset really contains '.jpg';
+                # blindly appending '.jpg' to a srcset without one would
+                # produce a URL that does not exist. A <source> with no
+                # srcset attribute is tolerated instead of raising KeyError.
+                head, ext, _ = (source.get('srcset') or '').partition('.jpg')
+                if ext:
+                    img['src'] = head + ext
+            for extra in pic.findAll('source'):
+                extra.extract()
+        for img in soup.findAll('img'):
+            lazy_src = img.get('data-src-mobile') or img.get('data-src-web')
+            if lazy_src:
+                img['src'] = lazy_src
        return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        """Use the first ``<p>`` after the first ``<h1>`` as the summary.
+
+        Sets both ``article.text_summary`` and ``article.summary`` to the
+        text of that paragraph. Leaves the article untouched when the page
+        has no ``<h1>`` or no ``<p>`` follows it.
+        """
+        headline = soup.find('h1')
+        if not headline:
+            return
+        lead = headline.findNext('p')
+        if not lead:
+            return
+        blurb = self.tag_to_string(lead)
+        article.text_summary = article.summary = blurb