~metalune/public-inbox

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI instead (see "Review patch").

[PATCH] lefigaro: support story

Details
Message ID: <20211031094841.4971-1-bopol@e.email>
DKIM signature: pass
Download raw message
Patch: +78 -56
---
 simplynews_sites/lefigaro.py | 134 ++++++++++++++++++++---------------
 1 file changed, 78 insertions(+), 56 deletions(-)

diff --git a/simplynews_sites/lefigaro.py b/simplynews_sites/lefigaro.py
index 10ad397..6aad359 100644
--- a/simplynews_sites/lefigaro.py
+++ b/simplynews_sites/lefigaro.py
@@ -38,15 +38,16 @@ def get_page(url):

    post = soup.select_one("article")

    json_element = soup.find("script", type="application/ld+json")
    if json_element is not None:
        info_json = json.loads(json_element.next)

    if subtitle.endswith("..."):
    if subtitle.endswith("...") and post is not None:
        # post is None on a "story" page (or an unknown page type)
        standfirst = post.select_one("p.fig-standfirst")
        if standfirst is not None:
            subtitle = standfirst.text

    json_element = soup.find("script", type="application/ld+json")
    if json_element is not None:
        info_json = json.loads(json_element.next)

    for element in info_json:
        if element["@type"] == "NewsArticle":
            last_updated = element.get("dateModified")
@@ -75,70 +76,88 @@ def get_page(url):

    article = []

    heading_image = post.select_one(
        "article > figure.fig-media img") or soup.select_one("div.fig-wrapper figure.fig-media img")
    if heading_image is not None:
        article.append(get_image(heading_image))

    post_content = post.select_one("div.fig-content-body")

    if post_content is None:  # not a regular article
        poll_element = post.select_one("div.fig-poll")  # poll "article"
        if poll_element is not None:
            entries = []
            results = poll_element.select("div.fig-poll__result")
            for result in results:
                percentage = result.get("data-percentage")
                label = result.select_one("span.fig-poll__label").text
    if post is None:
        # "story" page (or unknown)
        amp_video = soup.select_one("amp-video")
        if amp_video is not None:
            thumbnail_src = utils.get_property(amp_video, "poster")
            sources = amp_video.select("source") or []
            # DASH (mpd) and HLS (m3u8) are also provided for the same video
            # but we only select mp4 because it's more widely supported
            for source in sources:
                src = utils.get_property(source, "src")
                if utils.get_property(source, "type") == "video/mp4":
                    article.append({
                        "type": "video",
                        "src": src,
                        "poster": thumbnail_src
                    })
    else:
        heading_image = post.select_one(
            "article > figure.fig-media img") or soup.select_one("div.fig-wrapper figure.fig-media img")
        if heading_image is not None:
            article.append(get_image(heading_image))

                entries.append({"value": "{} : {}%".format(label, percentage)})
        post_content = post.select_one("div.fig-content-body")

            article.append({
                "type": "unsorted list",
                "entries": entries
            })
        if post_content is None:  # not a regular article
            poll_element = post.select_one("div.fig-poll")  # poll
            if poll_element is not None:
                entries = []
                results = poll_element.select("div.fig-poll__result")
                for result in results:
                    percentage = result.get("data-percentage")
                    label = result.select_one("span.fig-poll__label").text

            votes = poll_element.get("data-voters")
                    entries.append({"value": "{} : {}%".format(label, percentage)})

            article.append({
                "type": "paragraph",
                "value": "{} votes".format(votes)
            })
                article.append({
                    "type": "unsorted list",
                    "entries": entries
                })

            data["article"] = article
            return data
                votes = poll_element.get("data-voters")

        live_messages = post.select("article.live-message")  # live "article"
        if live_messages is not None:
            for message in live_messages:
                message_title = message.select_one(".live-title")
                article.append({
                    "type": "header",
                    "size": "h2",
                    "value": message_title.text
                    "type": "paragraph",
                    "value": "{} votes".format(votes)
                })
                date = message.select_one("time")
                if date is not None:
                    # date_time = datetime.fromisoformat(date["datetime"])

                data["article"] = article
                return data

            live_messages = post.select("article.live-message")
            # live "article"
            if live_messages is not None:
                for message in live_messages:
                    message_title = message.select_one(".live-title")
                    article.append({
                        "type": "paragraph",
                        "value": "Publié {}".format(date.text)
                        "type": "header",
                        "size": "h2",
                        "value": message_title.text
                    })
                    date = message.select_one("time")
                    if date is not None:
                        # date_time = datetime.fromisoformat(date["datetime"])
                        article.append({
                            "type": "paragraph",
                            "value": "Publié {}".format(date.text)
                        })

                    message_body = message.select_one("div.live-article")
                    for element in message_body:
                        el = get_element(element, True)
                        if el is not None and el != {}:
                            article.append(el)

                message_body = message.select_one("div.live-article")
                for element in message_body:
                    el = get_element(element, True)
                    if el is not None and el != {}:
                        article.append(el)

        data["article"] = article
        return data
            data["article"] = article
            return data

    for element in post_content:
        el = get_element(element)
        for element in post_content:
            el = get_element(element)

        if el is not None and el != {}:
            article.append(el)
            if el is not None and el != {}:
                article.append(el)

    data["article"] = article
    return data
@@ -237,6 +256,9 @@ if __name__ == "__main__":
    # page_url = "sciences/en-direct-covid-19-les-alpes-maritimes-attendent-les-decisions-du-gouvernement-20210222"
    # "live" article

    # page_url = "story/gilles-kepel--la-victoire-des-talibans-est-une-bonne-nouvelle-pour-les-islamistes-sunnites-dans-le-monde-14454"
    # "story"

    page_url = "confinement-partiel-commerces-ce-qu-il-faut-retenir-des-mesures-de-restriction-dans-les-alpes-maritimes-20210222"
    # multiple authors

-- 
2.31.1
Reply to thread | Export thread (mbox)