~ghost08/ratt

Add config for press releases of the city of Koblenz, Germany v1 APPLIED

Daniel Hecker: 1
 Add config for press releases of the city of Koblenz, Germany

 1 files changed, 64 insertions(+), 0 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~ghost08/ratt/patches/27825/mbox | git am -3
Learn more about email & git

[PATCH] Add config for press releases of the city of Koblenz, Germany Export this patch

---
 confs/koblenz-press-releases.yml | 64 ++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 confs/koblenz-press-releases.yml

diff --git a/confs/koblenz-press-releases.yml b/confs/koblenz-press-releases.yml
new file mode 100644
index 0000000..ce325e2
--- /dev/null
+++ b/confs/koblenz-press-releases.yml
@@ -0,0 +1,64 @@
# This configuration creates a feed for the press releases of
# the city of Koblenz, Germany. The city had a working feed once,
# but it broke during an update and they seem to have forgotten
# that it even existed in the first place.

# URL pattern for the press-release listing pages of koblenz.de.
# NOTE(review): presumably matched against the requested URL to select this
# config - confirm against the ratt documentation.
# The dots of the host name are escaped so that they only match a literal ".";
# single quotes keep the backslashes literal in YAML.
regex: 'https://www\.koblenz\.de/rathaus/verwaltung/pressemeldungen/.*'
selectors:
  # Settings for the HTTP requests made while scraping.
  httpsettings:
    # Browser-like user agent. Written as a folded scalar so the line break is
    # an explicit fold (it becomes a single space in the resulting value).
    useragent: >-
      Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
      Chrome/90.0.4430.72 Safari/537.36
    # NOTE(review): presumably controls skipping TLS certificate
    # verification - confirm against the ratt documentation.
    insecure: false
    # No extra cookies or headers are configured.
    cookie: {}
    header: {}
  # Feed-level metadata.
  # NOTE(review): "title" is presumably a CSS selector for the page's <title>
  # element - confirm against the ratt documentation.
  feed:
    title: "title"
    # Description and author fields are left empty.
    description: ''
    authorname: ''
    authoremail: ''
  # Per-item selectors. Plain values are CSS selectors; block scalars are Lua
  # scripts that hand their result back via print().
  # NOTE(review): presumably every match of `container` yields one feed item
  # and `sel` is the selection of that item - confirm against the ratt docs.
  item:
    container: "article"
    title: span.headline
    content: |-
      -- Build the absolute URL of the press release from the item's link.
      local host = "https://www.koblenz.de"
      local href = sel:find("a"):attr("href") or ""
      local itemURL = href
      if not href:match("^https?://") then
        itemURL = host .. href
      end

      -- Retrieve the press release document.
      local doc, err = goquery.newDocFromURL(itemURL)
      if err ~= nil then
        -- Abort with the fetch error if the request was unsuccessful.
        error(err)
      end

      local html = doc:find(".currentPressRelease"):html()
      -- Prefix every "/pressezentrale" path with the host, then collapse the
      -- doubled host this produces for URLs that were already absolute.
      -- Assigning to a single variable drops gsub's second return value (the
      -- substitution count), so only the markup is printed.
      html = html:gsub("/pressezentrale", host .. "/pressezentrale")
      html = html:gsub("https://www%.koblenz%.dehttps://www%.koblenz%.de", host)
      print(html)
    link: |-
      -- Link of the item; the host is prepended unless the href is already
      -- an absolute URL.
      local href = sel:find("a"):attr("href") or ""
      if href:match("^https?://") then
        print(href)
      else
        print("https://www.koblenz.de" .. href)
      end
    created: |-
      local stamp = sel:find("time"):first():text()
      -- Strip surrounding whitespace so the value matches createdformat.
      stamp = stamp:gsub("^%s+", ""):gsub("%s+$", "")
      if stamp:find(":", 1, true) then
        -- The field contains a clock time instead of a date: use today's date.
        print(os.date("%d.%m.%Y"))
      else
        print(stamp)
      end
    # Day.month.year layout; matches both values printed by `created`.
    createdformat: "02.01.2006"
    description: div.article-teaser__teaser p
    image: |-
      local img = sel:find("img"):first():attr("data-src")
      if img ~= nil and img ~= "" then
        -- Prepend the host unless the URL already starts with http(s)://.
        if not img:match("^https?://") then
          img = "https://www.koblenz.de" .. img
        end
        print(img)
      end
  # Lua script that prints the URL of the next listing page, if there is one.
  nextpage: |-
    local nextLink = sel:find(".pagination-next a"):first()
    local href = nextLink and nextLink:attr("href")
    -- Test the href itself: the selection object is never equal to "", so
    -- checking it would still print the bare host on the last page.
    if href ~= nil and href ~= "" then
      -- Prepend the host unless the href is already an absolute URL.
      if not href:match("^https?://") then
        href = "https://www.koblenz.de" .. href
      end
      print(href)
    end
  nextpageattr: href
  # NOTE(review): presumably the maximum number of pages to follow - confirm.
  nextpagecount: 15
  sort: ""
-- 
2.34.1
Thanks for your work, you got really into it :)