URLs are not consistent and need to be checked before
prepending scheme and host.
---
confs/koblenz-press-releases.yml | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/confs/koblenz-press-releases.yml b/confs/koblenz-press-releases.yml
index 1764202..f695206 100644
--- a/confs/koblenz-press-releases.yml
+++ b/confs/koblenz-press-releases.yml
@@ -22,7 +22,13 @@ selectors:
content: |-
-- get link to press release
a = sel:find("a")
- itemURL = "https://www.koblenz.de" .. a:attr("href")
+ -- hrefs are inconsistent: prepend scheme and host only to relative links
+ link = a:attr("href")
+ if not link:match("^https?://") then
+ itemURL = "https://www.koblenz.de" .. link
+ else
+ itemURL = link
+ end

-- retrieve press release document
doc, err = goquery.newDocFromURL(itemURL)
@@ -34,7 +40,13 @@ selectors:
print(content)
link: |-
a = sel:find("a")
- itemURL = "https://www.koblenz.de" .. a:attr("href")
+ -- hrefs are inconsistent: prepend scheme and host only to relative links
+ link = a:attr("href")
+ if not link:match("^https?://") then
+ itemURL = "https://www.koblenz.de" .. link
+ else
+ itemURL = link
+ end
print(itemURL)
created: |-
time = sel:find("time"):first():text()
--
2.37.1