Skip to content
Snippets Groups Projects
Commit c454f670 authored by Frédéric Guillot
Browse files

Add scraper rules for version2.dk and ing.dk

parent d4839b55
No related branches found
No related tags found
No related merge requests found
...@@ -129,7 +129,7 @@ func (c *Client) buildClient() http.Client { ...@@ -129,7 +129,7 @@ func (c *Client) buildClient() http.Client {
func (c *Client) buildHeaders() http.Header { func (c *Client) buildHeaders() http.Header {
headers := make(http.Header) headers := make(http.Header)
headers.Add("User-Agent", userAgent) headers.Add("User-Agent", userAgent)
headers.Add("Accept", "text/html,application/xhtml+xml,application/xml,application/json,image/*") headers.Add("Accept", "*/*")
if c.etagHeader != "" { if c.etagHeader != "" {
headers.Add("If-None-Match", c.etagHeader) headers.Add("If-None-Match", c.etagHeader)
......
...@@ -10,6 +10,7 @@ var predefinedRules = map[string]string{ ...@@ -10,6 +10,7 @@ var predefinedRules = map[string]string{
"cbc.ca": ".story-content", "cbc.ca": ".story-content",
"github.com": "article.entry-content", "github.com": "article.entry-content",
"igen.fr": "section.corps", "igen.fr": "section.corps",
"ing.dk": "section.body",
"lapresse.ca": ".amorce, .entry", "lapresse.ca": ".amorce, .entry",
"lemonde.fr": "div#articleBody", "lemonde.fr": "div#articleBody",
"lesjoiesducode.fr": ".blog-post-content img", "lesjoiesducode.fr": ".blog-post-content img",
...@@ -20,5 +21,6 @@ var predefinedRules = map[string]string{ ...@@ -20,5 +21,6 @@ var predefinedRules = map[string]string{
"phoronix.com": "div.content", "phoronix.com": "div.content",
"techcrunch.com": "div.article-entry", "techcrunch.com": "div.article-entry",
"theregister.co.uk": "#body", "theregister.co.uk": "#body",
"version2.dk": "section.body",
"wired.com": "main figure, article", "wired.com": "main figure, article",
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment