Skip to content
Snippets Groups Projects
Commit a108cb78 authored by Frédéric Guillot
Browse files

Handle various invalid dates

parent 4f4f5739
No related branches found
No related tags found
No related merge requests found
...@@ -123,7 +123,7 @@ func (a *atom10Entry) entryDate() time.Time { ...@@ -123,7 +123,7 @@ func (a *atom10Entry) entryDate() time.Time {
if dateText != "" { if dateText != "" {
result, err := date.Parse(dateText) result, err := date.Parse(dateText)
if err != nil { if err != nil {
logger.Error("atom: %v", err) logger.Error("atom: %v (entry ID = %s)", err, a.ID)
return time.Now() return time.Now()
} }
......
...@@ -23,6 +23,7 @@ var dateFormats = []string{ ...@@ -23,6 +23,7 @@ var dateFormats = []string{
time.RFC1123Z, time.RFC1123Z,
time.RFC1123, time.RFC1123,
time.ANSIC, time.ANSIC,
"Mon, January 2, 2006, 3:04 PM MST",
"Mon, January 2 2006 15:04:05 -0700", "Mon, January 2 2006 15:04:05 -0700",
"Mon, January 02, 2006, 15:04:05 MST", "Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST", "Mon, January 02, 2006 15:04:05 MST",
...@@ -37,6 +38,8 @@ var dateFormats = []string{ ...@@ -37,6 +38,8 @@ var dateFormats = []string{
"Mon Jan 02, 2006 3:04 pm", "Mon Jan 02, 2006 3:04 pm",
"Mon, Jan 02,2006 15:04:05 MST", "Mon, Jan 02,2006 15:04:05 MST",
"Mon Jan 02 2006 15:04:05 -0700", "Mon Jan 02 2006 15:04:05 -0700",
"Monday, 2. January 2006 - 15:04",
"Monday 02 January 2006",
"Monday, January 2, 2006 15:04:05 MST", "Monday, January 2, 2006 15:04:05 MST",
"Monday, January 2, 2006 03:04 PM", "Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006", "Monday, January 2, 2006",
...@@ -111,6 +114,11 @@ var dateFormats = []string{ ...@@ -111,6 +114,11 @@ var dateFormats = []string{
"Mon, 02 Jan 2006", "Mon, 02 Jan 2006",
"Mon, 02 Jan 06 15:04:05 MST", "Mon, 02 Jan 06 15:04:05 MST",
"Mon, 02 Jan 2006 3:04 PM MST", "Mon, 02 Jan 2006 3:04 PM MST",
"Mon Jan 02 2006 15:04:05 MST",
"Mon, 01 02 2006 15:04:05 -0700",
"Mon, 2th Jan 2006 15:05:05 MST",
"Jan. 2, 2006, 3:04 a.m.",
"fri, 02 jan 2006 15:04:05 -0700",
"January 02 2006 03:04:05 PM", "January 02 2006 03:04:05 PM",
"January 2, 2006 3:04 PM", "January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.", "January 2, 2006, 3:04 p.m.",
...@@ -145,6 +153,7 @@ var dateFormats = []string{ ...@@ -145,6 +153,7 @@ var dateFormats = []string{
"2006-1-2T15:04:05Z", "2006-1-2T15:04:05Z",
"2006-1-2 15:04:05", "2006-1-2 15:04:05",
"2006-1-2", "2006-1-2",
"2006-01-02T15:04:05-07:00Z",
"2006-1-02T15:04:05Z", "2006-1-02T15:04:05Z",
"2006-01-02T15:04Z", "2006-01-02T15:04Z",
"2006-01-02T15:04-07:00", "2006-01-02T15:04-07:00",
...@@ -196,41 +205,106 @@ var dateFormats = []string{ ...@@ -196,41 +205,106 @@ var dateFormats = []string{
"01/02/2006 - 15:04", "01/02/2006 - 15:04",
"01/02/2006", "01/02/2006",
"01-02-2006", "01-02-2006",
"Jan. 2006",
} }
// invalidTimezoneReplacer rewrites timezone spellings that appear in some
// feed dates into short abbreviations. Parse applies it to the input before
// trying the known layouts.
var invalidTimezoneReplacer = func() *strings.Replacer {
	// Flat list of (old, new) pairs, in the order NewReplacer expects.
	pairs := []string{
		"Europe/Brussels", "CET",
		"GMT+0000 (Coordinated Universal Time)", "GMT",
	}
	return strings.NewReplacer(pairs...)
}()
// invalidLocalizedDateReplacer rewrites German and French weekday and month
// names found in feed dates into English names. Parse applies it to the
// input before trying the known layouts.
//
// The entry order is significant: strings.NewReplacer compares old strings
// in argument order, so the table must not be re-sorted.
var invalidLocalizedDateReplacer = func() *strings.Replacer {
	table := [][2]string{
		// German weekday abbreviations (with trailing comma).
		{"Mo,", "Mon,"},
		{"Di,", "Tue,"},
		{"Mi,", "Wed,"},
		{"Do,", "Thu,"},
		{"Fr,", "Fri,"},
		{"Sa,", "Sat,"},
		{"So,", "Sun,"},

		// German month abbreviations (with trailing space).
		{"Mär ", "Mar "},
		{"Mai ", "May "},
		{"Okt ", "Oct "},
		{"Dez ", "Dec "},

		// French weekday abbreviations (with trailing comma).
		{"lun,", "Mon,"},
		{"mar,", "Tue,"},
		{"mer,", "Wed,"},
		{"jeu,", "Thu,"},
		{"ven,", "Fri,"},
		{"sam,", "Sat,"},
		{"dim,", "Sun,"},

		// French weekday abbreviations (with trailing period, which is dropped).
		{"lun.", "Mon"},
		{"mar.", "Tue"},
		{"mer.", "Wed"},
		{"jeu.", "Thu"},
		{"ven.", "Fri"},
		{"sam.", "Sat"},
		{"dim.", "Sun"},

		// French full weekday names (with trailing comma).
		{"Lundi,", "Monday,"},
		{"Mardi,", "Tuesday,"},
		{"Mercredi,", "Wednesday,"},
		{"Jeudi,", "Thursday,"},
		{"Vendredi,", "Friday,"},
		{"Samedi,", "Saturday,"},
		{"Dimanche,", "Sunday,"},

		// French month abbreviations (with trailing space).
		{"avr ", "Apr "},
		{"mai ", "May "},
		{"jui ", "Jun "},
		{"juin ", "June "},

		// Month abbreviations with a trailing period.
		{"jan.", "January "},
		{"feb.", "February "},
		{"mars.", "March "},
		{"avril.", "April "},
		{"mai.", "May "},
		{"juin.", "June "},
		{"juil.", "july"},
		{"août.", "august"},
		{"sept.", "september"},
		{"oct.", "october"},
		{"nov.", "november"},
		{"dec.", "december"},

		// French full month names.
		{"Janvier", "January"},
		{"Février", "February"},
		{"Mars", "March"},
		{"Avril", "April"},
		{"Mai", "May"},
		{"Juin", "June"},
		{"Juillet", "July"},
		{"Août", "August"},
		{"Septembre", "September"},
		{"Octobre", "October"},
		{"Novembre", "November"},
		{"Décembre", "December"},
	}

	// Flatten the table into the alternating old/new list NewReplacer takes,
	// keeping the declaration order.
	oldnew := make([]string, 0, 2*len(table))
	for _, pair := range table {
		oldnew = append(oldnew, pair[0], pair[1])
	}
	return strings.NewReplacer(oldnew...)
}()
// Parse parses a given date string using a large // Parse parses a given date string using a large
// list of commonly found feed date formats. // list of commonly found feed date formats.
func Parse(ds string) (t time.Time, err error) { func Parse(rawInput string) (t time.Time, err error) {
timestamp, err := strconv.ParseInt(ds, 10, 64) timestamp, err := strconv.ParseInt(rawInput, 10, 64)
if err == nil { if err == nil {
return time.Unix(timestamp, 0), nil return time.Unix(timestamp, 0), nil
} }
ds = replaceNonEnglishWords(ds) processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
d := strings.TrimSpace(ds) processedInput = invalidTimezoneReplacer.Replace(processedInput)
if d == "" { processedInput = strings.TrimSpace(processedInput)
return t, errors.New("date parser: empty value") if processedInput == "" {
return t, errors.New(`date parser: empty value`)
} }
for _, layout := range dateFormats { for _, layout := range dateFormats {
switch layout { switch layout {
case time.RFC822, time.RFC850, time.RFC1123: case time.RFC822, time.RFC850, time.RFC1123:
if t, err = parseLocalTimeDates(layout, d); err == nil { if t, err = parseLocalTimeDates(layout, processedInput); err == nil {
return return
} }
} }
if t, err = time.Parse(layout, d); err == nil { if t, err = time.Parse(layout, processedInput); err == nil {
return return
} }
} }
lastSpace := strings.LastIndex(ds, " ") err = fmt.Errorf(`date parser: failed to parse date "%s"`, rawInput)
if lastSpace > 0 {
return Parse(ds[0:lastSpace])
}
err = fmt.Errorf(`date parser: failed to parse date "%s"`, ds)
return return
} }
...@@ -249,32 +323,3 @@ func parseLocalTimeDates(layout, ds string) (t time.Time, err error) { ...@@ -249,32 +323,3 @@ func parseLocalTimeDates(layout, ds string) (t time.Time, err error) {
return time.ParseInLocation(layout, ds, loc) return time.ParseInLocation(layout, ds, loc)
} }
// nonEnglishDateReplacer maps German and French weekday and month
// abbreviations to their English equivalents. It is built once at package
// initialisation instead of on every call; a strings.Replacer is safe for
// concurrent use, so sharing one instance is fine.
var nonEnglishDateReplacer = strings.NewReplacer(
	// German weekday abbreviations.
	"Mo,", "Mon,",
	"Di,", "Tue,",
	"Mi,", "Wed,",
	"Do,", "Thu,",
	"Fr,", "Fri,",
	"Sa,", "Sat,",
	"So,", "Sun,",
	// German month abbreviations.
	"Mär ", "Mar ",
	"Mai ", "May ",
	"Okt ", "Oct ",
	"Dez ", "Dec ",
	// French weekday abbreviations.
	"lun,", "Mon,",
	"mar,", "Tue,",
	"mer,", "Wed,",
	"jeu,", "Thu,",
	"ven,", "Fri,",
	"sam,", "Sat,",
	"dim,", "Sun,",
	// French month abbreviations.
	"avr ", "Apr ",
	"mai ", "May ",
	"jui ", "Jun ",
)

// replaceNonEnglishWords returns ds with the German and French weekday and
// month abbreviations listed in nonEnglishDateReplacer replaced by their
// English equivalents. Text that matches no entry is returned unchanged.
func replaceNonEnglishWords(ds string) string {
	return nonEnglishDateReplacer.Replace(ds)
}
...@@ -133,11 +133,21 @@ func TestParseWeirdDateFormat(t *testing.T) { ...@@ -133,11 +133,21 @@ func TestParseWeirdDateFormat(t *testing.T) {
"Mon, 30 Mar 2020 19:53 +0000", "Mon, 30 Mar 2020 19:53 +0000",
"Mon, 03/30/2020 - 19:19", "Mon, 03/30/2020 - 19:19",
"2018-12-12T12:12", "2018-12-12T12:12",
"2020-11-08T16:20:00-05:00Z",
"Nov. 16, 2020, 10:57 a.m.",
"Friday 06 November 2020",
"Mon, November 16, 2020, 11:12 PM EST",
"Lundi, 16. Novembre 2020 - 15:54",
"Thu Nov 12 2020 17:00:00 GMT+0000 (Coordinated Universal Time)",
"Sat, 11 04 2020 08:51:49 +0100",
"Mon, 16th Nov 2020 13:16:28 GMT",
"Nov. 2020",
"ven., 03 juil. 2020 15:09:58 +0000",
} }
for _, date := range dates { for _, date := range dates {
if _, err := Parse(date); err != nil { if _, err := Parse(date); err != nil {
t.Fatalf(`Unable to parse date: %q`, date) t.Errorf(`Unable to parse date: %q`, date)
} }
} }
} }
...@@ -179,7 +179,7 @@ func (r *rssItem) entryDate() time.Time { ...@@ -179,7 +179,7 @@ func (r *rssItem) entryDate() time.Time {
if value != "" { if value != "" {
result, err := date.Parse(value) result, err := date.Parse(value)
if err != nil { if err != nil {
logger.Error("rss: %v", err) logger.Error("rss: %v (entry GUID = %s)", err, r.GUID)
return time.Now() return time.Now()
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment