mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-12-25 19:10:11 +01:00
fixed the date parsing
This commit is contained in:
parent
be0845d1b1
commit
fd4184daa9
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
#define VERSION "0.1.25-master-branch"
|
#define VERSION "0.1.25-master-branch"
|
||||||
|
|
||||||
#define BUILD 858
|
#define BUILD 859
|
||||||
|
|
||||||
#define HOSTID "mailarchiver"
|
#define HOSTID "mailarchiver"
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ long get_local_timezone_offset(){
|
|||||||
|
|
||||||
|
|
||||||
unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
||||||
int n=0;
|
int n=0, len;
|
||||||
long offset=0;
|
long offset=0;
|
||||||
unsigned long ts=0;
|
unsigned long ts=0;
|
||||||
char *p, *q, *r, s[SMALLBUFSIZE];
|
char *p, *q, *r, s[SMALLBUFSIZE];
|
||||||
@ -120,9 +120,17 @@ unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
|||||||
datestr += 5;
|
datestr += 5;
|
||||||
p = datestr;
|
p = datestr;
|
||||||
|
|
||||||
|
tm.tm_year = 0;
|
||||||
|
tm.tm_mon = 0;
|
||||||
|
tm.tm_mday = 0;
|
||||||
|
tm.tm_wday = 0;
|
||||||
|
tm.tm_hour = 0;
|
||||||
|
tm.tm_min = 0;
|
||||||
|
tm.tm_sec = 0;
|
||||||
|
tm.tm_isdst = -1;
|
||||||
|
|
||||||
for(; *datestr; datestr++){
|
for(; *datestr; datestr++){
|
||||||
if(isspace(*datestr)) *datestr = ' ';
|
if(isspace(*datestr) || *datestr == '.' || *datestr == ',') *datestr = ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -132,14 +140,26 @@ unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
|||||||
p = split_str(p, " ", s, sizeof(s)-1);
|
p = split_str(p, " ", s, sizeof(s)-1);
|
||||||
if(strlen(s) > 0){
|
if(strlen(s) > 0){
|
||||||
n++;
|
n++;
|
||||||
|
len = strlen(s);
|
||||||
|
|
||||||
q = strchr(s, ','); if(q) *q='\0';
|
/*
|
||||||
|
* A proper Date: header should look like this:
|
||||||
|
*
|
||||||
|
* Date: Mon, 3 Feb 2014 13:21:07 +0100
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* However some email applications provide crap, eg.
|
||||||
|
*
|
||||||
|
* Sat, 4 Aug 2007 13:36:52 GMT-0700
|
||||||
|
* Sat, 4 Aug 07 13:36:52 GMT-0700
|
||||||
|
* 16 Dec 07 20:45:52
|
||||||
|
* 03 Jun 06 05:59:00 +0100
|
||||||
|
* 30.06.2005 17:47:42
|
||||||
|
*
|
||||||
|
* [wday] mday mon year h:m:s offset
|
||||||
|
*/
|
||||||
|
|
||||||
if(strlen(s) <= 2){ tm.tm_mday = atoi(s); continue; }
|
if(n == 1 && len == 3){
|
||||||
|
|
||||||
if(strlen(s) == 4){ tm.tm_year = atoi(s) - 1900; continue; }
|
|
||||||
|
|
||||||
if(strlen(s) == 3){
|
|
||||||
if(strcmp(s, "Mon") == 0) tm.tm_wday = 1;
|
if(strcmp(s, "Mon") == 0) tm.tm_wday = 1;
|
||||||
else if(strcmp(s, "Tue") == 0) tm.tm_wday = 2;
|
else if(strcmp(s, "Tue") == 0) tm.tm_wday = 2;
|
||||||
else if(strcmp(s, "Wed") == 0) tm.tm_wday = 3;
|
else if(strcmp(s, "Wed") == 0) tm.tm_wday = 3;
|
||||||
@ -147,8 +167,16 @@ unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
|||||||
else if(strcmp(s, "Fri") == 0) tm.tm_wday = 5;
|
else if(strcmp(s, "Fri") == 0) tm.tm_wday = 5;
|
||||||
else if(strcmp(s, "Sat") == 0) tm.tm_wday = 6;
|
else if(strcmp(s, "Sat") == 0) tm.tm_wday = 6;
|
||||||
else if(strcmp(s, "Sun") == 0) tm.tm_wday = 0;
|
else if(strcmp(s, "Sun") == 0) tm.tm_wday = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(n == 1 && len <= 2){
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(n == 2 && len <= 2){ tm.tm_mday = atoi(s); continue; }
|
||||||
|
|
||||||
|
if(n == 3){
|
||||||
|
if(len == 3){
|
||||||
if(strcmp(s, "Jan") == 0) tm.tm_mon = 0;
|
if(strcmp(s, "Jan") == 0) tm.tm_mon = 0;
|
||||||
else if(strcmp(s, "Feb") == 0) tm.tm_mon = 1;
|
else if(strcmp(s, "Feb") == 0) tm.tm_mon = 1;
|
||||||
else if(strcmp(s, "Mar") == 0) tm.tm_mon = 2;
|
else if(strcmp(s, "Mar") == 0) tm.tm_mon = 2;
|
||||||
@ -165,23 +193,39 @@ unsigned long parse_date_header(char *datestr, struct __config *cfg){
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(strlen(s) == 8){
|
if(len == 2){
|
||||||
|
tm.tm_mon = atoi(s);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if(n == 4){
|
||||||
|
if(len == 4){ tm.tm_year = atoi(s) - 1900; continue; }
|
||||||
|
if(len == 2){ tm.tm_year = atoi(s); if(tm.tm_year < 70) tm.tm_year += 100; continue; }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if(n == 5 && len >= 5){
|
||||||
r = &s[0];
|
r = &s[0];
|
||||||
|
|
||||||
q = strchr(r, ':'); if(!q) break;
|
q = strchr(r, ':'); if(!q) break;
|
||||||
*q = '\0'; tm.tm_hour = atoi(r); r = q+1;
|
*q = '\0'; tm.tm_hour = atoi(r); r = q+1;
|
||||||
|
|
||||||
q = strchr(r, ':'); if(!q) break;
|
q = strchr(r, ':'); if(q) *q = '\0';
|
||||||
*q = '\0'; tm.tm_min = atoi(r); r = q+1;
|
tm.tm_min = atoi(r);
|
||||||
|
|
||||||
|
if(len == 8){
|
||||||
|
r = q+1;
|
||||||
tm.tm_sec = atoi(r);
|
tm.tm_sec = atoi(r);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} while(p);
|
} while(p);
|
||||||
|
|
||||||
tm.tm_isdst = -1;
|
|
||||||
ts = mktime(&tm);
|
ts = mktime(&tm);
|
||||||
|
|
||||||
if(p && (*p == '+' || *p == '-')){
|
if(p && (*p == '+' || *p == '-')){
|
||||||
|
@ -138,12 +138,38 @@ int test_htmls(){
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int test_dates(){
|
||||||
|
int count=0;
|
||||||
|
unsigned long ts;
|
||||||
|
char datestr[SMALLBUFSIZE];
|
||||||
|
struct __config cfg;
|
||||||
|
|
||||||
|
cfg.tweak_sent_time_offset = 0;
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: Mon, 3 Feb 2014 13:16:09 +0100");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: Sat, 4 Aug 07 13:36:52 GMT-0700");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: 23 Sep 09 07:03 -0800");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: 16 Dec 07 20:45:52");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: 30.06.2005 17:47:42");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
snprintf(datestr, sizeof(datestr)-2, "Date: 03 Jun 06 05:59:00 +0100");
|
||||||
|
ts = parse_date_header(datestr, &cfg); printf("%s => %ld\n", datestr, ts);
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv){
|
int main(int argc, char **argv){
|
||||||
int n;
|
int n;
|
||||||
//struct __config cfg;
|
|
||||||
|
|
||||||
//cfg = read_config(CONFIG_FILE);
|
|
||||||
|
|
||||||
n = test_urls();
|
n = test_urls();
|
||||||
printf("testing fixURL(), errors: %d\n", n);
|
printf("testing fixURL(), errors: %d\n", n);
|
||||||
@ -156,5 +182,8 @@ int main(int argc, char **argv){
|
|||||||
n = test_htmls();
|
n = test_htmls();
|
||||||
printf("testing markHTML(), errors: %d\n", n);
|
printf("testing markHTML(), errors: %d\n", n);
|
||||||
|
|
||||||
|
n = test_dates();
|
||||||
|
printf("testing parse_date_header(), errors: %d\n", n);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user