From 154780c4a78cd824ec3dc3551d408fa894d74b3a Mon Sep 17 00:00:00 2001 From: SJ Date: Sat, 26 May 2012 15:18:22 +0200 Subject: [PATCH] pilerimport can retrieve messages from imap4 servers, too --- configure | 2 +- configure.in | 2 +- src/base64.c | 79 ++++++++++++++ src/decoder.h | 2 + src/defs.h | 2 +- src/imap.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++ src/import.c | 118 +++++++++++++++++++++ src/memc.c | 2 +- src/piler.h | 1 + src/pilerimport.c | 181 ++++++++++++++------------------ 10 files changed, 543 insertions(+), 107 deletions(-) create mode 100644 src/base64.c create mode 100644 src/imap.c create mode 100644 src/import.c diff --git a/configure b/configure index 53338eae..3d53286d 100755 --- a/configure +++ b/configure @@ -4151,7 +4151,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs " -OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o $objs" +OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o $objs" ac_config_files="$ac_config_files Makefile src/Makefile etc/Makefile util/Makefile init.d/Makefile" diff --git a/configure.in b/configure.in index be821e32..0862c1a2 100644 --- a/configure.in +++ b/configure.in @@ -274,7 +274,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs " -OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o $objs" +OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o $objs" AC_CONFIG_FILES([Makefile src/Makefile etc/Makefile util/Makefile init.d/Makefile]) AC_OUTPUT diff --git a/src/base64.c b/src/base64.c new file mode 100644 index 00000000..d130c930 --- /dev/null +++ b/src/base64.c @@ -0,0 +1,79 @@ +/* + * base64.c, SJ + */ + +#include +#include + + +char base64_value(char c){ + static const char *base64_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + if((int)c > 63) return '='; + + return base64_table[(int)c]; +} + + +void base64_encode_block(unsigned char *in, int inlen, char *out){ + char a, b, c, d, fragment; + + sprintf(out, "===="); + + if(inlen <= 0) return; + + fragment = *in & 0x3; + + a = *in >> 2; + + out[0] = base64_value(a); + + b = fragment << 4; + + if(inlen > 1) + b += *(in+1) >> 4; + + out[1] = base64_value(b); + + if(inlen == 1) return; + + + c = *(in+1) & 0xf; + c = c << 2; + + if(inlen > 2){ + fragment = *(in+2) & 0xfc; + c += fragment >> 6; + + d = *(in+2) & 0x3f; + out[3] = base64_value(d); + } + + out[2] = base64_value(c); +} + + +void base64_encode(unsigned char *in, int inlen, char *out, int outlen){ + int i=0, j, pos=0; + unsigned char buf[3]; + + memset(buf, 0, 3); + memset(out, 0, outlen); + + for(j=0; j +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +unsigned long resolve_host(char *host){ + struct hostent *h; + struct in_addr addr; + + if(!host) return 0; + + if((addr.s_addr = inet_addr(host)) == -1){ + if((h = gethostbyname(host)) == NULL){ + return 0; + } + else return *(unsigned long*)h->h_addr; + } + else return addr.s_addr; +} + + +int process_imap_folder(int sd, int *seq, char *folder, struct session_data *sdata, struct __data *data, struct __config *cfg){ + int rc=ERR, i, n, pos, messages=0, len, readlen, fd; + char *p, *q, tag[SMALLBUFSIZE], tagok[SMALLBUFSIZE], buf[MAXBUFSIZE], puf[MAXBUFSIZE], filename[SMALLBUFSIZE]; + + + snprintf(tag, sizeof(tag)-1, "A%d", *seq); snprintf(tagok, sizeof(tagok)-1, "\r\nA%d OK", (*seq)++); + snprintf(buf, sizeof(buf)-1, "%s SELECT %s\r\n", tag, folder); + send(sd, buf, strlen(buf), 0); + n = recvtimeout(sd, buf, MAXBUFSIZE, 10); + + + if(!strstr(buf, tagok)){ + trimBuffer(buf); + printf("error: %s\n", buf); + return rc; + } + + + p = &buf[0]; + do { + memset(puf, 0, sizeof(puf)); + p = split(p, '\n', puf, sizeof(puf)-1); + + q = strstr(puf, " EXISTS"); + if(q){ + *q = '\0'; + messages = atoi(puf+2); + } + + } while(p); + + + printf("found %d messages\n", messages); + + if(messages <= 0) return rc; + for(i=1; i<=messages; i++){ + + snprintf(tag, sizeof(tag)-1, "A%d", *seq); snprintf(tagok, sizeof(tagok)-1, "\r\nA%d OK", (*seq)++); + snprintf(buf, sizeof(buf)-1, "%s FETCH %d (BODY.PEEK[])\r\n", tag, i); + + snprintf(filename, sizeof(filename)-1, "%s-%d.txt", folder, i); + unlink(filename); + + fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR); + if(fd == -1){ + printf("cannot open: %s\n", filename); + return rc; + } + + + send(sd, buf, strlen(buf), 0); + memset(buf, 0, sizeof(buf)); + n = recvtimeout(sd, buf, MAXBUFSIZE, 10); + + + len = 0; readlen = n; + + p = strstr(buf, "\r\n"); + if(!p){ + printf("invalid reply: %s", buf); + continue; + } + + *p = '\0'; + pos = strlen(buf) + 2; + + + if(*(p-1) == '}') *(p-1) = '\0'; + + + q = strchr(buf, '{'); + if(q){ + q++; + len = atoi(q); + } + + if(len < 10){ + printf("too short message: %s\n", buf); + continue; + } + + n -= pos; + + q = strstr(p+2, tagok); + if(q){ + n -= strlen(q) + 1; + } + + + write(fd, p+2, n); + + + while(readlen < len){ + memset(buf, 0, sizeof(buf)); + n = recvtimeout(sd, buf, MAXBUFSIZE, 3); + readlen += n; + + p = strstr(buf, tagok); + if(p){ + n -= strlen(p)+1; + } + + write(fd, buf, n); + + } + + close(fd); + + rc = import_message(filename, sdata, data, cfg); + + unlink(filename); + + } + + + return OK; +} + + +int connect_to_imap_server(int sd, int *seq, char *imapserver, char *username, char *password){ + int n, pos=0; + char tag[SMALLBUFSIZE], tagok[SMALLBUFSIZE], buf[MAXBUFSIZE]; + char auth[2*SMALLBUFSIZE]; + unsigned char tmp[SMALLBUFSIZE]; + unsigned long host=0; + struct sockaddr_in remote_addr; + + + host = resolve_host(imapserver); + + remote_addr.sin_family = AF_INET; + remote_addr.sin_port = htons(143); + remote_addr.sin_addr.s_addr = host; + bzero(&(remote_addr.sin_zero),8); + + + if(connect(sd, (struct sockaddr *)&remote_addr, sizeof(struct sockaddr)) == -1){ + printf("connect()\n"); + return ERR; + } + + n = recvtimeout(sd, buf, MAXBUFSIZE, 10); + //printf("connected...\n"); + + + /* + * create auth buffer: username + NUL character + username + NUL character + password + */ + + memset(tmp, 0, sizeof(tmp)); + pos = 0; + + memcpy(tmp+pos, username, strlen(username)); + pos = strlen(username) + 1; + memcpy(tmp+pos, username, strlen(username)); + pos += strlen(username) + 1; + memcpy(tmp+pos, password, strlen(password)); + pos += strlen(password); + + + base64_encode(&tmp[0], pos, &auth[0], sizeof(auth)); + + snprintf(tag, sizeof(tag)-1, "A%d", *seq); snprintf(tagok, sizeof(tagok)-1, "A%d OK", (*seq)++); + snprintf(buf, sizeof(buf)-1, "%s AUTHENTICATE PLAIN %s\r\n", tag, auth); + send(sd, buf, strlen(buf), 0); + n = recvtimeout(sd, buf, MAXBUFSIZE, 10); + if(strncmp(buf, tagok, strlen(tagok))){ + printf("login failed, server reponse: %s\n", buf); + return ERR; + } + + //printf("logged in...\n"); + + return OK; +} + + +int list_folders(int sd, int *seq, char *folders, int foldersize){ + int n; + char *p, *q, tag[SMALLBUFSIZE], tagok[SMALLBUFSIZE], buf[MAXBUFSIZE], puf[MAXBUFSIZE]; + + snprintf(folders, foldersize-1, "INBOX"); + + + snprintf(tag, sizeof(tag)-1, "A%d", *seq); snprintf(tagok, sizeof(tagok)-1, "A%d OK", (*seq)++); + snprintf(buf, sizeof(buf)-1, "%s LIST \"\" %%\r\n", tag); + + send(sd, buf, strlen(buf), 0); + + n = recvtimeout(sd, buf, MAXBUFSIZE, 10); + + p = &buf[0]; + do { + memset(puf, 0, sizeof(puf)); + p = split(p, '\n', puf, sizeof(puf)-1); + trimBuffer(puf); + + if(strncmp(puf, "* LIST ", 7) == 0){ + q = strrchr(puf, ' '); + if(q){ + if(*(q+1) == '"') q += 2; + if(puf[strlen(puf)-1] == '"') puf[strlen(puf)-1] = '\0'; + + if(strncasecmp(q, "junk", 4) && strncasecmp(q, "trash", 5) && strncasecmp(q, "spam", 4) && strncasecmp(q, "draft", 5)){ + strncat(folders, "\n", foldersize-1); + strncat(folders, q, foldersize-1); + } + + } + } + else { + if(strncmp(puf, tagok, strlen(tagok)) == 0) {} + } + + } while(p); + + + return 0; +} + + diff --git a/src/import.c b/src/import.c new file mode 100644 index 00000000..db64d372 --- /dev/null +++ b/src/import.c @@ -0,0 +1,118 @@ +/* + * import.c, SJ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +int import_message(char *filename, struct session_data *sdata, struct __data *data, struct __config *cfg){ + int rc=ERR, fd; + char *rule; + struct stat st; + struct _state state; + struct __counters counters; + + + init_session_data(sdata); + + + if(strcmp(filename, "-") == 0){ + + if(read_from_stdin(sdata) == ERR){ + printf("error reading from stdin\n"); + return rc; + } + + snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", sdata->ttmpfile); + + } + else { + + if(stat(filename, &st) != 0){ + printf("cannot stat() %s\n", filename); + return rc; + } + + if(S_ISREG(st.st_mode) == 0){ + printf("%s is not a file\n", filename); + return rc; + } + + fd = open(filename, O_RDONLY); + if(fd == -1){ + printf("cannot open %s\n", filename); + return rc; + } + close(fd); + + snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", filename); + + sdata->tot_len = st.st_size; + } + + + + sdata->sent = 0; + + state = parse_message(sdata, cfg); + post_parse(sdata, &state, cfg); + + if(sdata->sent > sdata->now) sdata->sent = sdata->now; + if(sdata->sent == -1) sdata->sent = 0; + + /* fat chances that you won't import emails before 1990.01.01 */ + + if(sdata->sent > 631148400) sdata->retained = sdata->sent; + + rule = check_againt_ruleset(data->archiving_rules, &state, sdata->tot_len, sdata->spam_message); + + if(rule){ + printf("discarding %s by archiving policy: %s\n", filename, rule); + rc = OK; + goto ENDE; + } + + make_digests(sdata, cfg); + + rc = process_message(sdata, &state, data, cfg); + +ENDE: + unlink(sdata->tmpframe); + + if(strcmp(filename, "-") == 0) unlink(sdata->ttmpfile); + + + switch(rc) { + case OK: + printf("imported: %s\n", filename); + + bzero(&counters, sizeof(counters)); + counters.c_size += sdata->tot_len; + update_counters(sdata, data, &counters, cfg); + + break; + + case ERR_EXISTS: + printf("discarding duplicate message: %s\n", filename); + break; + + default: + printf("failed to import: %s (id: %s)\n", filename, sdata->ttmpfile); + break; + } + + return rc; +} + diff --git a/src/memc.c b/src/memc.c index 196012f9..1c6ac7f6 100644 --- a/src/memc.c +++ b/src/memc.c @@ -51,7 +51,7 @@ void memcached_init(struct memcached_server *ptr, char *server_ip, int server_po ptr->last_read_bytes = 0; - snprintf(ptr->server_ip, IPLEN, "%s", server_ip); + snprintf(ptr->server_ip, IPLEN-1, "%s", server_ip); ptr->server_port = server_port; ptr->initialised = 0; diff --git a/src/piler.h b/src/piler.h index 9b2f3d6b..ec2214a1 100644 --- a/src/piler.h +++ b/src/piler.h @@ -50,6 +50,7 @@ int retrieve_email_from_archive(struct session_data *sdata, FILE *dest, struct _ int prepare_a_mysql_statement(struct session_data *sdata, MYSQL_STMT **stmt, char *s); +int import_message(char *filename, struct session_data *sdata, struct __data *data, struct __config *cfg); #endif /* _PILER_H */ diff --git a/src/pilerimport.c b/src/pilerimport.c index 7fc0acbd..a0533063 100644 --- a/src/pilerimport.c +++ b/src/pilerimport.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -21,104 +22,9 @@ extern char *optarg; extern int optind; -int import_message(char *filename, struct session_data *sdata, struct __data *data, struct __config *cfg){ - int rc=ERR, fd; - char *rule; - struct stat st; - struct _state state; - struct __counters counters; - - - init_session_data(sdata); - - - if(strcmp(filename, "-") == 0){ - - if(read_from_stdin(sdata) == ERR){ - printf("error reading from stdin\n"); - return rc; - } - - snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", sdata->ttmpfile); - - } - else { - - if(stat(filename, &st) != 0){ - printf("cannot stat() %s\n", filename); - return rc; - } - - if(S_ISREG(st.st_mode) == 0){ - printf("%s is not a file\n", filename); - return rc; - } - - fd = open(filename, O_RDONLY); - if(fd == -1){ - printf("cannot open %s\n", filename); - return rc; - } - close(fd); - - snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", filename); - - sdata->tot_len = st.st_size; - } - - - - sdata->sent = 0; - - state = parse_message(sdata, cfg); - post_parse(sdata, &state, cfg); - - if(sdata->sent > sdata->now) sdata->sent = sdata->now; - if(sdata->sent == -1) sdata->sent = 0; - - /* fat chances that you won't import emails before 1990.01.01 */ - - if(sdata->sent > 631148400) sdata->retained = sdata->sent; - - rule = check_againt_ruleset(data->archiving_rules, &state, sdata->tot_len, sdata->spam_message); - - if(rule){ - printf("discarding %s by archiving policy: %s\n", filename, rule); - rc = OK; - goto ENDE; - } - - make_digests(sdata, cfg); - - rc = process_message(sdata, &state, data, cfg); - -ENDE: - unlink(sdata->tmpframe); - - if(strcmp(filename, "-") == 0) unlink(sdata->ttmpfile); - - - switch(rc) { - case OK: - printf("imported: %s\n", filename); - - bzero(&counters, sizeof(counters)); - counters.c_size += sdata->tot_len; - update_counters(sdata, data, &counters, cfg); - - break; - - case ERR_EXISTS: - printf("discarding duplicate message: %s\n", filename); - break; - - default: - printf("failed to import: %s (id: %s)\n", filename, sdata->ttmpfile); - break; - } - - return rc; -} +int connect_to_imap_server(int sd, int *seq, char *imapserver, char *username, char *password); +int list_folders(int sd, int *seq, char *folders, int foldersize); +int process_imap_folder(int sd, int *seq, char *folder, struct session_data *sdata, struct __data *data, struct __config *cfg); int import_from_mailbox(char *mailbox, struct session_data *sdata, struct __data *data, struct __config *cfg){ @@ -172,6 +78,7 @@ int import_from_maildir(char *directory, struct session_data *sdata, struct __da struct dirent *de; int rc=ERR, tot_msgs=0; char fname[SMALLBUFSIZE]; + struct stat st; dir = opendir(directory); if(!dir){ @@ -179,14 +86,31 @@ int import_from_maildir(char *directory, struct session_data *sdata, struct __da return rc; } + while((de = readdir(dir))){ if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; snprintf(fname, sizeof(fname)-1, "%s/%s", directory, de->d_name); - rc = import_message(fname, sdata, data, cfg); + if(stat(fname, &st) == 0){ + if(S_ISDIR(st.st_mode)){ + import_from_maildir(fname, sdata, data, cfg); + } + else { - if(rc != ERR) tot_msgs++; + if(S_ISREG(st.st_mode)){ + rc = import_message(fname, sdata, data, cfg); + if(rc != ERR) tot_msgs++; + } + else { + printf("%s is not a file\n", fname); + } + + } + } + else { + printf("cannot stat() %s\n", fname); + } } closedir(dir); @@ -195,8 +119,45 @@ int import_from_maildir(char *directory, struct session_data *sdata, struct __da } +int import_from_imap_server(char *imapserver, char *username, char *password, struct session_data *sdata, struct __data *data, struct __config *cfg){ + int rc=ERR, sd, seq=1; + char *p, puf[MAXBUFSIZE]; + char folders[MAXBUFSIZE]; + + if((sd = socket(AF_INET, SOCK_STREAM, 0)) == -1){ + printf("cannot create socket\n"); + return rc; + } + + if(connect_to_imap_server(sd, &seq, imapserver, username, password) == ERR){ + close(sd); + return rc; + } + + + list_folders(sd, &seq, &folders[0], sizeof(folders)); + + + p = &folders[0]; + do { + memset(puf, 0, sizeof(puf)); + p = split(p, '\n', puf, sizeof(puf)-1); + + printf("processing folder: %s... ", puf); + + rc = process_imap_folder(sd, &seq, puf, sdata, data, cfg); + + } while(p); + + + close(sd); + + return rc; +} + + void usage(){ - printf("usage: pilerimport [-c ] -e | -m | -d \n"); + printf("usage: pilerimport [-c ] -e | -m | -d | -i -u -p \n"); exit(0); } @@ -204,12 +165,13 @@ void usage(){ int main(int argc, char **argv){ int i, rc; char *configfile=CONFIG_FILE, *mailbox=NULL, *emlfile=NULL, *directory=NULL; + char *imapserver=NULL, *username=NULL, *password=NULL; struct session_data sdata; struct __config cfg; struct __data data; - while((i = getopt(argc, argv, "c:m:e:d:h?")) > 0){ + while((i = getopt(argc, argv, "c:m:e:d:i:u:p:h?")) > 0){ switch(i){ case 'c' : @@ -228,6 +190,18 @@ int main(int argc, char **argv){ mailbox = optarg; break; + case 'i' : + imapserver = optarg; + break; + + case 'u' : + username = optarg; + break; + + case 'p' : + password = optarg; + break; + case 'h' : case '?' : usage(); @@ -241,7 +215,7 @@ int main(int argc, char **argv){ - if(!mailbox && !emlfile && !directory) usage(); + if(!mailbox && !emlfile && !directory && !imapserver) usage(); cfg = read_config(configfile); @@ -276,6 +250,7 @@ int main(int argc, char **argv){ if(emlfile) rc = import_message(emlfile, &sdata, &data, &cfg); if(mailbox) rc = import_from_mailbox(mailbox, &sdata, &data, &cfg); if(directory) rc = import_from_maildir(directory, &sdata, &data, &cfg); + if(imapserver && username && password) rc = import_from_imap_server(imapserver, username, password, &sdata, &data, &cfg);