2012-06-01 14:25:49 +02:00
/*
 * reindex.c, SJ
 */
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <sys/time.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <fcntl.h>
# include <unistd.h>
# include <time.h>
# include <locale.h>
# include <syslog.h>
# include <getopt.h>
# include <piler.h>
/* getopt() state: argument of the option just parsed, and index of the next argv element. */
extern char * optarg ;
extern int optind ;
/* Set to 1 by the -p option in main(); when non-zero, retrieve_email_by_metadata_id() prints progress counters. */
int progressbar = 0 ;
/*
 * Print the command line help to stdout and terminate the process
 * with exit status 0. This function never returns.
 */
void usage(){
   /* One entry per help line, emitted verbatim and in order. */
   static const char *const help_lines[] = {
      " \n usage: reindex \n \n ",
      " [-c|--config <config file>] Config file to use if not the default \n ",
      " -f <from id> Start indexing from this id \n ",
      " -t <to id> Up to this id \n ",
      " -a Reindex all records \n ",
      " -p Display progress counters \n "
   };
   size_t i;

   for(i = 0; i < sizeof(help_lines) / sizeof(help_lines[0]); i++){
      fputs(help_lines[i], stdout);
   }

   exit(0);
}
2012-10-29 10:22:31 +01:00
/*
 * Optionally report an error on stdout, then terminate the process.
 *
 * msg: text printed after the "error:" prefix; nothing is printed when NULL
 * rc:  exit status passed to exit()
 *
 * This function never returns.
 */
void p_clean_exit(char *msg, int rc){
   if(msg != NULL){
      printf(" error: %s \n ", msg);
   }

   exit(rc);
}
2018-09-29 08:43:50 +02:00
uint64 get_max_meta_id ( struct session_data * sdata ) {
2012-11-30 12:27:18 +01:00
char s [ SMALLBUFSIZE ] ;
uint64 id = 0 ;
2017-08-11 18:18:45 +02:00
struct sql sql ;
2012-11-30 12:27:18 +01:00
snprintf ( s , sizeof ( s ) - 1 , " SELECT MAX(`id`) FROM %s " , SQL_METADATA_TABLE ) ;
2017-08-11 18:18:45 +02:00
if ( prepare_sql_statement ( sdata , & sql , s ) = = ERR ) return id ;
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
p_bind_init ( & sql ) ;
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
if ( p_exec_stmt ( sdata , & sql ) = = OK ) {
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
p_bind_init ( & sql ) ;
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
sql . sql [ sql . pos ] = ( char * ) & id ; sql . type [ sql . pos ] = TYPE_LONGLONG ; sql . len [ sql . pos ] = sizeof ( uint64 ) ; sql . pos + + ;
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
p_store_results ( & sql ) ;
p_fetch_results ( & sql ) ;
p_free_results ( & sql ) ;
2015-12-18 22:38:52 +01:00
}
2013-05-05 11:57:14 +02:00
2017-08-11 18:18:45 +02:00
close_prepared_statement ( & sql ) ;
2013-05-05 11:57:14 +02:00
2012-11-30 12:27:18 +01:00
return id ;
}
2017-08-08 15:34:45 +02:00
/*
 * Re-index every non-deleted message whose metadata id lies in the closed
 * range [from_id, to_id].
 *
 * For each selected row the message is written to "<id>.eml" in the current
 * working directory, parsed, handed to store_index_data(), and the temporary
 * file is removed again.
 *
 * sdata:   session data; ttmpfile, now and sent are filled from each row,
 *          filename, tot_len, the direction flags and attachments are reset
 *          per message
 * data:    passed to the parser and to store_index_data(); data->folder is
 *          overwritten from the result set when cfg->enable_folders == 1
 * from_id: first metadata id (inclusive)
 * to_id:   last metadata id (inclusive)
 * cfg:     configuration; enable_folders selects the query variant
 *
 * Returns the number of messages for which store_index_data() returned OK.
 * Returns 0 when the statement cannot be prepared.
 */
uint64 retrieve_email_by_metadata_id(struct session_data *sdata, struct data *data, uint64 from_id, uint64 to_id, struct config *cfg){
   char s[SMALLBUFSIZE];
   uint64 stored_id = 0, reindexed = 0;
   /*
    * Size of the requested id range, used only for the progress percentage.
    * Unsigned arithmetic wraps this to 0 when the range spans every possible
    * id (from_id = 0, to_id = max), so it is checked before dividing.
    */
   uint64 delta = to_id - from_id + 1;
   struct parser_state state;
   struct sql sql;

   if(cfg->enable_folders == 1)
      snprintf(s, sizeof(s)-1, " SELECT m.`id`, `piler_id`, `arrived`, `sent`, f.folder_id FROM %s m, %s f WHERE m.id=f.id AND (m.id BETWEEN %llu AND %llu) AND `deleted`=0 ", SQL_METADATA_TABLE, SQL_FOLDER_MESSAGE_TABLE, from_id, to_id);
   else
      snprintf(s, sizeof(s)-1, " SELECT `id`, `piler_id`, `arrived`, `sent` FROM %s WHERE (id BETWEEN %llu AND %llu) AND `deleted`=0 ", SQL_METADATA_TABLE, from_id, to_id);

   if(prepare_sql_statement(sdata, &sql, s) == ERR) return reindexed;

   p_bind_init(&sql);

   if(p_exec_stmt(sdata, &sql) == OK){

      /* Bind the result columns in the same order as the SELECT list. */
      p_bind_init(&sql);

      sql.sql[sql.pos] = (char *)&stored_id; sql.type[sql.pos] = TYPE_LONGLONG; sql.len[sql.pos] = sizeof(uint64); sql.pos++;
      sql.sql[sql.pos] = sdata->ttmpfile; sql.type[sql.pos] = TYPE_STRING; sql.len[sql.pos] = RND_STR_LEN+2; sql.pos++;
      sql.sql[sql.pos] = (char *)&(sdata->now); sql.type[sql.pos] = TYPE_LONG; sql.len[sql.pos] = sizeof(unsigned long); sql.pos++;
      sql.sql[sql.pos] = (char *)&(sdata->sent); sql.type[sql.pos] = TYPE_LONG; sql.len[sql.pos] = sizeof(unsigned long); sql.pos++;

      if(cfg->enable_folders == 1){
         /* Fifth column (f.folder_id) exists only in the folder-aware query. */
         sql.sql[sql.pos] = (char *)&(data->folder); sql.type[sql.pos] = TYPE_LONG; sql.len[sql.pos] = sizeof(unsigned long); sql.pos++;
      }

      p_store_results(&sql);

      while(p_fetch_results(&sql) == OK){
         char filename[SMALLBUFSIZE];
         struct stat st;
         FILE *f;
         int rc;

         /* No padding in the name: the same string must reach sdata->filename below. */
         snprintf(filename, sizeof(filename)-1, "%llu.eml", stored_id);

         f = fopen(filename, "w");
         if(!f){
            printf(" cannot open: %s \n ", filename);
            continue;
         }

         rc = retrieve_email_from_archive(sdata, f, cfg);

         /* A failed flush/close means the file on disk is incomplete: treat it as a retrieval error. */
         if(fclose(f) != 0 && rc == 0) rc = 1;

         if(rc){
            printf(" cannot retrieve: %s \n ", filename);
            unlink(filename);
            continue;
         }

         snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", filename);

         sdata->tot_len = stat(filename, &st) == 0 ? st.st_size : 0;

         /* Reset the per-message state left over from the previous row. */
         sdata->internal_sender = sdata->internal_recipient = sdata->external_recipient = sdata->direction = 0;
         memset(sdata->attachments, 0, SMALLBUFSIZE);

         state = parse_message(sdata, 1, data, cfg);
         post_parse(sdata, &state, cfg);

         rc = store_index_data(sdata, &state, data, stored_id, cfg);

         /* Remove the parser's temporary artifacts whether or not indexing succeeded. */
         unlink(sdata->tmpframe);
         remove_stripped_attachments(&state);

         if(rc == OK) reindexed++;
         else printf(" failed to add to %s table: %s \n ", SQL_SPHINX_TABLE, filename);

         unlink(filename);

         if(progressbar){
            int percent = delta ? (int)(100 * reindexed / delta) : 0;

            printf(" processed: %8llu [%3d%%] \r ", reindexed, percent);
            fflush(stdout);
         }
      }

      p_free_results(&sql);
   }

   close_prepared_statement(&sql);

   if(progressbar) printf(" \n ");

   return reindexed;
}
int main ( int argc , char * * argv ) {
2020-08-10 20:58:34 +02:00
int all = 0 ;
2012-06-01 14:25:49 +02:00
uint64 from_id = 0 , to_id = 0 , n = 0 ;
2012-08-23 10:23:58 +02:00
char * configfile = CONFIG_FILE , * folder = NULL ;
2012-06-01 14:25:49 +02:00
struct session_data sdata ;
2017-08-08 15:34:45 +02:00
struct data data ;
struct config cfg ;
2012-06-01 14:25:49 +02:00
while ( 1 ) {
2020-08-10 20:58:34 +02:00
int c = getopt ( argc , argv , " c:f:t:F:pahv? " ) ;
2012-06-01 14:25:49 +02:00
if ( c = = - 1 ) break ;
switch ( c ) {
case ' c ' :
configfile = optarg ;
break ;
case ' f ' :
from_id = strtoull ( optarg , NULL , 10 ) ;
break ;
case ' t ' :
to_id = strtoull ( optarg , NULL , 10 ) ;
break ;
2012-11-30 12:27:18 +01:00
case ' a ' :
all = 1 ;
break ;
2012-08-23 10:23:58 +02:00
case ' F ' :
folder = optarg ;
break ;
2012-06-01 14:25:49 +02:00
case ' p ' :
progressbar = 1 ;
break ;
default :
usage ( ) ;
break ;
}
}
2020-08-10 20:58:34 +02:00
if ( all = = 0 & & ( from_id = = 0 | | to_id = = 0 ) ) usage ( ) ;
2012-06-01 14:25:49 +02:00
2017-05-14 17:18:09 +02:00
if ( ! can_i_write_directory ( NULL ) ) __fatal ( " cannot write current directory! " ) ;
2012-06-01 14:25:49 +02:00
( void ) openlog ( " reindex " , LOG_PID , LOG_MAIL ) ;
2013-09-11 09:19:29 +02:00
srand ( getpid ( ) ) ;
2012-06-01 14:25:49 +02:00
cfg = read_config ( configfile ) ;
if ( read_key ( & cfg ) ) {
printf ( " %s \n " , ERR_READING_KEY ) ;
return 1 ;
}
2012-08-23 10:23:58 +02:00
data . folder = 0 ;
2012-09-03 17:12:02 +02:00
data . recursive_folder_names = 0 ;
2014-01-13 13:06:10 +01:00
2013-08-14 14:24:30 +02:00
inithash ( data . mydomains ) ;
initrules ( data . archiving_rules ) ;
initrules ( data . retention_rules ) ;
2015-08-28 22:50:28 +02:00
initrules ( data . folder_rules ) ;
2012-08-23 10:23:58 +02:00
2014-02-19 15:16:20 +01:00
init_session_data ( & sdata , & cfg ) ;
if ( open_database ( & sdata , & cfg ) = = ERR ) {
p_clean_exit ( " cannot connect to mysql server " , 1 ) ;
}
2018-09-29 08:43:50 +02:00
load_rules ( & sdata , data . folder_rules , SQL_FOLDER_RULE_TABLE ) ;
2015-08-28 22:50:28 +02:00
2012-08-23 10:23:58 +02:00
if ( folder ) {
2018-09-29 08:43:50 +02:00
data . folder = get_folder_id ( & sdata , folder , 0 ) ;
2012-08-23 10:23:58 +02:00
if ( data . folder = = 0 ) {
printf ( " error: could not get folder id for '%s' \n " , folder ) ;
return 0 ;
}
}
2012-06-01 14:25:49 +02:00
2013-01-06 22:16:21 +01:00
load_mydomains ( & sdata , & data , & cfg ) ;
2012-06-01 14:25:49 +02:00
2012-11-30 12:27:18 +01:00
if ( all = = 1 ) {
from_id = 1 ;
2018-09-29 08:43:50 +02:00
to_id = get_max_meta_id ( & sdata ) ;
2012-11-30 12:27:18 +01:00
}
2012-08-23 10:23:58 +02:00
n = retrieve_email_by_metadata_id ( & sdata , & data , from_id , to_id , & cfg ) ;
2012-06-01 14:25:49 +02:00
printf ( " put %llu messages to %s table for reindexing \n " , n , SQL_SPHINX_TABLE ) ;
2015-08-28 22:50:28 +02:00
clearrules ( data . folder_rules ) ;
2013-08-14 14:24:30 +02:00
clearhash ( data . mydomains ) ;
2013-07-12 22:54:45 +02:00
2013-04-28 14:18:09 +02:00
close_database ( & sdata ) ;
2012-06-01 14:25:49 +02:00
return 0 ;
}