#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use DBI;
use File::Find;
use File::Spec;
use utf8;
use Encode;

# Synchronises the "files" table with the regular files found below every
# directory listed in "rep_dirs", printing timing and a summary of paths
# that exist only on the filesystem or only in the database.

my $db   = "dir_test";
my $user = "xxx";
my $pass = "xxx";

=rem

DB STRUCT:

CREATE TABLE rep_dirs
(
  id serial NOT NULL,
  path text NOT NULL,
  CONSTRAINT rep_dirs_pkey PRIMARY KEY (id, path),
  CONSTRAINT rep_dirs_id_key UNIQUE (id)
)

CREATE TABLE files
(
  id serial NOT NULL,
  rep_id integer NOT NULL,
  path text NOT NULL,
  node_id integer NOT NULL,
  access_time timestamp without time zone NOT NULL,
  mod_time timestamp without time zone NOT NULL,
  change_time timestamp without time zone NOT NULL,
  status integer NOT NULL DEFAULT 2,
  CONSTRAINT files_pkey PRIMARY KEY (id, rep_id, path)
)

=cut

# status == 0 => old file
# status == 1 => touch
# status == 2 => new

my $dbh = DBI->connect(
    "dbi:Pg:dbname=$db",
    $user,
    $pass,
    {
        RaiseError     => 1,
        AutoCommit     => 1,
        pg_enable_utf8 => 1,
    }
) or die $DBI::errstr;

# prepare db-access
# All statements are prepared once and handed to the subs below as a hash
# reference. Long SQL literals are split with "." only; the resulting SQL
# text is unchanged.
my %sth_lst;

#-----------------------------------------------------------------------
# insert new File in DB
$sth_lst{insert} = $dbh->prepare(
    'INSERT INTO files ( path, access_time, mod_time, change_time, node_id, rep_id, status ) '
  . 'VALUES ( ?, to_timestamp(?), to_timestamp(?), to_timestamp(?), ?, ?, 2 )'
);

# get all search dirs
$sth_lst{select_dirs} = $dbh->prepare(
    'SELECT id, path FROM rep_dirs'
);

# get atime ctime mtime and node for a file
$sth_lst{select_iamci_by_path} = $dbh->prepare(
    q!SELECT id, date_part('epoch',access_time)::int, date_part('epoch',mod_time)::int, !
  . q!date_part('epoch',change_time)::int, node_id FROM files WHERE path = ? AND rep_id=?!
);

# get all files by status
$sth_lst{select_path_by_status} = $dbh->prepare(
    'SELECT path FROM files WHERE status = ? AND rep_id = ?'
);

# update file
$sth_lst{update_amci_by_id} = $dbh->prepare(
    'UPDATE files SET access_time=to_timestamp(?), mod_time=to_timestamp(?), '
  . 'change_time=to_timestamp(?), node_id=?, status=1 WHERE id = ?'
);

# reset status
$sth_lst{reset_status} = $dbh->prepare(
    'UPDATE files SET status=0 WHERE rep_id=?'
);

# update status for a file
$sth_lst{update_status_by_id} = $dbh->prepare(
    'UPDATE files SET status=? WHERE id=?'
);

# delete all files with status==0
$sth_lst{delete_old} = $dbh->prepare(
    'DELETE FROM files WHERE status = 0 AND rep_id=?'
);
#-----------------------------------------------------------------------

# autoflush STDOUT
$| = 1;

# main
for my $dir (get_dirs(\%sth_lst)) {
    print "Searching filesystem ($dir->{path}) ...\n";

    # Phase 1: mark every known file of this directory as "old" (status 0)
    # and remember how many there are; that count drives the progress output.
    my $stime_tmp = time;
    $dbh->begin_work();
    reset_status(\%sth_lst, $dir->{id});
    my $max = scalar(get_old_files(\%sth_lst, $dir->{id}));
    $dbh->commit();
    $stime_tmp = time - $stime_tmp;
    print "DB Status Reset! Duration: $stime_tmp sec\n";

    # Phase 2: walk the filesystem and insert/update one row per regular file.
    $dbh->begin_work();
    $stime_tmp = time;
    my $cnt = 0;

    # File::Find chdir()s into each directory before calling the callback,
    # so a relative start directory would make "-f $File::Find::name" fail
    # for every entry; nothing would be processed and all rows would later
    # be deleted as "old". Resolve a relative root against the current
    # directory first. Absolute roots are passed through untouched so the
    # paths already stored in the database keep matching.
    my $root = File::Spec->file_name_is_absolute($dir->{path})
        ? $dir->{path}
        : File::Spec->rel2abs($dir->{path});

    find(
        sub {
            return unless(-f $File::Find::name);
            process_file(\%sth_lst, $dir, $File::Find::name);
            fortschritt($max, ++$cnt);
        },
        $root
    );
    print "\n";
    $stime_tmp = time - $stime_tmp;
    print "DB Upadated! Duration: $stime_tmp sec\n";
    $dbh->commit();

    # Phase 3: rows still at status 0 were not seen on disk, rows at
    # status 2 were inserted during this run.
    $stime_tmp = time;
    my @old_files = get_old_files(\%sth_lst, $dir->{id});
    my @new_files = get_new_files(\%sth_lst, $dir->{id});
    $stime_tmp = time - $stime_tmp;
    print scalar(@new_files)." paths only on FS! ".scalar(@old_files)." paths only in DB! Duration: $stime_tmp sec\n";

    if (@old_files) {
        $stime_tmp = time;
        delete_old_files(\%sth_lst, $dir->{id});
        $stime_tmp = time - $stime_tmp;
        print "Old paths in DB Deleted! Duration: $stime_tmp sec\n";
    }
}

$dbh->disconnect();
exit;

########################################################################
########################################################################

# Fetch all rows of rep_dirs.
#   $sths - hash ref of prepared statement handles (uses "select_dirs")
# Returns a list of hash refs, each with the keys "path" and "id".
sub get_dirs {
    my ($sths) = @_;

    my @dirs;
    $sths->{select_dirs}->execute();
    while (my ($id, $path) = $sths->{select_dirs}->fetchrow_array()) {
        push(@dirs, { path => $path, id => $id });
    }
    $sths->{select_dirs}->finish();

    return @dirs;
}

# Set status to 0 for every file row of one directory, i.e. treat all of
# them as old until the scan sees them again.
#   $sths - hash ref of prepared statement handles (uses "reset_status")
#   $id   - rep_dirs id
sub reset_status {
    my ($sths, $id) = @_;

    # status to 0 => everything counts as an old file
    $sths->{reset_status}->execute($id);
    $sths->{reset_status}->finish();
}

# Paths of one directory whose status is 2 (new).
# Arguments and return value as for get_files_by_status, without $status.
sub get_new_files {
    my ($sths, $id) = @_;
    return get_files_by_status($sths, $id, 2);
}

# Paths of one directory whose status is 0 (old).
# Arguments and return value as for get_files_by_status, without $status.
sub get_old_files {
    my ($sths, $id) = @_;
    return get_files_by_status($sths, $id, 0);
}

# Select the paths of one directory that carry a given status.
#   $sths   - hash ref of prepared statement handles
#             (uses "select_path_by_status")
#   $id     - rep_dirs id
#   $status - status value to match
# Returns the list of paths; in scalar context the number of paths, which
# the main loop relies on via scalar(get_old_files(...)).
sub get_files_by_status {
    my ($sths, $id, $status) = @_;

    my @lst;
    $sths->{select_path_by_status}->execute($status, $id);
    while (my ($path) = $sths->{select_path_by_status}->fetchrow_array()) {
        push(@lst, $path);
    }
    $sths->{select_path_by_status}->finish();

    return @lst;
}

# Delete every file row of one directory that is still at status 0.
#   $sths - hash ref of prepared statement handles (uses "delete_old")
#   $id   - rep_dirs id
# Always returns 1.
sub delete_old_files {
    my ($sths, $id) = @_;

    $sths->{delete_old}->execute($id);
    $sths->{delete_old}->finish();

    return 1;
}

# Insert or refresh the database row for one file.
#   $sths - hash ref of prepared statement handles
#   $rep  - hash ref describing the directory (keys "id" and "path")
#   $file - file name as reported by the directory walk; also the value
#           stored in / looked up from files.path
# Returns 0 when the file is skipped (not a regular file, or stat failed),
# 1 after the row has been inserted or updated.
sub process_file {
    my ($sths, $rep, $file) = @_;

    # absolute path
    my $path = File::Spec->rel2abs($file, $rep->{path});

    # make sure it is UTF-8
    # NOTE(review): encode() treats its input as characters; if $file is
    # already a UTF-8 byte string, non-ASCII names get encoded twice. Left
    # unchanged because the right fix depends on the filesystem encoding
    # and the DBD::Pg version in use - confirm before touching.
    $file = encode('UTF-8', $file);

    # file must exist!
    return 0 unless(-f $path);

    my ($ino, $atime, $mtime, $ctime) = (stat($path))[1, 8, 9, 10];

    # values must be present!
    # Test definedness, not truth: stat() yields undef for all fields when
    # it fails, while 0 is a legitimate inode number or epoch timestamp.
    # Rejecting 0 would skip such files and later purge them as "old".
    return 0
        unless defined $ino
            && defined $atime
            && defined $mtime
            && defined $ctime;

    # already present?
    $sths->{select_iamci_by_path}->execute($file, $rep->{id});
    my ($id, $at_db, $mt_db, $ct_db, $nd_db)
        = $sths->{select_iamci_by_path}->fetchrow_array();
    $sths->{select_iamci_by_path}->finish();

    if (defined($id)) {
        if ($at_db != $atime || $mt_db != $mtime || $ct_db != $ctime || $nd_db != $ino) {
            # update status=1
            $sths->{update_amci_by_id}->execute($atime, $mtime, $ctime, $ino, $id);
            $sths->{update_amci_by_id}->finish;
        }
        else {
            # touch status=1
            $sths->{update_status_by_id}->execute(1, $id);
            $sths->{update_status_by_id}->finish;
        }
    }
    else {
        # insert status=2
        $sths->{insert}->execute($file, $atime, $mtime, $ctime, $ino, $rep->{id});
        $sths->{insert}->finish();
    }

    return 1;
}

# Terminal progress output ("fortschritt" is German for "progress").
#   $max - expected number of files; 0 or missing means unknown
#   $cnt - number of files handled so far
# Rewrites the current terminal line using "\r". Prints only the counter
# when $max is 0, otherwise a percentage plus the counter.
sub fortschritt {
    my $max = shift || 0;
    my $cnt = shift || 0;

    if ($max == 0) {
        printf("\r%u Files", $cnt);
    }
    else {
        printf("\r%.2f%% (%u Files)", (100 / $max) * $cnt, $cnt);
    }
}