#!/usr/bin/perl
#
# Synchronises the `dir` table with the directory trees found on the
# filesystem.  For every row of the `repdir` table (id, path) the tree below
# `path` is scanned and the `dir` rows of that replication directory are
# inserted, updated or deleted so that they mirror the filesystem.

# Define options
use strict;
use warnings;

# Define used modules
use Text::Diff;     # NOTE(review): not referenced by the code in this file; kept in case it is needed elsewhere
use DBI;
use File::Find;
use Set::Scalar;    # NOTE(review): the set difference is now computed with plain hashes; kept in case it is needed elsewhere

# DB data
my $db   = "ofrs";
my $user = "ofrs";
my $pass = "ofrs";
my $host = "localhost";

# Connect to DB.  RaiseError makes every failing DBI call die instead of
# being silently ignored.
my $dbh = DBI->connect("DBI:mysql:$db:$host", $user, $pass,
    { RaiseError => 1, PrintError => 0 })
    or die "Cannot connect to database $db on $host: $DBI::errstr\n";

# Set start time
my $stime = time;

# Fetch all replication directories up front, so that no result set is
# still open on the handle while checkRepDir() runs its own statements.
my $repdirs = $dbh->selectall_arrayref("SELECT id,path FROM repdir");
for my $repdir (@{$repdirs}) {
    checkRepDir($repdir->[0], $repdir->[1]);
}

# Calculate and print the needed time
my $endtime = time - $stime;
print "Total duration: $endtime sec\n";

$dbh->disconnect;

########## Functions ##########

# Builds a comparison key from a list of fields.  NUL is used as separator
# because it cannot occur inside a filesystem path; undefined fields (NULL
# columns) are treated as empty strings.
sub _record_key {
    my @fields = @_;
    return join("\0", map { defined $_ ? $_ : '' } @fields);
}

# Compares a file tree from the filesystem with the path entries from the
# database.  Adds paths which are only on the filesystem to the database,
# updates changed paths in the database, and removes missing paths from the
# database.
#
# Parameters:
#   $repdir_id   - value matched against dir.repdir_id
#   $repdir_path - root directory handed to File::Find
#
# Every record is handled as [path, ino, mtime, ctime].  Uses the file-level
# database handle $dbh; dies on any database error (RaiseError).
sub checkRepDir {
    my ($repdir_id, $repdir_path) = @_;

    print "Running check for replication directory $repdir_path with ID $repdir_id ...\n";

    ### Getting database paths ###
    print "Running DB query ... ";
    my $stime_tmp = time;

    my $db_rows = $dbh->selectall_arrayref(
        "SELECT path,ino,mtime,ctime FROM dir WHERE repdir_id = ?",
        undef, $repdir_id);

    my $endtime_tmp = time - $stime_tmp;
    print scalar(@{$db_rows}) . " paths found! Duration: $endtime_tmp sec\n";

    ### Getting filesystem paths ###
    print "Searching filesystem ... ";
    $stime_tmp = time;

    my @fs_rows;
    File::Find::find({
        wanted => sub {
            # One stat() call serves both the directory test (-d _) and
            # the ino/mtime/ctime fields (indices 1, 9, 10).
            my @st = stat($File::Find::name);
            return unless @st && -d _;
            push @fs_rows, [ $File::Find::name, @st[1, 9, 10] ];
        },
        # Without chdir, $File::Find::name stays valid even when
        # $repdir_path is a relative path.
        no_chdir => 1,
    }, $repdir_path);

    $endtime_tmp = time - $stime_tmp;
    print scalar(@fs_rows) . " paths found! Duration: $endtime_tmp sec\n";

    ### Find the added, updated and deleted paths ###
    print "Comparing results ... ";
    $stime_tmp = time;

    # Index both sides by the complete record; a record that exists with
    # identical path, ino, mtime and ctime on both sides needs no work.
    my %db_rec;
    $db_rec{ _record_key(@{$_}) } = $_ for @{$db_rows};
    my %fs_rec;
    $fs_rec{ _record_key(@{$_}) } = $_ for @fs_rows;

    my @only_db = map { $db_rec{$_} } grep { !exists $fs_rec{$_} } keys %db_rec;
    my @only_fs = map { $fs_rec{$_} } grep { !exists $db_rec{$_} } keys %fs_rec;

    $endtime_tmp = time - $stime_tmp;
    print scalar(@only_fs) . " paths only on FS! " . scalar(@only_db)
        . " paths only in DB! Duration: $endtime_tmp sec\n";

    ### Updating the database with the newest information ###
    print "Updating database ... \n";
    $stime_tmp = time;

    # Path + inode identify a directory; index the differing records by
    # that identity so each lookup below is a single hash access.
    my %db_ident;
    $db_ident{ _record_key(@{$_}[0, 1]) } = 1 for @only_db;
    my %fs_ident;
    $fs_ident{ _record_key(@{$_}[0, 1]) } = 1 for @only_fs;

    # Placeholders keep paths containing quotes (or SQL) from breaking or
    # altering the statements.
    my $sth_update = $dbh->prepare(
        "UPDATE `ofrs`.`dir` SET `mtime` = ?, `ctime` = ? "
        . "WHERE `repdir_id` = ? AND `path` = ? AND `ino` = ?");
    my $sth_insert = $dbh->prepare(
        "INSERT INTO `ofrs`.`dir` (`id`, `repdir_id`, `path`, `ino`, `mtime`, `ctime`) "
        . "VALUES (NULL, ?, ?, ?, ?, ?)");
    my $sth_delete = $dbh->prepare(
        "DELETE FROM `ofrs`.`dir` WHERE `repdir_id` = ? AND `path` = ? AND `ino` = ?");

    for my $rec (@only_fs) {
        my ($path, $ino, $mtime, $ctime) = @{$rec};
        if ($db_ident{ _record_key($path, $ino) }) {
            # Path and inode are the same, only mtime or ctime has changed
            $sth_update->execute($mtime, $ctime, $repdir_id, $path, $ino);
        }
        else {
            # Path and/or inode is not the same, so this is a completely
            # new entry
            $sth_insert->execute($repdir_id, $path, $ino, $mtime, $ctime);
        }
    }

    for my $rec (@only_db) {
        my ($path, $ino) = @{$rec};

        # Already updated in the previous loop
        next if $fs_ident{ _record_key($path, $ino) };

        # This path/inode does not exist on the filesystem anymore.  The
        # delete is limited to this replication directory so that rows of
        # other replication directories are left alone.
        $sth_delete->execute($repdir_id, $path, $ino);
    }

    $endtime_tmp = time - $stime_tmp;
    print scalar(@only_fs) . " paths inserted! Duration: $endtime_tmp sec\n";

    return;
}