Thread Hilfe für Regex (21 answers)
Opened by lousek at 2011-08-06 17:24

lousek
 2011-08-09 23:36
#151425 #151425
User since
2011-01-19
28 Artikel
BenutzerIn

user image
Guten Abend

So, nun nachfolgend mein Script.
Wie auch als Kommentar beschrieben, ist besonders der Teil mit Einfügen / Aktualisieren / Löschen in der Datenbank noch recht hässlich programmiert. Ich nehme das zumindest an, wenn ich sehe, wie effizient man in Perl programmieren kann (oder könnte ...)

Code: (dl )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/perl

# Define options
use strict;
use warnings;

# Define used modules
use Text::Diff;
use DBI;
use File::Find;
use Set::Scalar;


# Define variables
# NOTE: these are file-scoped lexicals. checkRepDir() below accesses $dbh
# directly and may use the other scratch variables as well, so all of them
# stay declared here.
my $db;
my $user;
my $pass;
my $host;
my $query;
my $sth;
my $dbh;
my $stime;
my $etime;
my $endtime;
my $stime_tmp;
my $etime_tmp;
my $endtime_tmp;

my @db;
my @fs;
my @only_db;
my @only_fs;

# DB data
$db = "ofrs";
$user = "ofrs";
$pass = "ofrs";
$host = "localhost";

# Connect to DB. Abort with the driver's message when the connection fails,
# because every later step needs a valid handle.
$dbh = DBI->connect("DBI:mysql:$db:$host", $user, $pass)
    or die "Cannot connect to database '$db' on '$host': $DBI::errstr\n";

# Set start time
$stime = time;

# Set query
$query = "SELECT id,path FROM repdir;";

# Run query
my $sth1 = $dbh->prepare($query)
    or die "Cannot prepare '$query': " . $dbh->errstr . "\n";
$sth1->execute()
    or die "Cannot execute '$query': " . $sth1->errstr . "\n";

# Fetch every replication directory (id, path) first, so that the cursor is
# finished before checkRepDir() runs further statements on the same handle.
my @repdirs;
while (my @result = $sth1->fetchrow_array) {
    push @repdirs, [@result];
}
$sth1->finish;

# Check each replication directory against the database
foreach my $repdir (@repdirs) {
    checkRepDir($repdir->[0], $repdir->[1]);
}

# Set end time
$etime = time;

# Calculate used time
$endtime = ($etime - $stime);

# Print needed time
print "Total duration: $endtime sec\n";

# Close the database connection cleanly
$dbh->disconnect;





########## Functions ##########

# Synchronises the `dir` table with the directory tree below one replication
# directory.
#
# Arguments (positional):
#   1. repdir_id   - id of the replication directory (used as dir.repdir_id)
#   2. repdir_path - file-system path at which the directory scan starts
#
# Behaviour:
#   * directories found only on the file system are inserted,
#   * directories whose path and inode are already stored but whose mtime or
#     ctime differ are updated,
#   * rows whose path/inode pair no longer exists on the file system are
#     deleted.
# Progress and timing information is printed to STDOUT. The sub uses the
# file-scoped database handle $dbh. If the SELECT fails, it warns and returns
# without touching the table, because treating the table as empty would
# re-insert every directory.
sub checkRepDir {
    my ($repdir_id, $repdir_path) = @_;

    print "Running check for replication directory $repdir_path with ID $repdir_id ...\n";

    ### Getting database paths ###

    print "Running DB query ... ";

    my $started = time;

    my $select = $dbh->prepare(
        "SELECT path,ino,mtime,ctime FROM dir WHERE repdir_id = ?");
    if (!$select || !$select->execute($repdir_id)) {
        warn "\nCannot read stored paths for replication directory "
            . "$repdir_id: " . $dbh->errstr . "\n";
        return;
    }

    # Stored rows, keyed by "path\0inode". NUL cannot occur in a path, so the
    # key is unambiguous even for paths that contain ';' or quotes.
    my %db_row_for;
    my $db_rows = 0;
    while (my @row = $select->fetchrow_array) {
        $db_row_for{ join("\0", @row[0, 1]) } = [@row];
        $db_rows++;
    }

    my $elapsed = time - $started;

    print $db_rows." paths found! Duration: $elapsed sec\n";

    ### Getting filesystem paths ###

    print "Searching filesystem ... ";

    $started = time;

    # Directories on disk, keyed like %db_row_for; value is
    # [path, inode, mtime, ctime].
    my %fs_row_for;
    File::Find::find(
        {
            # Without chdir $File::Find::name stays valid for stat() even
            # when the start path is relative.
            no_chdir => 1,
            wanted   => sub {
                my @st = stat($File::Find::name) or return;
                return unless -d _;    # reuse the stat buffer from above
                my ($ino, $mtime, $ctime) = @st[1, 9, 10];
                $fs_row_for{ join("\0", $File::Find::name, $ino) }
                    = [ $File::Find::name, $ino, $mtime, $ctime ];
            },
        },
        $repdir_path
    );

    $elapsed = time - $started;

    print scalar(keys %fs_row_for)." paths found! Duration: $elapsed sec\n";

    ### Find the added, updated and deleted paths ###

    print "Comparing results ... ";

    $started = time;

    my @to_insert;
    my @to_update;
    foreach my $key (sort keys %fs_row_for) {
        my $fs_row = $fs_row_for{$key};
        my $db_row = $db_row_for{$key};

        # Path/inode pair unknown to the database: a new entry
        if (!$db_row) {
            push @to_insert, $fs_row;
            next;
        }

        # Same path and inode: only the timestamps can have changed.
        # Compared as strings, NULL columns count as empty.
        my ($db_mtime, $db_ctime)
            = map { defined $_ ? $_ : '' } @{$db_row}[2, 3];
        if ($db_mtime ne $fs_row->[2] || $db_ctime ne $fs_row->[3]) {
            push @to_update, $fs_row;
        }
    }

    # Rows whose path/inode pair no longer exists on the file system
    my @to_delete = map { $db_row_for{$_} }
        grep { !exists $fs_row_for{$_} } sort keys %db_row_for;

    # An updated entry differs on both sides, so it counts for both totals
    my $only_fs = @to_insert + @to_update;
    my $only_db = @to_delete + @to_update;

    $elapsed = time - $started;

    print $only_fs." paths only on FS! ".$only_db." paths only in DB! Duration: $elapsed sec\n";

    ### Updating the database with the newest information ###

    print "Updating database ... \n";

    $started = time;

    # Prepares $sql once and executes it for every set of bind values.
    # Failures are reported but do not abort the remaining statements.
    my $run_batch = sub {
        my ($sql, @bind_sets) = @_;
        return unless @bind_sets;

        my $batch = $dbh->prepare($sql);
        if (!$batch) {
            warn "Cannot prepare '$sql': " . $dbh->errstr . "\n";
            return;
        }
        foreach my $binds (@bind_sets) {
            $batch->execute(@{$binds})
                or warn "Statement '$sql' failed: " . $batch->errstr . "\n";
        }
        return;
    };

    # New directories
    $run_batch->(
        "INSERT INTO `ofrs`.`dir` (`id` , `repdir_id` , `path` , `ino` , `mtime` , `ctime`) VALUES (NULL , ? , ? , ? , ? , ?)",
        map { [ $repdir_id, @{$_}[0 .. 3] ] } @to_insert
    );

    # Known directories with changed mtime and/or ctime
    $run_batch->(
        "UPDATE `ofrs`.`dir` SET `mtime` = ? , `ctime` = ? WHERE `repdir_id` = ? AND `path` = ? AND `ino` = ?",
        map { [ @{$_}[2, 3], $repdir_id, @{$_}[0, 1] ] } @to_update
    );

    # Directories that vanished from the file system. Restricted to this
    # replication directory so rows of other repdirs are never removed.
    $run_batch->(
        "DELETE FROM `ofrs`.`dir` WHERE `repdir_id` = ? AND `path` = ? AND `ino` = ?",
        map { [ $repdir_id, @{$_}[0, 1] ] } @to_delete
    );

    $elapsed = time - $started;

    # NOTE: the count covers inserted and updated paths; the wording is kept
    # as it was for output compatibility.
    print $only_fs." paths inserted! Duration: $elapsed sec\n";

    return;
}


Übrigens, zwei Perl-Bücher sind auf dem Weg zu mir, dann wird mein Stil mit der Zeit hoffentlich etwas besser :)

LG
lousek

View full thread Hilfe für Regex