pastebin - collaborative debugging tool
kpaste.net RSS


md5db md5sum, FIFO version
Posted by Anonymous on Sun 30th Sep 2012 10:03
raw | new post

  1. #!/usr/bin/perl
  2.  
  3. use 5.12.3;
  4. use strict;
  5. use warnings;
  6. use Cwd qw(abs_path cwd);
  7. use IO::Handle qw(autoflush);
  8. use File::Basename qw(basename dirname);
  9. use File::Path qw(make_path remove_tree);
  10. #use File::Slurp qw(read_file);
  11. use diagnostics;
  12.  
  13. use threads qw(yield);
  14. use threads::shared;
  15. use Thread::Queue;
  16. use Thread::Semaphore;
  17. #use Fcntl qw(:flock);
  18. use POSIX qw(SIGINT ceil mkfifo);
  19. use POSIX ":sys_wait_h";
  20.  
  21. chomp(my $cores = `grep -c ^processor /proc/cpuinfo`);
  22.  
  23. my (@lib, $mode);
  24.  
  25. # Path to and name of log file to be used for logging.
  26. my $logf = "$ENV{HOME}/md5db.log";
  27.  
  28. # Delimiter used for database
  29. my $delim = "\t\*\t";
  30.  
  31. # Array for storing the actual arguments used by the script internally.
  32. # Might be useful for debugging.
  33. my @cmd = (basename($0));
  34.  
  35. # Name of database file.
  36. my $db = 'md5.db';
  37.  
  38. # Clear screen command.
  39. my $clear = `clear && echo`;
  40.  
  41. # Creating a hash that will store the names of files that are
  42. # too big to fit into RAM. We'll process them last.
  43. my %large :shared;
  44.  
  45. # Creating a few shared variables.
  46. # %err will be used for errors
  47. # $n will be used to count the number of files processed
  48. # %md5h is the database hash
  49. my %err :shared;
  50. my $n :shared = 0;
  51. my %md5h :shared;
  52. my %file_contents :shared;
  53. my $stopping :shared = 0;
  54. my $file_stack :shared = 0;
  55. my $busy :shared = 0;
  56.  
  57. my $tempfs = '/dev/shm/md5sum' . '.' . int(rand(10000));
  58. my $disk_size = 1000000000;
  59.  
  60. # This will be used to control access to the logger subroutine.
  61. my $semaphore = Thread::Semaphore->new();
  62.  
  63. POSIX::sigaction(SIGINT, POSIX::SigAction->new(\&handler))
  64. || die "Error setting SIGINT handler: $!\n";
  65.  
  66. # Creating a custom POSIX signal handler.
  67. # First we create a shared variable that will work as a SIGINT switch.
  68. # Then we define the handler subroutine.
  69. # All the handler does is set the variable $saw_sigint.
  70. my $saw_sigint :shared = 0;
  71. sub handler {
  72.         $saw_sigint = 1;
  73. }
  74.  
  75. # Open file handle for the log file
  76. open(my $LOG, '>>', $logf) or die "Can't open '$logf': $!";
  77.  
  78. # Make the $LOG file handle unbuffered for instant logging.
  79. $LOG->autoflush(1);
  80.  
  81. # Duplicate STDERR as a regular file handle
  82. open(my $SE, ">&STDERR") or die "Can't duplicate STDERR: $!";
  83.  
  84. ### Subroutine for printing usage instructions
  85. sub usage {
  86.  
  87.         my $s = basename($0);
  88.  
  89.         say <<"HELP"
  90. Usage: $s [options] [directory 1] .. [directory N]
  91.  
  92.         -help Print this help message.
  93.  
  94.         -double Check database for files that have identical
  95.         hashes.
  96.  
  97.         -import Import MD5 sums to the database from already existing
  98.         \*.MD5 files in each directory.
  99.  
  100.         -index Index new files in each directory.
  101.  
  102.         -test Test the MD5 sums of the files in the database to see if
  103.         they've changed.
  104.  
  105. HELP
  106. }
  107.  
  108. our %exit_status :shared; # store each exit status
  109. our $zombies :shared = 0; # count zombied child processes
  110. my %pid :shared;
  111.  
  112. # set SIGCHLD handler to increment $zombies.
  113. $SIG{CHLD} = sub { lock($zombies);
  114.         $zombies++ };
  115.  
  116. sub grim_reaper {
  117.         REAPER: if ($zombies) {
  118.                 my $zombie;
  119.                 lock($zombies);
  120.                 lock(%exit_status);
  121.                 $zombies = 0; # reset the zombie counter
  122.                 foreach my $pid (keys(%pid)) {
  123.                         while (($zombie = waitpid(-1, $pid)) != -1) {
  124.                                 $exit_status{$zombie} = $?;
  125.                                 say "reaped $zombie" . ($? ? " with exit $?" : "");
  126.                         }
  127.                         { lock(%pid);
  128.                         delete($pid{$pid}); }
  129.                 }
  130.         }
  131.         sleep 5;
  132.         goto(REAPER) if (!$stopping);
  133. }
  134.  
  135. # This loop goes through the argument list as passed to the script
  136. # by the user when ran.
  137. foreach my $arg (@ARGV) {
  138.  
  139.         # If argument starts with a dash '-', interprete it as an option
  140.         if ($arg =~ /^-/) {
  141.  
  142.                 given ($arg) {
  143.                         # When '-double', set script mode to 'double', and call
  144.                         # the md5double subroutine later.
  145.                         when (/^-double$/) {
  146.                                 if (!$mode) { push(@cmd, $arg); $mode = 'double'; }
  147.                         }
  148.  
  149.                         # When '-import', set script mode to 'import', and call
  150.                         # the md5import subroutine later.
  151.                         when (/^-import$/) {
  152.                                 if (!$mode) { push(@cmd, $arg); $mode = 'import'; }
  153.                         }
  154.  
  155.                         # When '-help', set script mode to 'help', and print
  156.                         # usage instructions later.
  157.                         when (/^-help$/) {
  158.                                 if (!$mode) { push(@cmd, $arg); $mode = 'help'; }
  159.                         }
  160.  
  161.                         # When '-index', set script mode to 'index', and call
  162.                         # the md5index subroutine later.
  163.                         when (/^-index$/) {
  164.                                 if (!$mode) { push(@cmd, $arg); $mode = 'index'; }
  165.                         }
  166.  
  167.                         # When '-test', set the script mode to 'test', and call
  168.                         # the md5test subroutine later.
  169.                         when (/^-test$/) {
  170.                                 if (!$mode) { push(@cmd, $arg); $mode = 'test'; }
  171.                         }
  172.                 }
  173.         # If argument is a directory, include it in the @lib array
  174.         } elsif (-d $arg) { push(@lib, $arg); push(@cmd, $arg); }
  175. }
  176.  
  177. # If no switches were used, print usage instructions
  178. if (!@lib || !$mode || $mode eq 'help')
  179.         { usage; exit; }
  180.  
  181. #say "@cmd\n";
  182.  
  183. # Subroutine for controlling the log file
  184. # Applying a semaphore so multiple threads won't try to
  185. # access it at once, just in case ;-)
  186. sub logger {
  187.  
  188.         $semaphore->down();
  189.  
  190.         my($arg, $sw, @fn, $n);
  191.  
  192.         # Creating a variable to hold the current time.
  193.         my $now = localtime(time);
  194.  
  195.         # Array of accepted switches to this subroutine
  196.         my @larg = qw{start int gone corr diff end};
  197.  
  198.         # Loop through all the arguments passed to this subroutine
  199.         # Perform checks that decide which variable the arguments are to
  200.         # be assigned to.
  201.         CHECK: while (@_) {
  202.                         $arg = shift(@_);
  203.  
  204.                         # If $arg is a switch, set the $sw variable and start
  205.                         # the next iteration of the CHECK loop.
  206.                         foreach (@larg) {
  207.                                 if ($_ eq $arg) { $sw = $arg; next CHECK; }
  208.                         }
  209.  
  210.                         # If $arg is a number assign it to $n, if it's a file
  211.                         # add it to @fn.
  212.                         if ($arg =~ /^[0-9]*$/) { $n = $arg; }
  213.                         else { push(@fn, $arg); }
  214.         }
  215.         given ($sw) {
  216.                 # Starts writing the log.
  217.                 when ('start') {
  218.                         say $LOG "\n**** Logging started on $now ****\n";
  219.                         say $LOG "Running script in '$mode' mode.\n";
  220.                 }
  221.                 # When the script is interrupted by user pressing ^C,
  222.                 # say so in STDOUT, close the log.
  223.                 when ('int') {
  224.                         say "\nInterrupted by user!\n";
  225.                         say $LOG $n . " file(s) were tested.";
  226.                         say $LOG "\n**** Logging ended on $now ****\n";
  227.                         close $LOG or die "Can't close '$LOG': $!";
  228.                 }
  229.                 # Called when file has been deleted or moved.
  230.                 when ('gone') {
  231.                         say $LOG $fn[0] . "\n\t" . "has been (re)moved.\n";
  232.                         $err{$fn[0]} = "has been (re)moved.\n";
  233.                 }
  234.                 # Called when file has been corrupted.
  235.                 when ('corr') {
  236.                         say $LOG $fn[0] . "\n\t" .
  237.                         "has been corrupted.\n";
  238.                         $err{$fn[0]} = "has been corrupted.\n";
  239.                 }
  240.                 when ('diff') {
  241.                         say $LOG $fn[0] . "\n\t" .
  242.                                 "doesn't match the hash in database.\n";
  243.                         $err{$fn[0]} = "doesn't match the hash in database.\n";
  244.                 }
  245.                 # Called when done, and to close the log.
  246.                 # If no errors occurred write "Everything is OK!" to the log.
  247.                 # If errors occurred print the %err hash.
  248.                 # Either way, print number of files processed.
  249.                 when ('end') {
  250.                         if (!%err) {
  251.                                 say $LOG "\nEverything is OK!\n";
  252.                         } else {
  253.                                 say "\n**** Errors Occurred ****\n";
  254.                                 foreach my $fn (sort keys %err) {
  255.                                         say $SE $fn . "\n\t" . $err{$fn};
  256.                                 }
  257.                         }
  258.  
  259.                         say $LOG $n . " file(s) were tested.\n" if ($n);
  260.                         say $LOG "\n**** Logging ended on $now ****\n";
  261.                         close $LOG or die "Can't close '$LOG': $!";
  262.                 }
  263.         }
  264.         $semaphore->up();
  265. }
  266.  
  267.  
  268. # Subroutine for reading a database file into the database hash.
  269. # This is the first subroutine that will be executed and all others
  270. # depend upon it, cause without it we don't have a
  271. # database hash to work with.
  272. sub file2hash {
  273.  
  274.         # The format string which is used for parsing the database file
  275.         my $format = qr/^.*\t\*\t[[:alnum:]]{32}$/;
  276.         my (@dbfile, @gone);
  277.  
  278.         # Open the database file and read it into the @dbfile variable
  279.         open(MD5DB, '<', $db) or die "Can't open '$db': $!";
  280.         chomp (@dbfile = (<MD5DB>));
  281.         close(MD5DB) or die "Can't close '$db': $!";
  282.  
  283.         # Loop through all the lines in the database file and split
  284.         # them before storing in the database hash.
  285.         # Also, print each line to STDOUT for debug purposes
  286.         foreach my $line (@dbfile) {
  287.  
  288.                 # If current line matches the proper database file format,
  289.                 # continue.
  290.                 if ($line =~ /$format/) {
  291.  
  292.                         my ($fn, $hash) = (split(/\Q$delim/, $line));
  293.  
  294.                         # If $fn is a real file and not already in the hash,
  295.                         # continue.
  296.                         if (-f $fn && ! $md5h{$fn}) {
  297.                                         $md5h{$fn} = $hash;
  298.                                         say "$fn". "$delim" . "$hash";
  299.  
  300.                         # Saves the names of deleted or moved files in '@gone'
  301.                         # for printing at the end of this subroutine.
  302.                         } else { push(@gone, $fn); }
  303.                 }
  304.         }
  305.  
  306.         # Clears the screen, thereby scrolling past the database file print
  307.         print $clear;
  308.  
  309.         # Loops through the @gone array and logs every file name
  310.         # that's been deleted or moved.
  311.         foreach my $fn (@gone) { logger('gone', $fn); }
  312. }
  313.  
  314. # Subroutine for printing the database hash to the database file
  315. sub hash2file {
  316.  
  317.         open(MD5DB, '>', $db) or die "Can't open '$db': $!";
  318.         # Loops through all the keys in the database hash and prints
  319.         # the entries (divided by the $delim variable) to the database file.
  320.         foreach my $k (sort keys %md5h) {
  321.                 say MD5DB $k . $delim . $md5h{$k} if ($md5h{$k});
  322.         }
  323.         close(MD5DB) or die "Can't close '$db': $!";
  324. }
  325.  
  326. # Subroutine for finding files
  327. # Finds all the files inside the directory name passed to it,
  328. # and sorts the output before storing it in the @files array.
  329. sub getfiles {
  330.  
  331.         my $dn = shift;
  332.         my @files;
  333.  
  334.         open(FIND, '-|', 'find', $dn, '-type', 'f', '-name', '*', '-nowarn')
  335.         or die "Can\'t run 'find': $!";
  336.         while (my $fn = (<FIND>)) {
  337.                         chomp($fn);
  338.                         $fn =~ s($dn/)();
  339.                         push(@files, $fn) if (-f $fn && $fn ne basename($db));
  340.         }
  341.         close(FIND) or die "Can't close 'find': $!";
  342.         return @files;
  343. }
  344.  
  345. sub md5double {
  346.  
  347.         if (!keys(%md5h)) {
  348.                 say "No database file. Run the script in 'index' mode first\n" .
  349.                 "to index the files.";
  350.                 exit;
  351.         }
  352.  
  353.         # Loop through the %md5h hash and save the checksums as keys in a
  354.         # new hash called %exists. Each of those keys will hold an
  355.         # anonymous array with the matching file names.
  356.         my %exists;
  357.         foreach my $fn (keys(%md5h)) {
  358.                 my $hash = $md5h{$fn};
  359.                 if (!$exists{${hash}}) {
  360.                         $exists{${hash}}->[0] = $fn;
  361.                 } else {
  362.                         push(@{$exists{${hash}}}, $fn);
  363.                 }
  364.         }
  365.  
  366.         # Loop through the %exists hash and print files that are identical,
  367.         # if any.
  368.         foreach my $hash (keys(%exists)) {
  369.                 if (scalar(@{$exists{${hash}}}) > 1) {
  370.                         say "These files have the same hash (${hash}):";
  371.                         foreach my $fn (@{$exists{${hash}}}) {
  372.                                 say $fn;
  373.                         }
  374.                         say "";
  375.                 }
  376.         }
  377. }
  378.  
  379. # Subroutine for finding and parsing *.MD5 files, adding the hashes
  380. # to the database hash and thereby also to the file.
  381. # It takes 1 argument:
  382. # (1) the thread queue
  383. sub md5import {
  384.  
  385.         my $md5fn = shift;
  386.  
  387.         my ($fn, $hash, @fields, @lines);
  388.  
  389.         # The format string which is used for parsing the *.MD5 files.
  390.         my $format = qr/^[[:alnum:]]{32}\s\*.*/;
  391.  
  392.         # If the file extension is *.MD5 in either upper- or
  393.         # lowercase, continue.
  394.         if ($md5fn =~ /.md5$/i) {
  395.  
  396.                 # Open the *.MD5 file and read its contents to the
  397.                 # @lines array.
  398.                 open(MD5, '<', $md5fn) or die "Can't open '$md5fn': $!";
  399.                 chomp(@lines = (<MD5>));
  400.                 close(MD5) or die "Can't close '$md5fn': $!";
  401.  
  402.                 # Loop to check that the format of the *.MD5 file really
  403.                 # is correct before proceeding.
  404.                 foreach my $line (@lines) {
  405.  
  406.                         # If format string matches the line(s) in the *.MD5
  407.                         # file, continue.
  408.                         if ($line =~ /$format/) {
  409.  
  410.                                 # Split the line so that the hash and file name go
  411.                                 # into @fields array.
  412.                                 # After that strip the path (if any) of the file
  413.                                 # name, and prepend the path of the *.MD5 file to
  414.                                 # it instead.
  415.                                 # Store hash and file name in the $hash and $fn
  416.                                 # variables for readability.
  417.                                 @fields = split(/\s\Q*/, $line, 2);
  418.                                 my $path = dirname($md5fn);
  419.                                 $hash = $fields[0];
  420.  
  421.                                 if ($path eq '.') { $fn = basename($fields[1]); }
  422.                                 else { $fn = dirname($md5fn)
  423.                                 . '/' . basename($fields[1]); }
  424.  
  425.                                 # Convert CR+LF newlines to proper LF to avoid
  426.                                 # identical file names from being interpreted as
  427.                                 # different.
  428.                                 $fn =~ s/\r//;
  429.  
  430.                                 # Unless file name already is in the database hash,
  431.                                 # print a message, add it to the hash.
  432.                                 if (! $md5h{$fn} && -f $fn) {
  433.  
  434.                                         say "$fn" . "\n\t" .
  435.                                         "Imported MD5 sum from '" .
  436.                                         basename($md5fn) .
  437.                                         "'.\n";
  438.  
  439.                                         $md5h{$fn} = $hash;
  440.  
  441.                                         # If file name is not a real file, write to
  442.                                         # the log.
  443.                                         # If file name is in database hash but the
  444.                                         # MD5 sum from the MD5 file doesn't match,
  445.                                         # print to the log.
  446.                                 } elsif (! -f $fn) { logger('gone', $fn); }
  447.                                 elsif ($md5h{$fn} ne $hash)
  448.                                 { logger('diff', $md5fn); }
  449.                         }
  450.                 }
  451.         }
  452. }
  453.  
  454. # This routine is called if something goes wrong and the script needs
  455. # to quit prematurely.
  456. sub iquit {
  457.         hash2file();
  458.         if (-d $tempfs) {
  459.                 remove_tree($tempfs) or die "Cant remove_tree $tempfs: $!";
  460.         }
  461.  
  462.         exit;
  463. }
  464.  
  465. # This routine is called to get the MD5 sum of a file.
  466. # It has two different modes depending on if the file is larger than
  467. # $disk_size or not.
  468. sub md5sum {
  469.         my $fn = shift;
  470.         my $tid = threads->tid();
  471.         my $FIFO = $tempfs . '/' . 'md5sum' . $tid;
  472.         my $hash;
  473.  
  474.         #say $FIFO;
  475.         #say $tempfs;
  476.  
  477.         # Yield while a large file is being processed by another thread.
  478.         while ($busy) { yield(); }
  479.  
  480.         if ($large{$fn}) {
  481.                 lock($busy);
  482.                 $busy = 1;
  483.                 chomp($hash = (split(/\Q */, qx/md5sum -b "$fn"/, 2))[0]);
  484.                 $busy = 0;
  485.         } else {
  486.  
  487.                 my ($md5_read, $md5_write);
  488.  
  489.                 # Open a binary (:raw) pipe to 'md5sum -b' and redirect
  490.                 # the output from the process to the FIFO for this thread.
  491.                 my $pid = open($md5_write, '|-:raw', qq{md5sum -b > "$FIFO"})
  492.                 or die "Cant open pipe to md5sum: $!";
  493.  
  494.                 { lock(%pid);
  495.                 $pid{$pid} = 1; }
  496.  
  497.                 open ($md5_read, '<', $FIFO) || die "can't open $FIFO: $!";
  498.  
  499.                 print $md5_write $file_contents{$fn};
  500.  
  501.                 # Close the 'md5sum' process so we can get the output from it
  502.                 # below.
  503.                 close($md5_write);
  504.  
  505.                 # Wait for the 'md5sum' process to finish.
  506.                 waitpid($pid, 0);
  507.  
  508.                 { lock(%pid);
  509.                 delete($pid{$pid}); }
  510.  
  511.                 # If ^C was pressed during the time 'md5sum' was running, quit.
  512.                 if ($? == 2) { iquit(); }
  513.  
  514.                 # Read from the FIFO, split the output and close the FIFO.
  515.                 chomp($hash = (split(/\Q */, (<$md5_read>), 2))[0]);
  516.                 close($md5_read) || die "can't close $FIFO: $!";
  517.  
  518.                 # Lock the $file_stack variable and decrement it by the length()
  519.                 # of $file_contents{$fn}
  520.                 { lock($file_stack);
  521.                 $file_stack -= length($file_contents{$fn}); }
  522.  
  523.                 # Lock %file_contents and remove the file data saved in
  524.                 # $file_contents{$fn}
  525.                 { lock(%file_contents);
  526.                 delete($file_contents{$fn}); }
  527.  
  528.                 # If ^C has been pressed, quit.
  529.                 if ($saw_sigint) { iquit(); }
  530.         }
  531.         # Return the hash that's been calculated by 'md5sum'.
  532.         return $hash;
  533. }
  534.  
  535. # Subroutine to index the files
  536. # i.e calculate and store the MD5 sums in the database hash/file.
  537. # It takes 1 argument:
  538. # (1) the thread queue
  539. sub md5index {
  540.         my $q = shift;
  541.         my $tid = threads->tid();
  542.  
  543.         # Loop through the thread que.
  544.         LOOP2: while ((my $fn = $q->dequeue_nb()) || !$stopping) {
  545.                         if ($fn) {
  546.                                 $md5h{$fn} = md5sum($fn);
  547.                                 my $active = threads->running();
  548.                                 say "$active $fn: done indexing (${file_stack})";
  549.  
  550.                                 { lock($n);
  551.                                 $n++; }
  552.  
  553.                         } else { yield(); }
  554.  
  555.                         # If the $saw_sigint variable has been tripped.
  556.                         # Quit this 'while' loop, thereby closing the thread.
  557.                         if ($saw_sigint) { iquit(); }
  558.         }
  559.  
  560.         while (!$q->pending() && !$stopping) {
  561.                 yield();
  562.                 goto(LOOP2);
  563.         }
  564. }
  565.  
  566. # Subroutine for testing to see if the MD5 sums
  567. # in the database file are correct (i.e. have changed or not).
  568. # It takes 1 argument:
  569. # (1) the thread queue
  570. sub md5test {
  571.         my $q = shift;
  572.         my $tid = threads->tid();
  573.         my ($oldmd5, $newmd5);
  574.  
  575.         # Loop through the thread queue.
  576.         LOOP: while ((my $fn = $q->dequeue_nb()) || !$stopping) {
  577.                                 if ($fn) {
  578.                                         $newmd5 = md5sum($fn);
  579.                                         #say "$tid $fn: done testing (${file_stack})";
  580.                                         $oldmd5 = $md5h{$fn};
  581.  
  582.                                         # If the new MD5 sum doesn't match the one in the hash,
  583.                                         # and file doesn't already exist in the %err hash,
  584.                                         # log it and replace the old MD5 sum in the hash with
  585.                                         # the new one.
  586.                                         if ($newmd5 ne $oldmd5 && ! $err{$fn}) {
  587.                                                 logger('diff', $fn);
  588.                                                 $md5h{$fn} = $newmd5;
  589.                                         }
  590.  
  591.                                         { lock($n);
  592.                                         $n++; }
  593.  
  594.                                         } else { yield(); }
  595.                                 # If the $saw_sigint variable has been tripped.
  596.                                 # Quit this 'while' loop, thereby closing the thread.
  597.                                 if ($saw_sigint) { iquit(); }
  598.         }
  599.  
  600.         while (!$q->pending() && !$stopping) {
  601.                 yield();
  602.                 goto(LOOP);
  603.         }
  604. }
  605.  
  606. # Create the thread queue.
  607. my $q = Thread::Queue->new();
  608.  
  609. # Depending on which script mode is active,
  610. # set the @run array to the correct arguments.
  611. # This will be used to start the threads later.
  612. my @run;
  613. given ($mode) {
  614.         when ('index') {
  615.                 @run = (\&md5index, $q);
  616.         }
  617.         when ('test') {
  618.                 @run = (\&md5test, $q);
  619.         }
  620. }
  621.  
  622. MAKE_FIFOS: if (! -d ${tempfs}) {
  623.         make_path(${tempfs});
  624.         if (! -d $tempfs) {
  625.                 die "Couldnt make_path $tempfs: $!"; }
  626. }
  627.  
  628. foreach my $core (1 .. $cores) {
  629.         my $FIFO = $tempfs . '/' . 'md5sum' . $core;
  630.  
  631.         unless (-p $FIFO) {
  632.                 unlink($FIFO); # discard any failure, will catch later
  633.                 mkfifo($FIFO, 0777)
  634.                 || die "can't mkfifo $FIFO: $!";
  635.         }
  636. }
  637.  
  638. # If script mode is either 'import' or 'double' we'll start only
  639. # one thread, else we'll start as many as the available number of CPUs.
  640. my @threads;
  641. START_THREADS: if ($mode ne 'import' && $mode ne 'double') {
  642.         foreach (1 .. $cores) {
  643.                 push(@threads, threads->create(@run));
  644.         }
  645.         push(@threads, threads->create(\&grim_reaper));
  646. }
  647.  
  648. # This loop is where the actual action takes place
  649. # (i.e. where all the subroutines get called from)
  650. foreach my $dn (@lib) {
  651.         if (-d $dn) {
  652.                 # Changing $dn to the absolute path.
  653.                 $dn = abs_path($dn);
  654.  
  655.                 # Adding the current PATH to the $db variable.
  656.                 $db = "$dn/$db";
  657.  
  658.                 # Change directory to $dn.
  659.                 chdir($dn)
  660.                         or die "Can't change directory to '$dn': $!";
  661.  
  662.                 # Start logging.
  663.                 logger('start');
  664.  
  665.                 # If the database file is a real file,
  666.                 # store it in the database hash.
  667.                 file2hash() if (-f $db);
  668.  
  669.                 given ($mode) {
  670.                         when ('double') {
  671.                                 # Find identical files in database.
  672.                                 md5double();
  673.                         }
  674.                         when ('import') {
  675.                                 # For all the files in $dn, run md5import.
  676.                                 foreach my $fn (getfiles($dn)) { md5import($fn); }
  677.                         }
  678.                         when ('index') {
  679.                                 #say("Enqueueing files");
  680.                                 # Get all the file names and put them in the queue.
  681.                                 foreach my $fn (getfiles($dn)) {
  682.                                         if ($saw_sigint) { iquit(); }
  683.                                         while ($file_stack >= $disk_size) {
  684.                                                 my $active = threads->running();
  685.                                                 #say("${active}: $file_stack > $disk_size");
  686.                                                 yield();
  687.                                         }
  688.                                         # Unless file name exists in the database hash,
  689.                                         # continue.
  690.                                         unless ($md5h{$fn}) {
  691.                                                 my $size = (stat($fn))[7];
  692.                                                 if (!$size) { next; }
  693.                                                 if ($size < $disk_size) {
  694.  
  695.                                                         #say("${fn}: reading");
  696.                                                         open(FILE, '<:raw', $fn) or die "Can't open '$fn': $!";
  697.                                                         sysread(FILE, $file_contents{$fn}, $size);
  698.                                                         close(FILE) or die "Can't close '$fn': $!";
  699.  
  700.                                                         #say("${fn}: queueing");
  701.                                                         { lock($file_stack);
  702.                                                         $file_stack += length($file_contents{$fn}); }
  703.  
  704.                                                         $q->enqueue($fn);
  705.                                                 } else {
  706.                                                         $large{$fn} = 1;
  707.                                                 }
  708.                                         }
  709.                                 }
  710.                         }
  711.                         when ('test') {
  712.                                 #say("Enqueueing files");
  713.                                 # Fetch all the keys for the database hash and put
  714.                                 # them in the queue.
  715.                                 foreach my $fn (sort(keys(%md5h))) {
  716.                                         if ($saw_sigint) { iquit(); }
  717.                                         while ($file_stack >= $disk_size) {
  718.                                                 #my $active = threads->running();
  719.                                                 #say("${active}: $file_stack > $disk_size");
  720.                                                 yield();
  721.                                         }
  722.  
  723.                                         my $size = (stat($fn))[7];
  724.                                         if ($size < $disk_size) {
  725.  
  726.                                                 #say("${fn}: reading");
  727.                                                 open(FILE, '<:raw', $fn) or die "Can't open '$fn': $!";
  728.                                                 sysread(FILE, $file_contents{$fn}, $size);
  729.                                                 close(FILE) or die "Can't close '$fn': $!";
  730.  
  731.                                                 #say("${fn}: queueing");
  732.  
  733.                                                 { lock($file_stack);
  734.                                                 $file_stack += length($file_contents{$fn}); }
  735.  
  736.                                                 $q->enqueue($fn);
  737.                                         } else {
  738.                                                 $large{$fn} = 1;
  739.                                         }
  740.                                 }
  741.                         }
  742.                 }
  743.  
  744.                 while ($file_stack > 0) {
  745.                         #say("$file_stack > 0");
  746.                         yield();
  747.                 }
  748.  
  749.                 if (%large) {
  750.  
  751.                         foreach my $fn (sort(keys(%large))) {
  752.                                 #say("${fn}: queueing");
  753.                                 $q->enqueue($fn);
  754.                         }
  755.                 }
  756.  
  757. #       use Digest::MD5;
  758. #       $md5 = Digest::MD5->new;
  759. #       $md5->add('foo', 'bar');
  760. #       $md5->add('baz');
  761. #       $digest = $md5->hexdigest;
  762. #       print "Digest is $digest\n";
  763.  
  764.         }
  765. }
  766.  
  767. $stopping = 1;
  768. foreach my $t (threads->list()) { $t->join(); }
  769. #say("All threads joined");
  770.  
  771. # Print the hash to the database file and close the log
  772. hash2file();
  773. logger('end', $n);

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at