#!/usr/bin/perl
# Last change: PFB 9 May 2001 2:21 am
# srep.cgi
############################################################################
# POD Documentation

=head1 PROGRAM NAME AND AUTHOR

    Search and Replace - Version 1.3
    Build Date: May 9, 2001
    srep.cgi
    by Peter F. Brown
    peterbrown@worldcommunity.com

=head1 WHAT IT IS

    Search and Replace (ok, it's not an original name...)

    It's quite fast. It processed 13,402,165 replaces, (13.4 million)
    (a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
    on a Pentium 166 with 64 megs of RAM. (This was the test in v1.0)

    On one client's system it processed 13,029 files, with 7,487 replaces,
    in 11 seconds.

    As an alternate method, I recommend using VEDIT, the fastest huge file
    text editor in the world, at 'www.vedit.com'.

    HELP: type "sr -h for help"

    edit s/r values below if you're not using command line parameters

=head1 COPYRIGHT

    Copyright 2001 Peter F. Brown

    SR complies with the GNU GENERAL PUBLIC LICENSE
    and is released as "Open Source Software".
    NO WARRANTY IS OFFERED FOR THE USE OF THIS SOFTWARE !

    Just remember. Back up your file first!
    (when you mess with huge data sets, please do save yourself grief
    and back the file up :-)

=head1 BUG REPORTS AND SUPPORT

    Send bug reports to peterbrown@worldcommunity.com.

    Visit the author's web site at 'worldcommunity.com' to view
    information about support, customer quotes, a resume link,
    and fees for custom Perl/MySQL programming.

=head1 OBTAINING THE LATEST VERSION

    ==> Get the most recent version of this program at:
        http://worldcommunity.com

=head1 REQUIREMENTS

    Perl 5

=head1 CHANGELOG

    - v1.3 - May 9, 2001
      . added output of actual replaces to 'sr.replaces.log'
      . changed the formal name to 'Search and Replace'
      . changed the file name to 'srep.cgi' (for stability)

    - v1.2 - May 6, 2001
      . Initial Public Release.
      . Changed to line method of parsing.
      . Added prompts, recursive directories, logs,
        essentially a complete rewrite.

    - v1.0 - May 20, 1998
      . Initial release.
        Used 'chunk' method of parsing text, instead of lines
      . Only operated on one file at a time

=cut

##############################################################################
# setup area

# these are the approved file extensions that the
# program will look for.
@file_extensions = qw[htm html shtml txt cgi pl js];

# I recommend using an extension for the log file below that
# is NOT included in the array above.
$log_file     = './sr.log';
$replaces_log = './sr.replaces.log';

# end of setup area
##############################################################################

use File::Find;

# clear the screen before printing anything
$clear = `clear`;
print $clear;

$version = 'v1.3';

# banner shared by the confirmation screen and the help screen
$sr_header = qq~Search and Replace $version - replaces characters in a text file.
by Peter F. Brown; peterbrown\@worldcommunity.com
Copyright 2001 Peter F. Brown. All Rights Reserved Worldwide.
Open Source Software. [http://worldcommunity.com]~;

# Command line forms accepted:
#   -u                      use the values hard-coded in the block below
#   -h                      print help and exit
#   3 to 8 arguments        input_file search_string replace_string
#                           [prompt_replace save_backups outfile
#                            double_check case_sensitive]
# Trailing options that are omitted or blank get the defaults applied further
# down. Any other argument count prints the help screen.
my $arg_count = scalar @ARGV;

if ( $arg_count == 1 and $ARGV[0] eq "-u" ) {
    print "\nUsing values in text file.\n";

    # EDIT VALUES HERE IF YOU'RE NOT USING COMMAND LINE PARAMETERS
    ###################################################################
    # you can use regular expressions here, if you're brave.
    # NOTE: this version doesn't support $1 parenthesizing
    # (perhaps in the next version)
    # note for DOS users: be careful of the 8.3 and \ conventions
    # when you name your files. Otherwise, sr should work under DOS.
    # NOTE: Using single quotes may have a different effect.
    $input_file     = "";
    $search_string  = "";
    $replace_string = "";
    $prompt_replace = "yes";
    $save_backups   = "yes";
    $outfile        = "outfile.sr";
    $double_check   = "no";
    $case_sensitive = "no";
}
################ end of s/r editing ###############################
elsif ( $arg_count == 1 and $ARGV[0] eq "-h" ) {
    help_header();    # prints the help screen and exits
}
elsif ( $arg_count >= 3 and $arg_count <= 8 ) {
    # positional arguments; anything not supplied stays undefined and is
    # defaulted below
    (
        $input_file,   $search_string, $replace_string, $prompt_replace,
        $save_backups, $outfile,       $double_check,   $case_sensitive
    ) = @ARGV;
}
else {
    help_header();    # prints the help screen and exits
}

# a missing or blank temp file name falls back to the standard one
$outfile = 'outfile.sr' if !defined $outfile or $outfile eq '';

# Normalise and validate the four yes/no options.
# Each entry is: [ label used in messages, reference to the option, default ].
# A missing or blank value takes the default, which is what the messages
# below have always promised; anything other than yes/no aborts the run.
foreach my $option (
    [ 'Prompt Replace', \$prompt_replace, 'yes' ],
    [ 'Save Backups',   \$save_backups,   'yes' ],
    [ 'Double Check',   \$double_check,   'no' ],
    [ 'Case Sensitive', \$case_sensitive, 'no' ],
  )
{
    my ( $label, $value_ref, $default ) = @$option;

    $$value_ref = $default if !defined $$value_ref or $$value_ref eq '';
    $$value_ref = lc($$value_ref);

    next if $$value_ref eq 'yes' or $$value_ref eq 'no';

    print "\n$label must equal either 'yes' or 'no' or BLANK.\n";
    print "If you leave it blank, it will default to '" . uc($default) . "'.\n";
    print "Exiting ... \n\n";
    exit;
}

# check user input
#.............................
# unbuffered output, so the prompt text is visible before we block on input
$| = 1;

# check for Unix or DOS, for console input
# ($console is also used later, every time the user is prompted)
if ( -e "/dev/tty" ) {
    $console = "/dev/tty";
}
else {
    $console = "con";
}

unless ( open( USER_PROMPT, '<', $console ) ) {
    print "Can't open console: $!\n";
    exit;
}

#..............................
# Show the effective settings and ask for confirmation.
# "y" continues, "n" (or end of input on the console) exits,
# anything else shows the screen again.
$process = "false";

while ( $process eq "false" ) {
    print qq~

$sr_header

You have specified the following:

Input File:      $input_file
Search String:   $search_string
Replace String:  $replace_string
Prompt Replace:  $prompt_replace  (prompts at each replace)
Save Backups:    $save_backups
Temp File:       $outfile
Double Check:    $double_check  (double checks each replace)
Case Sensitive:  $case_sensitive

For a fast UNPROMPTED replace of a directory tree, type:
"srep.cgi CURDIR 'SEARCHSTR' 'REPLACESTR' no no outfile.sr no no"

NOTE: 'case_sensitive' only applies to searching. The replace value
will use the case of the 'replace_string'.

NOTE: If Input File equals 'CURDIR', then all the TEXT files in the
current directory and all of its subdirectories will be processed.

NOTE: If Save Backups is set to 'yes', then the input file
will be copied to $input_file.BAK

In either case, the input file ($input_file) will be overwritten
with the temp file, for 'in place' editing.

Do you wish to continue (enter only "y" or "n")?
~;

    $continue = <USER_PROMPT>;

    # End of input on the console: nothing more can be read, so treat it
    # like "n" instead of redrawing the screen forever.
    unless ( defined $continue ) {
        close(USER_PROMPT);
        print "\n";
        exit;
    }

    # chomp, not chop: only strip a trailing newline, never a real character
    chomp $continue;
    $continue = lc($continue);

    if ( $continue eq "y" ) {
        $process = "true";
    }
    elsif ( $continue eq "n" ) {
        close(USER_PROMPT);
        print "\n";
        exit;
    }
    else {
        $process = "false";    # unrecognised answer: ask again
    }
}

close(USER_PROMPT);

#........................................................................
#........................................................................
#... process ...
# Run-wide counters and start time for the final summary line.
# NOTE: $files_processed counts every entry handed to process_file,
# including the ones that are then skipped.
$files_processed      = 0;
$grand_total_replaces = 0;
$start                = time;

# open log file; use append mode
unless ( open( LOG, '>>', $log_file ) ) {
    print "Error opening log file ($log_file\): $!\n";
    exit;
}

# open replaces log file; use append mode
unless ( open( REPLACES_LOG, '>>', $replaces_log ) ) {
    print "Error opening replaces log file ($replaces_log\): $!\n";
    exit;
}

print LOG "\nSearch and Replace LOG File:\n\n";
print REPLACES_LOG "\nSearch and Replace REPLACES LOG File:\n\n";

$log_header = qq~
SEARCHING USING THE FOLLOWING PARAMETERS:

Input File:      $input_file
Search String:   $search_string
Replace String:  $replace_string
Prompt Replace:  $prompt_replace  (prompts at each replace)
Save Backups:    $save_backups
Temp File:       $outfile
Double Check:    $double_check  (double checks each replace)
Case Sensitive:  $case_sensitive

===========================================================================

~;

print LOG $log_header;
print REPLACES_LOG $log_header;

if ( $input_file eq 'CURDIR' ) {

    # we go into dir mode: File::Find calls process_file once per entry,
    # with $_ set to the entry name and the cwd set to its directory
    @DIRLIST = qw[.];
    find( \&process_file, @DIRLIST );
}
else {
    process_file('single_file');
}

$end     = time;
$seconds = $end - $start;
$minutes = $seconds / 60;

# the same summary goes to both logs and to the screen
my $summary =
"Processed $files_processed Files and $grand_total_replaces Replaces in $minutes minutes ($seconds seconds.)\n";
print LOG $summary;
print REPLACES_LOG $summary;
print $summary;

close(LOG);
close(REPLACES_LOG);
exit;

###########################################################################
# process_file
#
# Runs the search and replace over one file, writing the result to the temp
# file ($outfile) and then renaming it over the input file.
#
# Called in two ways:
#   process_file('single_file')  - operate on the global $input_file
#   as a File::Find callback     - no arguments; the file name is taken
#                                  from $_ and $File::Find::name
#
# Reads the globals set up by the main program (@file_extensions,
# $search_string, $replace_string, $prompt_replace, $double_check,
# $case_sensitive, $save_backups, $outfile) and updates $files_processed and
# $grand_total_replaces. Progress is written to LOG and to the screen.
# Returns nothing.

sub process_file {
    my ($call_mode) = @_;
    $call_mode = '' unless defined $call_mode;

    if ( $call_mode ne 'single_file' ) {
        $input_file      = $_;
        $input_file_long = $File::Find::name;
    }
    else {
        # single-file mode has no File::Find path, so use the name itself
        # in messages (otherwise every message shows an empty file name)
        $input_file_long = $input_file;
    }

    $files_processed++;

    # only files ending in one of the approved extensions are touched
    my $good_ext = 'no';
    foreach my $file_extension (@file_extensions) {
        if ( $input_file =~ /\.$file_extension$/ ) {
            $good_ext = 'yes';
            last;
        }
    }

    unless ( $good_ext eq 'yes' ) {
        print LOG "File $input_file_long does not end in an approved extension. Skipping.\n";
        print "File $input_file_long does not end in an approved extension. Skipping.\n";
        return;
    }

    if ( -d $input_file ) {
        print LOG "Skipping $input_file_long (directory.)\n";
        print "Skipping $input_file_long (directory.)\n";
        return;
    }

    unless ( -T $input_file ) {
        print LOG "Skipping $input_file_long (NOT a TEXT File.)\n";
        print "Skipping $input_file_long (NOT a TEXT File.)\n";
        return;
    }

    my $backup_file = $input_file . '.BAK';

    # get file info, so ownership and permissions can be restored on the
    # rewritten file
    my @info        = stat($input_file);
    my $file_uid    = $info[4];
    my $file_gid    = $info[5];
    my $permissions = ( defined $info[2] ? $info[2] : 0 ) & 07777;

    # open input file
    unless ( open( IN_FILE, '<', $input_file ) ) {
        print LOG "Error with input file ($input_file_long): $!\n";
        print "Error with input file ($input_file_long): $!\n";
        return;
    }

    # open temp output file
    unless ( open( OUT_FILE, '>', $outfile ) ) {
        print LOG "Error with output file ($outfile): $!\n";
        print "Error with output file ($outfile): $!\n";
        close(IN_FILE);    # don't leak the input handle on this early exit
        return;
    }

    my $lines    = 0;
    my $replaces = 0;    # number of LINES changed, not individual matches

    print LOG "\nProcessing $input_file_long\n";
    print "\nProcessing $input_file_long\n";

    # compile the search pattern once per file instead of once per line
    my $search_re =
      $case_sensitive eq 'yes' ? qr/$search_string/ : qr/$search_string/i;

    # file looping here
    #............................
    # A lexical loop variable is used on purpose: reading into $_ would
    # clobber the $_ that File::Find set for this callback.
    while ( my $line = <IN_FILE> ) {

        if ( $line =~ $search_re ) {

            # work on a copy, so a declined replace leaves the line as it was
            my $line_check = $line;
            my $accepted   = 0;

            if ( $prompt_replace eq 'yes' ) {
                print LOG "\nPRIOR TO REPLACE:\n... [$line_check\]\n\n";
                print "\nPRIOR TO REPLACE:\n... [$line_check\]\n\n";

                my $response = user_prompt('Do you wish to replace these occurrences?');

                if ( $response eq 'y' ) {
                    $line_check =~ s/$search_re/$replace_string/g;

                    print LOG "\nAFTER REPLACE:\n... [$line_check\]\n\n";
                    print "\nAFTER REPLACE:\n... [$line_check\]\n\n";

                    if ( $double_check eq 'yes' ) {
                        $response = user_prompt('Was the replace done correctly?');

                        if ( $response eq 'y' ) {
                            $accepted = 1;
                        }
                        else {
                            print LOG "\nError in Replace. Line Replace Not Saved. Skipping.\n";
                            print "\nError in Replace. Line Replace Not Saved. Skipping.\n";
                        }
                    }
                    else {
                        $accepted = 1;
                    }
                }
            }    # prompt replace equals yes
            else {

                # prompt replace equals no, so just go ahead and replace it
                $line_check =~ s/$search_re/$replace_string/g;
                $accepted = 1;
            }

            if ($accepted) {
                $line = $line_check;
                $replaces++;
            }
        }    # line contains search string

        print OUT_FILE ($line);
        $lines++;
    }

    # end of file looping
    #............................

    close(IN_FILE);

    # Buffered write errors (e.g. a full disk) only show up at close. If the
    # temp file is incomplete it must not replace the original.
    unless ( close(OUT_FILE) ) {
        print LOG "ALERT! can't finish writing $outfile: $!\n";
        print "ALERT! can't finish writing $outfile: $!\n";
        print LOG "Leaving $input_file_long unchanged.\n";
        print "Leaving $input_file_long unchanged.\n";
        unlink($outfile);
        return;
    }

    # rename files
    # restore permissions and ownership
    if ( $save_backups eq 'yes' ) {

        # The user asked for a backup: if it can't be made, stop here rather
        # than overwrite the only copy of the original.
        unless ( rename( $input_file, $backup_file ) ) {
            print LOG "ALERT! can't rename $input_file_long to $backup_file: $!\n";
            print "ALERT! can't rename $input_file_long to $backup_file: $!\n";
            print LOG "Leaving $input_file_long unchanged.\n";
            print "Leaving $input_file_long unchanged.\n";
            unlink($outfile);
            return;
        }

        chown( $file_uid, $file_gid, $backup_file ) == 1
          or print LOG "ALERT! can't chown $backup_file: $!\n";
        chmod( $permissions, $backup_file ) == 1
          or print LOG "ALERT! can't chmod $backup_file: $!\n";

        print LOG "Wrote backup to: $backup_file.\n";
        print "Wrote backup to: $backup_file.\n";
    }

    rename( $outfile, $input_file )
      or print LOG "ALERT! can't rename $outfile to $input_file_long: $!\n";
    chown( $file_uid, $file_gid, $input_file ) == 1
      or print LOG "ALERT! can't chown $input_file_long: $!\n";
    chmod( $permissions, $input_file ) == 1
      or print LOG "ALERT! can't chmod $input_file_long: $!\n";

    # finish
    if ( $replaces > 0 ) {
        print LOG "Processed $replaces replaces of $search_string with $replace_string in $input_file_long.\n";
        print REPLACES_LOG "Processed $replaces replaces of $search_string with $replace_string in $input_file_long.\n";
        print "Processed $replaces replaces of $search_string with $replace_string in $input_file_long.\n";
    }
    else {
        print LOG "No replaces done.\n";
        print "No replaces done.\n";
    }

    $grand_total_replaces = $grand_total_replaces + $replaces;

    print LOG "Lines processed: $lines\n\n";
    print "Lines processed: $lines\n\n";

    return;
}

###########################################################################
# help_header
#
# Prints the banner and the usage text, then exits the program.
# Takes no arguments and never returns.

sub help_header {
    print qq~

$sr_header

Syntax: sr <input_file> <search_string> <replace_string>
    OPTIONAL: <prompt_replace>
    OPTIONAL: <save_backups>
    OPTIONAL: <outfile>
    OPTIONAL: <double_check>
    OPTIONAL: <case_sensitive>

For a fast UNPROMPTED replace of a directory tree, type:
"srep.cgi CURDIR 'SEARCHSTR' 'REPLACESTR' no no outfile.sr no no"

NOTE: 'case_sensitive' only applies to searching. The replace value
will use the case of the 'replace_string'.

NOTE: If Input File equals 'CURDIR', then all the TEXT files in the
current directory and all of its subdirectories will be processed.

NOTE: If Save Backups is set to 'yes', then the input file
will be copied to <input_file>.BAK

In either case, the input file will be overwritten
with the temp file, for 'in place' editing.

Note: You can use regular expressions in your s/r values.
Using single or double quotes may change your s/r values.

REMEMBER: BACK UP YOUR FILE FIRST!!!

~;

    exit;
}

###########################################################################
# user_prompt
#
# Asks a yes/no question on the console and keeps asking until the answer
# is "y" or "n" (case does not matter).
#
# syntax: $response = user_prompt($message);
#
# Returns 'y' or 'n'. End of input on the console is returned as 'n'.
# Exits the program if the console ($console) cannot be opened.

sub user_prompt {
    my ($message) = @_;
    my $answer;

    $message = $message . '("(y) yes", "(n) no")?' . "\n";

    unless ( open( USER_PROMPT, '<', $console ) ) {
        print LOG "Can't open console: $!\n";
        print "Can't open console: $!\n";
        exit;
    }

    #..............................
    while (1) {
        print $message;
        $answer = <USER_PROMPT>;

        # no more input available: answer "no" rather than loop forever
        unless ( defined $answer ) {
            $answer = 'n';
            last;
        }

        # chomp, not chop: only strip a trailing newline
        chomp $answer;
        $answer = lc($answer);

        last if $answer eq 'y' or $answer eq 'n';
    }

    close(USER_PROMPT);

    return ($answer);
}

###########################################################################