#!/usr/local/bin/perl # # Read in what we have to do # @olabel = ('Outside Server','Inside Server Selection Trees - Absolute','Inside Server Selection Trees - Relative','Outside Server Selection Trees - Absolute','Outside Server Selection Trees - Relative'); # %procoptvocab = ( "convertfile",0,"convertdir",0,"map",0,"skip",0,"logfile",0,"newbasedir",0,"oldbaseurlstem",0,"copyover",0, "newbaseurlstem",0,"doconvertfile",0,"doconvertdir",0,"selectionname",0,"selectiontext",0,"selectionfile",0,"summary",0,"userelative",0, "contd",0,"html",0,"endhtml",0); # $today= `date`; chomp($today); $today =~ s/\d\d:\d\d:\d\d ... //; # Today's Date # $nconvertfile=0; $ndoconvertfile=0; $nskip=0; $nmap=0; $nconvertdir=0; $ndoconvertdir=0; $nsummary=0; $optionvalues{"logfile"}= "mapping/logfile"; $optionvalues{"newbasedir"} = 'converted/'; $optionvalues{"oldbaseurlstem"} = 'http://www.npac.syr.edu/users/gcf'; $optionvalues{"copyover"} = 1; $optionvalues{"newbaseurlstem"} = 'http://boss.npac.syr.edu:8080/users/gcf'; $optionvalues{"selectionname"} = "Wisdom Selection"; $optionvalues{"selectiontext"} = ""; $optionvalues{"selectionfile"} = ""; $optionvalues{"userelative"} = 1; # &readopt($ARGV[0]); # $logfile =''; $logdir =''; if(defined($optionvalues{"logfile"}) && ($optionvalues{"logfile"} ne '') ) { $logfile= $optionvalues{"logfile"}; $logdir = $logfile; $logdir =~ s%/[^/]*$%/%; $logfile1 = $logfile . '1'; $logfile2 = $logfile . '2'; $logfile3 = $logfile . '3'; $logfile4 = $logfile . '4'; $logfile5 = $logfile . '5'; $logfile6 = $logfile . '6'; $logfile7 = $logfile . '7'; $logfile8 = $logfile . '8'; $logfile9 = $logfile . '9'; $logfile10 = $logfile . '10'; $logfilelabel = $logfile . 'label'; open(L,">".$logfile) || die "$0: Cannot log to $logfile\n"; open(L1,">".$logfile1) || die "$0: Cannot log to $logfile1\n"; open(L2,">".$logfile2) || die "$0: Cannot log to $logfile2\n"; open(L3,">".$logfile3) || die "$0: Cannot log to $logfile3\n"; open(L4,">".$logfile4) || die "$0: Cannot log to $logfile4\n"; open(L5,">".$logfile5) || die "$0: Cannot log to $logfile5\n"; open(L6,">".$logfile6) || die "$0: Cannot log to $logfile6\n"; open(L7,">".$logfile7) || die "$0: Cannot log to $logfile7\n"; open(L8,">".$logfile8) || die "$0: Cannot log to $logfile8\n"; open(L9,">".$logfile9) || die "$0: Cannot log to $logfile9\n"; open(L10,">".$logfile10) || die "$0: Cannot log to $logfile10\n"; open(Llabel,">".$logfilelabel) || die "$0: Cannot log to $logfilelabel\n"; for($i=0;$i<5;$i++) { print Llabel 'URL type ',($i+1),' ',$olabel[$i],"\n"; } print Llabel "\nContents of Different Log Files\n"; print Llabel "$logfile1: Type 1 URL's Outside Server\n"; print Llabel "$logfile2: Type 2 and 3 URL's Inside Server and Selection Tree\n"; print Llabel "$logfile3: Type 4 and 5 URL's Inside Server but outside Selection Tree\n"; print Llabel "$logfile4: File Conversion Summary (1 entry for each file and directory processed)\n"; print Llabel "$logfile5: Version of $logfile4 suitable to read back in (Internal Use Only)\n"; print Llabel "$logfile6: Version of $logfile1 and $logfile2 and $logfile3 suitable to read back in (Internal Use Only)\n"; print Llabel "$logfile7: Those Type 2 and 3 URL's Inside Server and Selection Tree which were NOT seen in file conversion -- this is a potential error\n"; print Llabel "$logfile8: Summary of Various warning Messages which are also potential errors\n"; print Llabel "$logfile8: Also has summary of extra substitutions found outside TAGs\n"; print Llabel "$logfile9: Version of $logfile8 (first warning summary) suitable to read back in (Internal Use Only)\n"; print Llabel "$logfile10: Version of $logfile8 (second substitution summary) suitable to read back in (Internal Use Only)\n"; close(Llabel); print "Log Files $logfile etc.\n"; open(ORG,'<'.$ARGV[0]) || die "$0: Cannot open $ARGV[0]\n"; $logout= $logfile. 'INPUT'; open(COPY,'>'.$logout) || die "$0: Cannot open $logout\n"; while($line=) { print COPY $line; } close(ORG); close(COPY); } $newbasedir = $optionvalues{"newbasedir"}; $newbasedir .= '/' unless($newbasedir =~ m%/$%); # $oldbaseurlstem = $optionvalues{"oldbaseurlstem"}; $oldbaseurlstem =~ s%/$%%; $beginning = $oldbaseurlstem; $beginning =~ s%^([^/]+)//([^/]+)(.*)$%$1//$2%; # $copyover = $optionvalues{"copyover"}; $userelative = $optionvalues{"userelative"}; # $newbaseurlstem = $optionvalues{"newbaseurlstem"}; $newbaseurlstem =~ s%/$%%; $newbeginning = $newbaseurlstem; $newbeginning =~ s%^([^/]+)//([^/]+)(.*)$%$1//$2%; if( $newbaseurlstem ne '' ) { for($i=0;$i < $nconvertdir; $i++) { $inputmap[$nmap] = $oldbaseurlstem . '/' . $fconvertdir[$i]; $outputmap[$nmap] = $newbaseurlstem . '/' . $fconvertdir[$i]; $typemap[$nmap] = 1; $nmap++; } for($i=0;$i < $nconvertfile; $i++) { $inputmap[$nmap] = $oldbaseurlstem . '/' . $convertfile[$i]; $outputmap[$nmap] = $newbaseurlstem . '/' . $convertfile[$i]; $typemap[$nmap] = 1; $nmap++; } } # # Make wisdom control file if($logdir && (defined($ARGV[1])) ) { open(MASTER,$ARGV[1]) || die("Unable to open $ARGV[1]"); $actual = $logdir . 'Listofchangedurls.html'; open(ACTUAL,'>' . $actual ) || die("Unable to write to $actual"); while( $line= ) { chomp($line); if( $line =~ m%\@OLDSTEM\@% ) { $line =~ s%\@OLDSTEM\@%$oldbaseurlstem%; } elsif( $line =~ m%\@NEWSTEM\@% ) { $line =~ s%\@NEWSTEM\@%$newbaseurlstem%; } elsif( $line =~ m%\@ADDIN\@% ) { for($i=0;$i<$nconvertdir;$i++) { my($file) = $newbaseurlstem .'/'. $fconvertdir[$i]; print ACTUAL "Test Against\n"; print ACTUAL ''."\n"; } for($i=0;$i<$nconvertfile;$i++) { my($file) = $newbaseurlstem .'/'. $convertfile[$i]; print ACTUAL "Test Against\n"; print ACTUAL ''."\n"; } next; } print ACTUAL $line,"\n"; } close(MASTER); close(ACTUAL); } # # This is version for setting Slave URL # if($logdir && (defined($ARGV[1])) ) { $newmaster = $ARGV[1].'1'; open(MASTER,$newmaster ) || die("Unable to open $newmaster"); $actual = $logdir . 'Listofchangedurls1.html'; open(ACTUAL,'>' . $actual ) || die("Unable to write to $actual"); while( $line= ) { chomp($line); if( $line =~ m%\@OLDSTEM\@% ) { $line =~ s%\@OLDSTEM\@%$oldbaseurlstem%; } elsif( $line =~ m%\@NEWSTEM\@% ) { $line =~ s%\@NEWSTEM\@%$newbaseurlstem%; } elsif( $line =~ m%\@ADDIN\@% ) { for($i=0;$i<$nconvertdir;$i++) { my($file) = $oldbaseurlstem .'/'. $fconvertdir[$i]; print ACTUAL "Match or Not to\n"; print ACTUAL ''."\n"; } for($i=0;$i<$nconvertfile;$i++) { my($file) = $newbaseurlstem .'/'. $convertfile[$i]; print ACTUAL "Test Against\n"; print ACTUAL ''."\n"; } next; } print ACTUAL $line,"\n"; } close(MASTER); close(ACTUAL); } # # README File for this selection if(defined($ARGV[2])) { my($readme) = $optionvalues{"selectionfile"}; if(defined($readme) && ($readme ne "") ) { $readme = $newbasedir . $readme; open(MASTER,$ARGV[2]) || die("Unable to open $ARGV[2]"); open(ACTUAL,'>' . $readme) || die("Unable to write to $readme"); while( $line= ) { chomp($line); if( $line =~ m%\@DATE\@% ) { $line =~ s%\@DATE\@%$today%; } if( $line =~ m%\@NAME\@% ) { $line =~ s%\@NAME\@%$optionvalues{"selectionname"}%; } if( $line =~ m%\@TEXT\@% ) { $line =~ s%\@TEXT\@%$optionvalues{"selectiontext"}%; } if( $line =~ m%^\@FILES\@$% ) { for($i=0;$i<$nconvertfile;$i++) { my($file) = $oldbaseurlstem . '/' . $convertfile[$i]; print ACTUAL "$file\n"; } next; } if( $line =~ m%^\@DIRECTORIES\@$% ) { for($i=0;$i<$nconvertdir;$i++) { my($file) = $oldbaseurlstem . '/' . $fconvertdir[$i]; print ACTUAL "$file\n"; } next; } if( $line =~ m%^\@MAPS\@$% ) { for($i=0;$i<$nmap;$i++) { my($type) = $typemap[$i]; next unless($type > 0); my($mess) = "In Tag"; $mess = "Always" if($type == 2); print ACTUAL "$inputmap[$i]"; print ACTUAL "$outputmap[$i]"; print ACTUAL "$mess\n"; } next; } print ACTUAL $line,"\n"; } close(MASTER); close(ACTUAL); if($logdir) { system("cp $readme ${logdir}$optionvalues{'selectionfile'}"); } } } # if($ndoconvertdir == 0 ) { $ndoconvertdir = $nconvertdir; for($i=0;$i<$nconvertdir;$i++) { $fdoconvertdir[$i]=$fconvertdir[$i]; } } # $ndoconvertdir = 0 if(($fdoconvertdir[0] eq 'no') || ($fdoconvertdir[0] eq 'none')); # if($ndoconvertfile == 0 ) { $ndoconvertfile = $nconvertfile; for($i=0;$i<$nconvertfile;$i++) { $fdoconvertfile[$i]=$convertfile[$i]; } } # $ndoconvertfile = 0 if(($fdoconvertfile[0] eq 'no') || ($fdoconvertfile[0] eq 'none')); # for($imap=0;$imap <$nmap; $imap++) { $mapused[$imap] =0; } # ######################################################## $count = 0; $replace_count = 0; $ignoredfiles =0; $copied =0; # if( $ndoconvertfile > 0 ) { foreach $file (@fdoconvertfile) { $depth=0; $initialfile = $file; $beginurl = "$oldbaseurlstem/$file"; my($createdir)= $file; my($makeit) = $newbasedir; while( $createdir =~ s%^([^/]+)/%% ) { $makeit .= $1; mkdir $makeit,0755 unless(-e $makeit); $makeit .= '/'; ++$depth; } &processfile($file,$beginurl); } } # if( $ndoconvertdir > 0 ) { $beginurl = "$oldbaseurlstem/"; foreach $dir (@fdoconvertdir) { unless( -d $dir ) { die( "Missing recursive file $dir\n"); next; } my($createdir)= $dir; $depth=1; my($makeit) = $newbasedir; while( $createdir =~ s%^([^/]+)/%% ) { $makeit .= $1; mkdir $makeit,0755 unless(-e $makeit); $makeit .= '/'; ++$depth; } $initialfile = $dir; $beginurl = "$oldbaseurlstem/$dir"; &recursivelyconvertdir($dir,$beginurl); } } # # Read Existing Summaries for($i=0; $i<$nsummary; $i++) { my($localfile)= $fsummary[$i]; my($oldlogfile5,$oldlogfile6,$key,$stat,$newfile,$title,$line,$seen,$type,$chgd,$first,$alias,$summary); $oldlogfile5 = $localfile . '5'; open(OLDLOG,'<' . $oldlogfile5) || die("Unable to read from $oldlogfile5"); while( $line = ) { chomp($line); ($key,$stat,$newfile,$title) = split(/ \@\@ /,$line); $filestatus{$key} = $stat; $filenew{$key} = $newfile; $filetitle{$key} = $filetitle; } close(OLDLOG); $oldlogfile6 = $localfile . '6'; open(OLDLOG,'<' . $oldlogfile6) || die("Unable to read from $oldlogfile6"); while( $line = ) { chomp($line); ($key,$seen,$first,$chgd,$type,$alias,$summary) = split(/ \@\@ /,$line); $urlseen{$key} = $seen; $urlfirstseen{$key} = $first; $urltype{$key} = $type; $urlchanged{$key} = $chgd; $urlalias{$key} = $alias; $urlsummary{$key} = $summary; } close(OLDLOG); my($oldlogfile9,$oldlogfile10,$extra,$warn); $oldlogfile9 = $localfile . '9'; open(OLDLOG,'<' . $oldlogfile9) || die("Unable to read from $oldlogfile9"); while( $line = ) { chomp($line); ($key,$warn) = split(/ \@\@ /,$line); $warning{$key} = $warn; } close(OLDLOG); my($oldlogfile10) = $localfile . '10'; open(OLDLOG,'<' . $oldlogfile10) || die("Unable to read from $oldlogfile10"); while( $line = ) { chomp($line); ($key,$extra) = split(/ \@\@ /,$line); $fileextra{$key} = $extra; } close(OLDLOG); } # print "***** File Summary *****\n"; print L4 "\n ***** File Conversion Summary *****\n" if($logfile); foreach $key (sort keys %filestatus) { print "File $key $filetitle{$key} $filestatus{$key} -- $filenew{$key}\n" unless($logfile); print L4 "File $key $filetitle{$key} $filestatus{$key} -- $filenew{$key}\n" if($logfile); print L5 "$key @@ $filestatus{$key} @@ $filenew{$key} @@ $filetitle{$key}\n" if($logfile); } # print "\nTotal URL's Found = $count \n"; print L "\nTotal URL's Found = $count \n" if($logfile); print "Total URL's replaced = $replace_count\n"; print L "Total URL's replaced = $replace_count\n" if($logfile); print "Total files copied $copied\n"; print L "Total files copied $copied\n" if($logfile); print "Total files ignored $ignoredfiles\n"; print L "Total files ignored $ignoredfiles\n" if($logfile); # for($imap;$imap <$nmap; $imap++) { print "Map $inputmap[$imap] sent to $outputmap[$imap] $mapused[$imap] times\n"; print L "Map $inputmap[$imap] sent to $outputmap[$imap] $mapused[$imap] times\n" if($logfile); } # print L8 "Warning AND extra substitution messages\n" if($logfile); foreach $key (keys %warning) { print "Warning on $key with target $warning{$key}\n"; print L8 "Warning on $key with target $warning{$key}\n" if($logfile); print L9 "$key @@ $warning{$key}\n"; } # foreach $key (keys %fileextra) { print "Extra Substitution on $key with target $fileextra{$key}\n"; print L8 "Extra Substitution on $key with target $fileextra{$key}\n" if($logfile); print L10 "$key @@ $fileextra{$key}\n"; } # print "\n ***** URL Outside selection and server Summary *****\n"; print L1 "\n ***** URL Outside selection and server Summary *****\n" if($logfile); foreach $key (sort keys %urlseen) { my($chgd) = $urlchanged{$key}; my($type) = $urltype{$key}; print L6 "$key @@ $urlseen{$key} @@ $urlfirstseen{$key} @@ $chgd @@ $type @@ $urlalias{$key} @@ $urlsummary{$key}\n" if($logfile); next unless($type ==1); print "$key ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" unless($logfile); print L1 "$key ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" if($logfile); } # print "\n ***** URL seen in Selection Summary *****\n"; print L2 "\n ***** URL seen in Selection Summary *****\n" if($logfile); print L7 "\n ***** URL seen in Selection Summary But File Missing *****\n" if($logfile); foreach $key (sort keys %urlseen) { my($chgd) = $urlchanged{$key}; my($type) = $urltype{$key}; my($mess) = $olabel[$type-1]; next unless(($type == 2)||($type == 3)); next if( $urlfirstseen{$key} =~ m%topicareas/Simple% ); my($exists) = ''; my($keyfile) = $chgd; $keyfile =~ s%^$newbaseurlstem/%%; $keyfile =~ s%/$%%; unless( exists($filestatus{$keyfile})) { $exists = '** MISSING **'; print L7 "$key ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" if($logfile); } print "$key $exists ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" unless($logfile); print L2 "$key $exists ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" if($logfile); } # print "\n ***** URL seen on Server Outside Selection Summary *****\n"; print L3 "\n ***** URL seen on Server Outside Selection Summary *****\n" if($logfile); foreach $key (sort keys %urlseen) { my($chgd) = $urlchanged{$key}; my($type) = $urltype{$key}; next unless(($type == 4)||($type == 5)); print "$key ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" unless($logfile); print L3 "$key ($chgd) seen $urlseen{$key} times first from $urlfirstseen{$key} $urlalias{$key} $urlsummary{$key}\n" if($logfile); } # close(L) if $logfile; close(L1) if $logfile; close(L2) if $logfile; close(L3) if $logfile; close(L4) if $logfile; close(L5) if $logfile; close(L6) if $logfile; close(L7) if $logfile; close(L8) if $logfile; close(L9) if $logfile; close(L10) if $logfile; exit; # sub recursivelyconvertdir { # my($dir,$oldbaseurlstem) =@_; my($fileurl1,$newdir); # unless(-e $dir) { ++$ignoredfiles; print L "Nonexistent input directory $dir\n" if($logfile); $warning{$dir} = 'Nonexistent directory'; return; } $newdir = $newbasedir . $dir; $newdir =~ s%\&%and%g; $newdir =~ s%\$%D%g; unless( opendir(DIR, $dir) ) { print ("Unable to open directory $dir\n"); print L ("Unable to open directory $dir\n") if($logfile); $warning{$dir} = 'CANNOT OPEN directory'; closedir(DIR); return; } unless( -e $newdir) { mkdir $newdir,0755; } $fileurl1 = $oldbaseurlstem; $fileurl1 .= '/' unless( $oldbaseurlstem =~ m%/$% ); $filestatus{$dir} = "Directory"; # Needed as a directory is a legal URL my(@filenames) = readdir(DIR); closedir(DIR); OUTERFILE: foreach $name (@filenames) { next if( $name =~ m%^\.% ); next if( $name =~ m%~$% ); foreach $skipfile (@fskip) { if( $skipfile =~ m%/% ) { my($testfile) = "$dir/$name"; if( $testfile =~ m%^$skipfile$% ) { next OUTERFILE; } } else { next OUTERFILE if( $name =~ m%^$skipfile$% ); } } # my($actualname) = "$dir/$name"; if( -d $actualname ) { ++$depth; &recursivelyconvertdir($actualname,$fileurl1.$name); --$depth; next; } &processfile("$dir/$name",$fileurl1.$name); } return; } # sub processfile { # my($file,$fileurl)=@_; my($line,$newfile,$localreplace); # $title = ''; $globalfile = $file; $try=0; while() { last if(-e $file); next if (++$try < 5); ++$ignoredfiles; print L "Nonexistent input file $file\n" if($logfile); $warning{$file} = 'Nonexistent input file'; return; } $newfile = $newbasedir . $file; $newfile =~ s%\&%and%g; $fileurl =~ s%\&%and%g; $newfile =~ s%\$%D%g; $fileurl =~ s%\$%D%g; $oldglobalurl = $fileurl; $mapped = -1; if($nmap > 0 ) { # map url's for($imap=0;$imap<$nmap;$imap++) { if( $oldglobalurl =~ m%$inputmap[$imap]% ) { # Takes last map found if($typemap[$imap] >=1 ) { $mapped = $imap; } } } } $newglobalurl = $oldglobalurl; if( $mapped > 0 ) { $newglobalurl =~ s%$inputmap[$mapped]%$outputmap[$mapped]%; } if( -e $newfile) { unless($copyover) { ++$ignoredfiles; print L "Copy already Exists so Ignore" if($logfile); $warning{$file} = 'Copied Already'; return; } } unless(($file =~ m%\.html%i) || ($file =~ m%\.htm%i) ) { my($com) ="cp '$file' $newfile"; $filestatus{$file} = 'Straight Copy'; $filenew{$file} = $newfile; $filetitle{$file} = 'Unknown Title'; $try=0; while() { if(system($com)) { print STDERR "Trial $try Cannot copy $file to $newfile\n"; print L "Trial $try Cannot copy $file to $newfile\n" if($logfile); next if (++$try < 5); print L "Cannot copy $file to $newfile\n" if($logfile); $filestatus{$file} = 'FAILED Copy'; $warning{$file} = 'FAILED Copy'; return; } last; } $try=0; while() { last if(chmod(0644,$newfile)); print STDERR "Trial $try Cannot chmod $newfile\n"; next if (++$try < 5); $warning{$file} = 'FAILED Mode Change'; last; } ++$copied; return; } # # Process HTML file $try=0; while() { last if(open(I,$file)); print STDERR "Trial $try Cannot open(read) $file\n"; print L "Trial $try Cannot open(read) $file\n" if($logfile); die "$0: Cannot read $file\n" if(++$try >5); } $try=0; while() { last if(open(W,'>' . $newfile)); print STDERR "Trial $try Cannot open(write) $newfile\n"; print L "Trial $try Cannot open(write) $newfile\n" if($logfile); die "$0: Cannot write $newfile\n" if(++$try >5); } print L "Copy $file to $newfile ******************\n"; $localreplace = $replace_count; $readtitle =0; # $/='>'; while () { $line =$_; $backline = ''; while ($line=~/\<([^\>\<]*)\>(.*)$/s) { $fulltag=$1; $line=$2; $backline .= $`; if( $readtitle == 1 ) { $title .= $`; } if( $fulltag =~ m%title%i ) { $readtitle++; $readtitle = 2 if($readtitle > 2); $backline .= '<' . $fulltag . '>'; next; } ($tag,%args)=&split_tag($fulltag); &check_tag("A", "HREF", ); &check_tag("IMG", "SRC", ); &check_tag("IMG", "LOWSRC", ); &check_tag("IMG", "DYNSRC", ); &check_tag("FIG" , "SRC", ); &check_tag("OVERLAY", "SRC", ); &check_tag("BODY", "BACKGROUND", ); &check_tag("TABLE", "BACKGROUND", ); &check_tag("BGSOUND", "SRC", ); &check_tag("FRAME", "SRC", ); &check_tag("AREA", "HREF", ); &check_tag("APPLET", "CODEBASE" ); &check_tag("PARAM", "VALUE", ); &check_tag("FORM", "ACTION", ); $backline .= '<' . $fulltag . '>'; } $backline .= $line; if($nmap > 0 ) { # map url's generally whether or not they are in Tags for($imap=0;$imap<$nmap;$imap++) { next if($typemap[$imap] == 0); if( $backline =~ m%$inputmap[$imap]%s ) { $fileextra{$initialfile} = $inputmap[$imap]; $backline =~ s%$inputmap[$imap]%$outputmap[$imap]%sg; } } } print W $backline; } close(W); close(I); $localreplace = $replace_count - $localreplace; $filestatus{$file} = "Processed $localreplace"; $filenew{$file} = $newfile; $filetitle{$file} = $title; ++$copied; return; } sub split_tag { my($fulltag)=@_; my($tagname,$etc,$var,$quote,$arg,$value,@tagargs); undef $etc; $fulltag =~ /^\s*(\S+)\s*(.*)$/s; $tagname =$1; $etc =$2; while ($etc !~ /^\s*$/) { $etc =~ /^\s*([^=\s]+)\s*(=)?\s*(\S+)?(.*)$/s; $arg=$1; last unless($arg); $var=$2; $value=$3; $etc=$4; ($value,$etc)=((undef),$value.$etc) unless $var=~/^=$/; ($quote)=$value=~/^([\"\'])/; if ($quote) { if ($value !~ /.$quote$/) { $etc =~ /^([^$quote]*)$quote?(.*)$/s; $value.=$1.$quote; $etc = $2; } $value =~ s/$quote\s*(.*)\s*$quote$/$1/s; $value =~ s/\s*$//s; } $arg="\U$arg\E"; push(@tagargs,$arg,$value); } $tagname="\U$tagname\E"; ($tagname,@tagargs); } sub check_tag { my($thetag,$thearg)=@_; my($targeturl,$testurl,$changed,$actualurl,$relative); # return unless( ($thetag eq $tag) && defined($targeturl = $args{$thearg}) ); $targeturl =~ s%#[^/]+$%%; return if( $targeturl eq ''); return if( $targeturl =~ m%mailto:%i ); return if( $targeturl =~ m%mailbox:%i ); return if( $targeturl =~ m%news:%i ); return if( $targeturl =~ m%javascript:%i ); if( $tag eq 'PARAM' ) { return unless( $targeturl =~ m%^http://% ); } $testurl = $targeturl; $testurl =~ s%/\./%/%g; $testurl =~ s%[^:]//%/%g; $testurl =~ s%^\./%%; $testurl =~ s%\&%and%g; $testurl =~ s%\$%D%g; $parent =0; while( $testurl =~ s%^\.\./%% ) { ++$parent; } while( $testurl =~ m%\.\./% ) { if( $testurl =~ s%^\.\./%%) { ++$parent; } elsif($testurl =~ m%/[^/]+/\.\./%) { $testurl =~ s%/[^/]+/\.\./%%; } else { $warning{$initialfile}= 'BADURL ' .$targeturl; return; } } $relative=0; unless( $testurl =~ m%//% ) { unless( $testurl =~ m%^/% ) { $actualurl = $oldglobalurl; $actualurl =~ s%/[^/]+$%/%; for($i=0;$i<$parent;$i++) { unless( $actualurl =~ s%/[^/]+/$%/% ) { $warning{$initialfile}= 'BADURL ' .$targeturl; return; } } if( $actualurl eq '' ) { $warning{$initialfile}= 'BADURL ' .$targeturl; return; } $testurl = $actualurl . $testurl; } else { $testurl = $beginning . $testurl; } $relative=1; } if($parent > $depth ) { $warning{$initialfile}= 'BADURL ' . $targeturl; return; } if($testurl =~ m%^[^/]+$% ) { $testurl = $beginning . '/' . $testurl; die("Illegal Condition $tag $targeturl $testurl"); } unless( $testurl =~ m%//% ) { # File in same directory $actualurl = $oldglobalurl; $actualurl =~ s%/[^/]+$%/$testurl%; &add_url($tag,$actualurl,'Unchanged',4); return; } $mapped = -1; if($nmap > 0 ) { # map url's for($imap=0;$imap<$nmap;$imap++) { if( $testurl =~ m%$inputmap[$imap]% ) { # Takes last map found if($typemap[$imap] >=1 ) { $mapped = $imap; $mappedtype = $typemap[$imap]; } } } } if( $mapped == -1 ) { # No change in URL suggested if( $relative == 0 ) { # Original was absolute and can remain so &add_url($tag,$testurl,$testurl,4); } else { # Original was relative and must become absolute $fulltag =~ s/$targeturl/$testurl/; $replace_count++; &add_url($tag,$testurl,$testurl,5); } } else { # Change in URL suggested if( $relative == 1 ) { # Original was relative -- it can stay relative but mappings can change relative address $changed = $testurl; $changed =~ s/$inputmap[$mapped]/$outputmap[$mapped]/; $changedrel = &jumpfoil($newglobalurl,$changed); $fulltag =~ s/$targeturl/$changedrel/; $urlflag =3; $urlflag = 5 if($mappedtype == 2); &add_url($tag,$testurl,$changed,$urlflag); return; } else { # Original was absolute $changed = $testurl; $changed =~ s/$inputmap[$mapped]/$outputmap[$mapped]/; if( $userelative ) { $changedrel = &jumpfoil($newglobalurl,$changed); } else { $changedrel = $changed; } $fulltag =~ s/$targeturl/$changedrel/; $replace_count++; $urlflag = 2; $urlflag = 4 if($mappedtype == 2); &add_url($tag,$testurl,$changed,$urlflag); } } # } sub add_url { my($tag,$ref,$changed,$action)=@_; if(exists($urlseen{$ref})) { ++ $urlseen{$ref}; } else { ++$count; $urlseen{$ref} = 1; $urlchanged{$ref} = $changed; if( $ref =~ m%^$beginning% ) { $urltype{$ref} = $action; } else { $urltype{$ref} = 1; } $urlfirstseen{$ref} = $globalfile; $urlalias{$ref} = ''; $urlsummary{$ref} = ''; } if($mapped >= 0 ) { ++$mapused[$mapped]; } $urlalias{$ref} = 'alias' if( $globalfile =~ m%alias.*html% ); $urlsummary{$ref} = 'Summary' if( $globalfile =~ m%Full.*Page.html%); } # # read dictionary from PROCOPTIONS sub readopt { # my($procfile)=@_; my($curkey,$vocabmem,$reading_html,$colon,$possiblekey,$savekey,$entry,$rest,$line); # open(PROCOPTIONS,$procfile) || die("\nSorry couldnt Open Procoptions ",$procfile, "\n"); # $reading_html=0; # Initial HTML option # # $curkey='none'; while( $line= ) { chomp($line); $colon = index($line,":"); if ( ($line =~ /^\s/) || $colon < 0 ) { die("\nIllegal Continuation Line", $rest,"\n") if $curkey eq 'none'; $line =~ s/^\s+/ /; $optionvalues{$curkey} = &join($optionvalues{$curkey},$line); next; } $possiblekey = substr($line ,0, $colon); $possiblekey =~ s/\s+//g; $savekey=$possiblekey; $possiblekey =~ tr/A-Z/a-z/; $entry = -1; foreach $vocabmem (keys %procoptvocab) { if( $possiblekey eq $vocabmem ) { $entry = $procoptvocab{$vocabmem}; last; } } if( $entry < 0 ) { die("\nIllegal key Processing Options ", $possiblekey, "\n"); } if( $possiblekey eq "html" ) { $reading_html=1; next; } if( $possiblekey eq "endhtml" ) { $reading_html=0; next; } $rest = substr($line, $colon+1); if( $possiblekey eq "contd" ) { if ($curkey eq 'none') { die("\nIllegal Continuation Line", $rest,"\n"); } if( $savekey eq "CONTD" ) { $optionvalues{$curkey} .= "\n" . $rest; next; } $optionvalues{$curkey} = &join($optionvalues{$curkey},$rest); next; } &procreadline($curkey); $optionvalues{$possiblekey} = $rest; $optionvalues_html{$possiblekey} = $reading_html; $curkey=$possiblekey; } # End loop over input lines # &procreadline($curkey); close(PROCOPTIONS); # # return; # } # end readopt sub procreadline { # Process an input option # my($key)=@_; my($value); # $value = $optionvalues{$key}; $value =~ s/^\s*//; $value =~ s/\s*$//; if($key eq 'convertfile' ) { $convertfile[$nconvertfile] = $value; ++$nconvertfile; } if($key eq 'doconvertfile' ) { $fdoconvertfile[$ndoconvertfile] = $value; ++$ndoconvertfile; } if($key eq 'convertdir' ) { $fconvertdir[$nconvertdir] = $value; ++$nconvertdir; } if($key eq 'doconvertdir' ) { $fdoconvertdir[$ndoconvertdir] = $value; ++$ndoconvertdir; } if($key eq 'skip' ) { $fskip[$nskip] = $value; ++$nskip; } if($key eq 'summary' ) { $fsummary[$nsummary] = $value; ++$nsummary; } if($key eq 'map' ) { my(@analyse)= split(/\s*,\s*/,$value); $inputmap[$nmap] = $analyse[0]; $outputmap[$nmap] = $analyse[1]; die("Illegal map $value\n") unless defined($analyse[2]); die("Illegal map $value\n") if defined($analyse[3]); if($analyse[2] =~ m%no%i ) { # This is only used for WebWisdom ListofChangedURL's $typemap[$nmap] = 0; } elsif( $analyse[2] =~ m%always%i ) { $typemap[$nmap] = 2; } else { $typemap[$nmap] = 1; } ++$nmap; } # return; } # sub tohtml { # Convert HTML magic characters in current line my($convline)=@_; return($convline) unless $convline; $convline =~ s/&/&/g; $convline =~ s//>/g; $convline =~ s/"/"/g; $convline; } # # Join 2 lines together returning result # add white space as necessary sub join { my($a,$b) = @_; $a = $a . ' ' if( ($a =~ /\S$/) && ($b =~ /^\S/) ) ; return $a . $b ; } sub jumpfoil{ # Return relative path from $from to $to # my($from,$to)=@_; my($path); # if($from =~ m%^$newbeginning% ) { $from =~ s%^$newbeginning/%%; $to =~ s%^$newbeginning/%%; } return $to if( $to =~ m%^/% ); return $to if( $to =~ m%^http% ); die("Undefined jump from ",$from," to",$to,"\n") if( $from =~ m%^/% ); # $from =~ s%/\./%/%g; $from =~ s%//%/%g; $from =~ s%^\./%%; # Remove null directory movements $to =~ s%/\./%/%g; $to =~ s%//%/%g; $to =~ s%^\./%%; # Remove null directory movements return $to unless $from =~ s%/[^/]*$%%; # Remove last slash and filename leaving directories $path=''; $from =~ s%\.%\\\.%g; # Ensure all dots are real dots in $from! while($from) { if( ($to =~ m%^${from}/%) ) { $to =~ s%^${from}/%% ; last; } if( ($to =~ m%^${from}$%) ) { $to =~ s%^${from}%%; last; } $path .= '../'; $from='' unless $from =~ s%/[^/]+$%% ; } $path .= $to; return $path; } # End routine returning relative path # #