#!/usr/bin/perl
#
# CGI full-text search.
#
# Reads a search string from the submitted form, collapses runs of white
# space in it, and lists every *.html file in the requested directory whose
# (white-space-collapsed) contents contain that string.
#
# Form fields read:
#   targetDir        directory to search (required)
#   targetDirURL     URL prefix used for result links (defaults to targetDir)
#   targetStr        string to look for (required)
#   daysBack         optional number of days; older documents are not listed
#   printContext     "1" to show the text surrounding the first match
#   backgroundimage  optional background image for the result page
#
# NOTE(review): the copy of this script that was reviewed had been through an
# HTML-stripping step that removed most tags from the print statements and
# regexes.  The opening tags below were reconstructed from the closing tags
# and list markup that survived (</a>, </ol>, </body></html>) -- confirm the
# exact markup against a known-good copy or a rendered page.

use strict;
use warnings;

our %FORM;          # decoded form fields, filled in by parse_form()
our $allow_html;    # when set to 1, parse_form() keeps tags in the 'body' field

# Number of characters of context shown on each side of a match.
# NOTE(review): offsets are counted in bytes; with a multi-byte encoding the
# excerpt can start or end in the middle of a character.
my $CONTEXT_CHARS = 50;

# Marker inside a document that carries its posting time as epoch seconds.
# NOTE(review): the original pattern was lost; this assumes an HTML comment
# containing an integer of at least nine digits.  When no marker is found the
# file's modification time is used instead -- confirm the real marker format.
my $TIME_MARKER_RE = qr/<!--[^>]*?\b(\d{9,})\b[^>]*-->/;

parse_form();

print "Content-type: text/html\n\n";
print "<html><head><title>查詢結果</title></head>\n";

# Optional page background.  The value comes from the client, so it is
# escaped and quoted before being placed in the attribute.
my $background       = '';
my $background_image = $FORM{'backgroundimage'};
if (defined($background_image) && $background_image =~ /\S/) {
    $background = ' background="' . escape_html($background_image) . '"';
}
print "<body$background>\n";

# Directory to search.
# NOTE(review): this path is supplied by the form, so a client can point the
# search at any directory the web server can read.  Consider restricting it
# to a configured base directory.
my $target_dir = $FORM{'targetDir'};
fail_page("沒有指定搜尋路徑, 表單設計錯誤")
    unless defined($target_dir) && $target_dir =~ /\S/;

my $target_dir_url = $FORM{'targetDirURL'};
if (!defined($target_dir_url) || $target_dir_url !~ /\S/) {
    $target_dir_url = $target_dir;
}

# Optional age limit.  Only a plain non-negative decimal number is accepted;
# anything else means "no limit".
my $time_begin;
my $days_back = $FORM{'daysBack'};
if (defined($days_back)
    && $days_back =~ /^\s*(\d+(?:\.\d+)?|\.\d+)\s*$/)
{
    $time_begin = time - $1 * 24 * 60 * 60;
}

# String to search for.
my $target_str = $FORM{'targetStr'};
fail_page("沒有指定搜尋字串, 請回上一頁重新輸入")
    unless defined($target_str) && $target_str =~ /\S/;

my $print_context
    = (defined($FORM{'printContext'}) && $FORM{'printContext'} =~ /^\s*1\s*$/)
    ? 1
    : 0;

print "<h2>查詢結果</h2>\n";
print "<p>指定搜尋字串為 \"", escape_html($target_str), "\"</p>\n";

if (defined $time_begin) {
    my ($hour, $mday, $mon, $year) = (localtime($time_begin))[2, 3, 4, 5];
    $mon++;
    $year += 1900;
    print "<p>系統搜尋自 $year 年 $mon 月 $mday 日 $hour 時開始之檔案</p>\n";
}
print "<hr>\n";

# Collapse white space the same way the document text is collapsed below, so
# that line breaks inside a document do not prevent a match.
$target_str =~ s/\s+/ /g;

# A failed chdir would otherwise silently search the CGI's own directory.
chdir($target_dir)
    or fail_page("無法進入指定的搜尋路徑");

# glob() instead of `ls *.html`: no shell is started and odd file names are
# returned intact.
my @file_list = glob('*.html');

my @result_file_list;

print "<ol>\n";

FILE:
foreach my $file (@file_list) {
    my $in;
    unless (open($in, '<', $file)) {
        # The page has already been started, so skip the file rather than
        # abort; the reason goes to the server's error log.
        warn "cannot open $file: $!\n";
        next FILE;
    }
    my @contents = <$in>;
    my $mtime    = (stat $in)[9];
    close($in);

    # Posting time: the marker inside the document if there is one (the last
    # one wins), otherwise the file's modification time.
    my $time_doc = -1;
    foreach my $line (@contents) {
        $time_doc = $1 if $line =~ $TIME_MARKER_RE;
    }
    if ($time_doc < 0) {
        $time_doc = defined($mtime) ? $mtime : time;
    }

    my $full_text = join('', @contents);
    $full_text =~ s/\s+/ /g;

    # Literal, case-sensitive substring search.  The search string is never
    # interpreted as a regular expression.
    my $pos = index($full_text, $target_str);
    next FILE if $pos < 0;
    next FILE if defined($time_begin) && $time_doc < $time_begin;

    push(@result_file_list, $file);

    # Pull the heading information out of the document.  Title, author and
    # posting-time lines are recognised by the markup the documents use.
    my $title;
    my @details;
    foreach my $line (@contents) {
        if ($line =~ /<title>(.*)<\/title>/) {
            $title = $1 unless defined $title;
        }
        elsif ($line =~ /<font size=\+1>作者: <\/font>(.*)$/) {
            push(@details, $1);
        }
        elsif ($line =~ /張貼時間: (.*):<p>/) {
            push(@details, $1);
        }
    }
    # Fall back to the file name so the link always has text and the anchor
    # is always closed.
    $title = escape_html($file) unless defined $title;

    print "<li><a href=\"", escape_html("$target_dir_url/$file"), "\">\n";
    print $title, "</a>\n";
    foreach my $detail (@details) {
        print ", ", $detail, "\n";
    }

    if ($print_context) {
        my $start  = $pos > $CONTEXT_CHARS ? $pos - $CONTEXT_CHARS : 0;
        my $before = substr($full_text, $start, $pos - $start);
        my $after
            = substr($full_text, $pos + length($target_str), $CONTEXT_CHARS);

        # The excerpt is document source: its tags are shown as text, its
        # character entities are left alone.  The matched text is user
        # input and is escaped completely.
        my $context = escape_html($before, 1)
            . '<b>' . escape_html($target_str) . '</b>'
            . escape_html($after, 1);

        print "<p> <font color=red>------------</font><br>\n";
        print $context, "<br>\n";
        print "<font color=red>------------</font><p>\n";
    }
}

print "</ol>\n";
print "<p><hr size=7 width=75%><p>\n";
print "共有 ", scalar(@result_file_list), "\n";
print "筆訊息含有指定字串 \"", escape_html($target_str), "\":<p>\n";
print "</body></html>";

#######################
# Parse Form Subroutine
#
# Decodes the submitted form into the global %FORM (field name => value).
# POST bodies are read from STDIN using CONTENT_LENGTH; GET requests are
# read from QUERY_STRING.  NUL characters, HTML comments (server-side
# includes) and HTML tags are removed from every value; tags are kept only
# in a field named 'body' when $allow_html is 1.
#
# Removing complete tags does not make a value safe to print: a lone '<',
# '&' or '"' survives, so callers must still escape values on output.
sub parse_form {
    my $buffer = '';

    my $method = defined($ENV{'REQUEST_METHOD'}) ? uc($ENV{'REQUEST_METHOD'}) : '';
    if ($method eq 'GET') {
        $buffer = $ENV{'QUERY_STRING'} if defined $ENV{'QUERY_STRING'};
    }
    else {
        my $length = $ENV{'CONTENT_LENGTH'};
        if (defined($length) && $length =~ /^\d+$/ && $length > 0) {
            read(STDIN, $buffer, $length);
            $buffer = '' unless defined $buffer;
        }
    }

    my $keep_body_html = (defined($allow_html) && $allow_html eq '1') ? 1 : 0;

    foreach my $pair (split(/&/, $buffer)) {
        # Limit of 2: a value may itself contain '='.
        my ($name, $value) = split(/=/, $pair, 2);
        next unless defined($name) && length($name);
        $value = '' unless defined $value;

        # Un-Webify plus signs and %-encoding
        foreach my $part ($name, $value) {
            $part =~ tr/+/ /;
            $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        }

        # Remove any NULL characters, Server Side Includes
        $value =~ s/\0//g;
        $value =~ s/<!--.*-->//gs;

        unless ($keep_body_html && $name eq 'body') {
            $value =~ s/<[^>]*>//g;
        }

        $FORM{$name} = $value;
    }
    return;
}

# Returns $text with the characters that are special in HTML replaced by
# character entities, so it can be placed in element content or inside a
# double-quoted attribute.  With a true $keep_entities, '&' is left alone so
# that entities already present in the text are preserved.  An undefined
# $text yields the empty string.
sub escape_html {
    my ($text, $keep_entities) = @_;
    return '' unless defined $text;

    $text =~ s/&/&amp;/g unless $keep_entities;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Prints $message as the body of the result page, closes the page and ends
# the request.  $message is trusted, script-supplied HTML text.
sub fail_page {
    my ($message) = @_;
    print "<h2>$message</h2>\n";
    print "</body></html>";
    exit;
}