# synopsis.pl -- overview of the code below
#
# Purpose (per the POD notice): demonstrate TokensV2.pm and format MS Messenger
# logs recovered with PhotoRec into HTML files.
#
# @FORMAT
#   List of [ pattern-string, callback ] pairs.  Each callback is called as
#   callback($matched_text, $out_fh): it re-matches $matched_text against its
#   own pattern to fill ($d, $t, $f, $s, $T) and prints "($d $t) $f" followed
#   by "$T" to $out_fh.  The first entry additionally builds $F by joining
#   every match of a second pattern applied to $f, and prints $F in place
#   of $f.
#
# printFile(@node, $out_fh)
#   Recursive in-order walk of a parse tree.  Returns at once when called with
#   one or two arguments.  Otherwise (the in-code comment says @_ == 5) it
#   recurses into @{$_[1]}, calls $FORMAT[$_[3]][1] with ($_[0], $_[4]),
#   prints "\n" to $_[4], then recurses into @{$_[2]}.
#
# joinParseTree(@node)
#   Returns nothing for zero or one argument; otherwise returns
#   joinParseTree(@{$_[1]}) . $_[0] . joinParseTree(@{$_[2]}).
#
# Main flow
#   1. Read tokens-processed.txt line by line and key %HASH on the leading
#      non-whitespace run of every line that is not a bare newline.
#   2. Reopen tokens-processed.txt for append and write a bookmark line.
#   3. Set $/ to undef so later reads slurp whole files, and join all @FORMAT
#      patterns with "\n" into $regex.
#   4. For every path returned by glob "$myarg_1/**":
#        - print the path; skip it when it is already a key of %HASH;
#        - slurp it as UTF-8 and delete characters \x00-\x1F and \x7F;
#        - derive the output name by replacing $myarg_1 with $myarg_2 and
#          ".txt" with ".html";
#        - call parse($text, $regex) under a 5 second alarm; on timeout
#          append "<path> timed out" to tokens-processed.txt and move on,
#          any other error is re-thrown;
#        - write the output file: printFile() over the parse tree, then
#          $text and the joinParseTree() result after two substitutions;
#        - append the path to tokens-processed.txt.
#
# NOTE(review): parse() is not defined here -- presumably exported by
#   TokensV2; confirm.
# NOTE(review): $myarg_1 and $myarg_2 are never assigned in this file (and
#   "use strict" is commented out) -- presumably meant to come from @ARGV;
#   confirm.
# NOTE(review): several literals look as if markup was stripped from them:
#   empty patterns such as m||g and (?:)+, "print $fhHTML < ENDDOC ;" with no
#   heredoc body, s/(.{1,512})/\n/ discarding its capture, and the licence
#   notice ending in "see ." -- verify against the original script.
# NOTE(review): as laid out here "=cut" is not at the start of a line, so the
#   POD block is never terminated, and the inline "#" comments swallow the
#   code that follows them on the same line -- verify the line breaks.
# NOTE(review): none of the open() calls is checked; "\1" is used in
#   substitution replacements where "$1" is the usual form; $fhHTML is not
#   declared with "my".
# NOTE(review): joinParseTree() returns nothing for a one-element node (the
#   commented-out code returned ${_[0]}), so such text is left out of the
#   joined string -- confirm this is intended.
=pod synopsis.pl: Programme to demonstrate the use of TokensV2.pm. This script should serve both as a synopsis on using TokensV2.pm and as a programme to format MS Messenger Logs recovered with Photorec. See http://www.cgsecurity.org/wiki/PhotoRec Copyright 2013 Gabriel Czernikier This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . =cut #use strict; use TokensV2; sub printFile; my @FORMAT = ( ['(?:)+(?:)+.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|((?:)+)(?:)+(.*?)|; my $F = join '
', $f =~ m||g; print $fh "

($d $t) $F
$T

"; } ], ['.*?.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|.*?(.*?)|; print $fh "

($d $t) $f
$T

"; } ], ['.*?.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|.*?(.*?)|; print $fh "

($d $t) $f
$T

"; } ], ['.*?.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|.*?(.*?)|; print $fh "

($d $t) $f
$T

"; } ], ['.*?.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|.*?(.*?)|; print $fh "

($d $t) $f
$T

"; } ], ['.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|(.*?)|; print $fh "

($d $t) $f
$T

"; } ], ['.*?', sub { my $fh = $_[1]; my ($d, $t, $f, $s, $T) = $_[0] =~ m|(.*?)|; print $fh "

($d $t) $f
$T

"; } ] ); sub printFile { return if @_==1; #if (@_==2) { # my $fh = $_[1]; # ${_[0]} =~ s/${_[0]}

\n"; # return; #} return if @_==2; # @_==5 printFile @{$_[1]}, $_[4]; my $coderef = $FORMAT[$_[3]][1]; &$coderef($_[0], $_[4]); my $fh = $_[4]; print $fh "\n"; printFile @{$_[2]}, $_[4]; } sub joinParseTree { return if @_==0; return if @_==1; #if (@_==1) { # return ${_[0]}; #} # @_==4 return joinParseTree(@{$_[1]}) . $_[0] . joinParseTree(@{$_[2]}); } open my $fH, "<:encoding(utf8)", "tokens-processed.txt"; # H for HASH, not handle my %HASH = map { /([^\s]*)/; $1 => undef } grep $_ ne "\n", <$fH>; close $fH; open $fH, ">>:encoding(utf8)", "tokens-processed.txt"; print $fH "----------bookmark----------\n"; local $/ = undef; my $regex = join "\n", map $$_[0], @FORMAT; while(glob "$myarg_1/**") { print "$_\n"; next if exists $HASH{$_}; # Need to accept arbitrary data, then match only over UTF-8 data # i.e., Need to let non utf-8 data to be silently ignored # See perldoc PerlIO open my $fh, "<:encoding(utf8)", $_; my $text = <$fh>; # clean up of control characters (produced as from Photorec 1.14 and earlier) $text =~ s/[\x00-\x1F]|\x7F//sog; my $oName = $_; $oName =~ s/$myarg_1/$myarg_2/; $oName =~ s/\.txt/.html/; my @parse_tree; eval { local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required alarm 5; @parse_tree = parse($text, $regex); alarm 0; }; if ($@) { die unless $@ eq "alarm\n"; # propagate unexpected errors # timed out print $fH "$_ timed out\n"; next; } my $parse_text = joinParseTree @parse_tree; open $fhHTML, ">:encoding(utf8)", $oName; print $fhHTML < ENDDOC ; printFile @parse_tree, $fhHTML; $text =~ s/-( *)(?=-)/- \1/og; $text =~ s/(.{1,512})/\n/og; print $fhHTML " $text"; $parse_text =~ s/-( *)(?=-)/- \1/og; $parse_text =~ s/(.{1,512})/\n/og; print $fhHTML " $parse_text "; close $fhHTML; print $fH "$_\n"; } 1;