Changed file extensions from txt to {pl|pm}

author: unknown <gabocze@git.gnu.org.ua> 2014-09-23 04:14:19 -0300
committer: unknown <gabocze@git.gnu.org.ua> 2014-09-23 04:14:19 -0300
commit: 84251691307b7e99bd7cbd15a20190ca14c65a1d (patch)
tree: 4b825dc642cb6eb9a060e54bf8d69288fbee4904
parent: 33a8e325b372a62101b5fbe1842a164479d0adcc (diff)
download: vital-84251691307b7e99bd7cbd15a20190ca14c65a1d.tar.gz
vital-84251691307b7e99bd7cbd15a20190ca14c65a1d.tar.bz2
2 files changed, 0 insertions, 340 deletions
diff --git a/TokensV2.pm.txt b/TokensV2.pm.txt
deleted file mode 100644
index 619d7df..0000000
--- a/TokensV2.pm.txt
+++ /dev/null
@@ -1,174 +0,0 @@
-=pod
-  TokensV2: Programme to discover tokens, where there are not.
-
- Copyright 2013 Gabriel Czernikier
-
-
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-=cut
-use strict;
-#package declarations
-sub digest_single;
-sub parse_programme;
-sub printAll;
-
-my @REGEX;
-my @MASK;
-my $long_tokens;
-my @DIGEST_SINGLE;
-my $literal_char_count;
-
-sub digest_single {   # $target, $regex, $eaten_left_literal_chars, $eaten_right_literal_chars
-  #return unless $_[1] ne '';
-  #return if length $_[1] < 8;   # x>(.*?)<
-  return unless ($_[2]+$_[3])/$literal_char_count<0.999999;
-  return @{$DIGEST_SINGLE[$_[2]][$_[3]]} if defined $DIGEST_SINGLE[$_[2]][$_[3]];
-  pos($_[0]) = undef;
-  goto VISITING unless $_[0] =~ /(${_[1]})/g;
-  my $digit = 2;
-  while( my $cg = eval '$'.$digit++ ) {
-    my @suspicious_tokens = $cg =~ /$long_tokens/g;
-    goto VISITING if @suspicious_tokens>=2;
-  }
-  my $pff = (pos $_[0]);
-  my $pii = $pff - (length $1) if defined $pff;
-  $DIGEST_SINGLE[$_[2]][$_[3]] = [$_[2], $_[3], $pii, $pff];
-  return @{$DIGEST_SINGLE[$_[2]][$_[3]]};
-
-  VISITING:
-  my $re = $_[1];
-  # strip off left shelter, also understood as walking righwards across the regex .+? stopping at a hopefuly serviceable string
-  goto VISITING_2 unless $re =~ s/(.+?)(.\(\.\*\?\).|\([^.])/\2/;  # .+? is also the "shelter"
-  my $increment_eaten_literal_chars = $+[1];
-  $increment_eaten_literal_chars -= 5 if $1 =~ /\(\.\*\?\)/;   # discount the single occurrence of non-literal chars: (.*?), left alone surrounding ""
-  # expansions
-  $re =~ s/^\(\?:(?:[^)]|\)[^?+*])+\)\*//;
-  $re =~ s/^\(\?:((?:[^)]|\)[^?+*])+)\)\+/\1\(\?:\1\)\*/;
-  $re =~ s/^\(\?:((?:[^)]|\)[^?+*])+)\)\?/\1/;
-  my ($el, $er, $pi, $pf) = digest_single $_[0], $re, $_[2]+$increment_eaten_literal_chars, $_[3];
-
-  VISITING_2:
-  $re = $_[1];
-  # strip off right shelter, also understood as walking leftwards across the regex .+? stopping at a hopefuly serviceable string
-  goto SUBDIGEST unless $re =~ s/(.*(?:.\(\.\*\?\).|\)[?+*]))(.+?)$/\1/;  # .+? is the "shelter"
-
-  $increment_eaten_literal_chars = $+[2] - $-[2];
-  $increment_eaten_literal_chars -= 5 if $2 =~ /\(\.\*\?\)/;   # discount the single occurrence of non-literal chars: (.*?), left alone surrounding ""
-  # expansions
-  $re =~ s/\(\?:(?:[^)]|\)[^?+*])+\)\*$//;
-  $re =~ s/\(\?:((?:[^)]|\)[^?+*])+)\)\+$/\(\?:\1\)\*\1/;
-  $re =~ s/\(\?:(?:[^)]|\)[^?+*])+\)\?$//;
-  my ($el2, $er2, $pi2, $pf2) = digest_single $_[0], $re, $_[2], $_[3]+$increment_eaten_literal_chars;
-
-  SUBDIGEST:
-  $DIGEST_SINGLE[$_[2]][$_[3]] = [$el, $er, $pi, $pf] if (defined $pi) && ($er+$el<=$er2+$el2 || !defined $pi2);
-  return @{$DIGEST_SINGLE[$_[2]][$_[3]]} if defined $DIGEST_SINGLE[$_[2]][$_[3]];
-  $DIGEST_SINGLE[$_[2]][$_[3]] = [$el2, $er2, $pi2, $pf2] if (defined $pi2) && ($er2+$el2<$er+$el || !defined $pi);
-  return @{$DIGEST_SINGLE[$_[2]][$_[3]]} if defined $DIGEST_SINGLE[$_[2]][$_[3]];
-  $DIGEST_SINGLE[$_[2]][$_[3]] = [];
-  return @{$DIGEST_SINGLE[$_[2]][$_[3]]};
-}
-
-sub digest_multiple {
-  my $regex_num = 0;
-  my $Pos_Ini;
-  my $Eaten_Left;
-  my $Eaten_Right;
-  my $Pos_Fin;
-  my $Regex_Num;
-
-  my @DIGEST_MULTIPLE = ();
-  for my $regex (@REGEX) {
-    @DIGEST_SINGLE = undef;
-    my @literal_chars = $regex =~ /[^.*(?:)+]/g;
-    $literal_char_count = scalar @literal_chars;
-    my ($eaten_left, $eaten_right, $pos_ini, $pos_fin) = digest_single $_[0], $regex, 0, 0;
-    return $eaten_left, $eaten_right, $pos_ini, $pos_fin, $regex_num if(($eaten_right+$eaten_left)/$literal_char_count<0.2);
-    push @DIGEST_MULTIPLE, [$eaten_left, $eaten_right, $pos_ini, $pos_fin, $regex_num] if defined $pos_ini && $pos_fin!=0;
-    $regex_num++;
-  }
-
-  $regex_num = 0;
-  while(@DIGEST_MULTIPLE!=0) {
-    my ($eaten_left, $eaten_right, $pos_ini, $pos_fin) = @{shift @DIGEST_MULTIPLE};
-    if($eaten_right+$eaten_left<$Eaten_Right+$Eaten_Left || !defined $Pos_Ini) {
-      $Pos_Ini = $pos_ini;
-      $Eaten_Left = $eaten_left;
-      $Eaten_Right = $eaten_right;
-      $Pos_Fin = $pos_fin;
-      $Regex_Num = $regex_num;
-    }
-    $regex_num++;
-  }
-  return $Eaten_Left, $Eaten_Right, $Pos_Ini, $Pos_Fin, $Regex_Num if defined $Pos_Ini;
-}
-
-sub parse_programme {
-  return if $_[0] eq '';
-  return $_[0] if length $_[0] < 8; # <x>y</x>
-  my ($eaten_left, $eaten_right, $pos_ini, $pos_fin, $regex_num) = digest_multiple($_[0]);
-  if(not defined $pos_ini) {
-    #my $oldfh = select;
-    #select STDERR;
-    #print "$ARGV[1], [ini-no-parseable]${target}[fin-no-parseable]\n";
-    #select $oldfh;
-    return $_[0];
-  }
-  my $mask_right = @MASK[$regex_num];
-  $mask_right =~ s/.*(.{$eaten_right})/\1/;
-  my $mask_left = @MASK[$regex_num];
-  $mask_left =~ s/(.{$eaten_left}).*/\1/;
-  my $match_length = $pos_fin-$pos_ini;
-  my ($target_left,$match,$target_right) = $_[0] =~ /(.{$pos_ini})(.{$match_length})(.*)/;
-  shift;  # discard unused argument
-  return $mask_left.$match.$mask_right, +[parse_programme $target_left], +[parse_programme $target_right], $regex_num;
-}
-
-sub printAll {
-  return if @_==0;
-  if (@_==1) {
-    print "[ini-nonparse]${_[0]}[fin-nonparse]\n";
-    return;
-  }
-  printAll @{$_[1]};
-  print "[ini-prod]${_[0]}[fin-prod]\n";
-  printAll @{$_[2]};
-}
-
-sub parse {
-  @REGEX = ();
-  @MASK = ();
-  my $target = shift;
-  my $regex = shift;
-  while($regex =~ /^(.+)$/mg) {
-    my $_ = $1;
-    my $other = $_;
-    # strip out non-literal chars from MASK
-    $other =~ s/\(\?:(.+?)\)\+/\1/g;
-    $other =~ s/\(\?:(.+?)\)\?/\1/g;
-    $other =~ s/\.\*\?//g;
-    push @MASK, $other;
-    # surround wildcards with capturing group for REGEX
-    s/(\.\*\?)/\(\1\)/g;
-    push @REGEX, $_;
-  }
-
-  $long_tokens = '\b' . join '\b|\b', grep length>=3,keys %{+{  map +($_=>undef), map /\w+/g, @REGEX  }};
-  $long_tokens = $long_tokens . '\b';
-
-  parse_programme $target;
-}
-
-1;
-\ No newline at end of file
diff --git a/synopsis.pl.txt b/synopsis.pl.txt
deleted file mode 100644
index f3aa9e1..0000000
--- a/synopsis.pl.txt
+++ /dev/null
@@ -1,166 +0,0 @@
-=pod
-  synopsis.pl: Programme to demonstrate the use of TokensV2.pm.
-  This script should serve both as a synopsis on using TokensV2.pm and as a programme to format MS Messenger Logs recovered with Photorec.
-  See http://www.cgsecurity.org/wiki/PhotoRec
-
-
- Copyright 2013 Gabriel Czernikier
-
-
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-=cut
-
-use TokensV2;
-
-sub printFile;
-my @FORMAT = (
-['<Message Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><From>(?:<User FriendlyName=".*?"/>)+</From><To>(?:<User FriendlyName=".*?"/>)+</To><Text(?: Style=".*?")?>.*?</Text></Message>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<Message Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?">(<From>(?:<User FriendlyName=".*?"/>)+</From>)<To>(?:<User FriendlyName=".*?"/>)+</To><Text(?: Style="(.*?)")?>(.*?)</Text></Message>|;
-    my $F = join '<br />', $f =~ m|<User FriendlyName="(.*?)"/>|g;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $F<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<Invitation Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><From><User FriendlyName=".*?"/></From><File>.*?</File><Text(?: Style=".*?")?>.*?</Text></Invitation>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<Invitation Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><From><User FriendlyName="(.*?)"/></From><File>.*?</File><Text(?: Style="(.*?)")?>(.*?)</Text></Invitation>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<InvitationResponse Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><From><User FriendlyName=".*?"/></From><File>.*?</File><Text(?: Style=".*?")?>.*?</Text></InvitationResponse>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<InvitationResponse Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><From><User FriendlyName="(.*?)"/></From><File>.*?</File><Text(?: Style="(.*?)")?>(.*?)</Text></InvitationResponse>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<Invitation Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><From><User FriendlyName=".*?"/></From><Application>.*?</Application><Text(?: Style=".*?")?>.*?</Text></Invitation>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<Invitation Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><From><User FriendlyName="(.*?)"/></From><Application>.*?</Application><Text(?: Style="(.*?)")?>(.*?)</Text></Invitation>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<InvitationResponse Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><From><User FriendlyName=".*?"/></From><Application>.*?</Application><Text(?: Style=".*?")?>.*?</Text></InvitationResponse>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<InvitationResponse Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><From><User FriendlyName="(.*?)"/></From><Application>.*?</Application><Text(?: Style="(.*?)")?>(.*?)</Text></InvitationResponse>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<Join Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><User FriendlyName=".*?"/><Text(?: Style=".*?")?>.*?</Text></Join>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<Join Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><User FriendlyName="(.*?)"/><Text(?: Style="(.*?)")?>(.*?)</Text></Join>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-],
-['<Leave Date=".*?" Time=".*?" DateTime=".*?" SessionID=".*?"><User FriendlyName=".*?"/><Text(?: Style=".*?")?>.*?</Text></Leave>',
-  sub {
-    my $fh = $_[1];
-    my ($d, $t, $f, $s, $T) = $_[0] =~ m|<Leave Date="(.*?)" Time="(.*?)" DateTime=".*?" SessionID=".*?"><User FriendlyName="(.*?)"/><Text(?: Style="(.*?)")?>(.*?)</Text></Leave>|;
-    print $fh "<p><font size=\"-2\">($d&nbsp;$t)</font> $f<br /><span style=\"$s\">$T</span></p>";
-  }
-]
-);
-
-sub printFile {
-  return if @_==1;
-  if (@_==2) {
-    my $fh = $_[1];
-    ${_[0]} =~ s/</&lt;/g;
-    print $fh "<p>${_[0]}</p>\n";
-    return;
-  }
-  # @_==5
-  printFile @{$_[1]}, $_[4];
-  my $coderef = $FORMAT[$_[3]][1];
-  &$coderef($_[0], $_[4]);
-  my $fh = $_[4];
-  print $fh "\n";
-  printFile @{$_[2]}, $_[4];
-}
-
-sub joinParseTree {
-  return if @_==0;
-  if (@_==1) {
-    return ${_[0]};
-  }
-  # @_==4
-  return joinParseTree(@{$_[1]}) . $_[0] . joinParseTree(@{$_[2]});
-}
-
-open my $fH, "<:encoding(utf8)", "tokens-processed.txt"; # H for HASH, not handle
-my %HASH = map { /([^\s]*)/; $1 => undef } grep $_ ne "\n", <$fH>;
-close $fH;
-open $fH, ">>:encoding(utf8)", "tokens-processed.txt";
-print $fH "----------bookmark----------\n";
-
-local $/ = undef;
-
-my $regex = join "\n", map $$_[0], @FORMAT;
-while(glob "TXT/**") {
-  print "$_\n";
-  next if exists $HASH{$_};
-  open my $fh, "<:encoding(utf8)", $_;
-  my $text = <$fh>;
-  my $oName = $_;
-  $oName =~ s/TXT/HTML/;
-  $oName =~ s/\.txt/.html/;
-  my @parse_tree;
-    eval {
-            local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
-            alarm 5;
-      @parse_tree = parse($text, $regex);
-            alarm 0;
-    };
-    if ($@) {
-            die unless $@ eq "alarm\n";   # propagate unexpected errors
-      # timed out
-      print $fH "$_ timed out\n";
-      next;
-    }
-  my $parse_text = joinParseTree @parse_tree;
-  open $fhHTML, ">:encoding(utf8)", $oName;
-  print $fhHTML <<ENDDOC
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-</head>
-<body>
-ENDDOC
-;
-  printFile @parse_tree, $fhHTML;
-  $text =~ s/-( *)(?=-)/- \1/g;
-  $text =~ s/(.{1,512})/<!-- \1 -->\n/g;
-  print $fhHTML "
-<!-- Original text (cut on every 512 Char, -( *)(?=-) sequences escaped as - \\1):-->
-$text";
-  $parse_text =~ s/-( *)(?=-)/- \1/g;
-  $parse_text =~ s/(.{1,512})/<!-- \1 -->\n/g;
-  print $fhHTML "
-<!-- Parsed text (cut on every 512 Char, -( *)(?=-) sequences escaped as - \\1):-->
-$parse_text
-</body>
-</html>";
-  close $fhHTML;
-  print $fH "$_\n";
-}
-
author	unknown <gabocze@git.gnu.org.ua>	2014-09-23 04:14:19 -0300
committer	unknown <gabocze@git.gnu.org.ua>	2014-09-23 04:14:19 -0300
commit	84251691307b7e99bd7cbd15a20190ca14c65a1d (patch)
tree	4b825dc642cb6eb9a060e54bf8d69288fbee4904
parent	33a8e325b372a62101b5fbe1842a164479d0adcc (diff)
download	vital-84251691307b7e99bd7cbd15a20190ca14c65a1d.tar.gz vital-84251691307b7e99bd7cbd15a20190ca14c65a1d.tar.bz2