THIS IS A TEST INSTANCE. ALL YOUR CHANGES WILL BE LOST!!!!
...
Requirements
You will need giftopnm and jpegtopnm (both from the netpbm package)
and gocr installed.
Additionally, you will need the perl module
...
No Format |
---|
# FuzzyOcr plugin, version 1
# written by Christian Holler decoder_at_own-hero_dot_net
#
# SpamAssassin plugin that runs GIF/JPEG attachments through gocr and counts
# approximate matches of configured words in the recognized text.

package FuzzyOcr;

use strict;
use warnings;

use File::Temp qw(tempfile);
use Mail::SpamAssassin;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::Plugin;
use String::Approx 'adistr';

our @ISA = qw(Mail::SpamAssassin::Plugin);

# Words to look for (filled by "focr_word" config lines) and the number of
# matches found by the most recent check_fuzzy_ocr() run.
our @words = ();
our $cnt   = 0;

# Default values
our $treshold = "0.3";    # a match counts when abs(adistr) is below this
our $countreq = 2;        # matches needed before the rule hits

# Content type => program that converts that image format to PNM on stdout.
my %converter_for = (
    'image/gif'  => '/usr/bin/giftopnm',
    'image/jpeg' => '/usr/bin/jpegtopnm',
);

# constructor: register the eval rule
sub new {
    my ( $class, $mailsa ) = @_;
    $class = ref($class) || $class;
    my $self = $class->SUPER::new($mailsa);
    bless( $self, $class );
    $self->register_eval_rule("check_fuzzy_ocr");
    return $self;
}

# Handle the plugin's configuration keys:
#   focr_word            - adds a word to @words
#   focr_treshold        - sets $treshold
#   focr_counts_required - sets $countreq
# Returns 1 when the key was handled here (even if the value is 0 or empty),
# 0 otherwise so that another plugin can claim the line.
sub parse_config {
    my ( $self, $opts ) = @_;

    my $key = $opts->{key};
    return 0 unless defined $key;

    if ( $key eq "focr_word" ) {
        push @words, $opts->{value};
    }
    elsif ( $key eq "focr_treshold" ) {
        $treshold = $opts->{value};
    }
    elsif ( $key eq "focr_counts_required" ) {
        $countreq = $opts->{value};
    }
    else {
        return 0;
    }

    $self->inhibit_further_callbacks();
    return 1;
}

# Pipe one decoded image part through "<converter> - | gocr -i -" and return
# the lines of text gocr produced (an empty list if anything fails).
sub _ocr_text_lines {
    my ( $converter, $part ) = @_;

    # A private, unpredictably named temp file instead of /tmp/...$$, so a
    # local user cannot pre-create or symlink the output path.
    my ( $tmp_fh, $tmp_name ) = eval {
        tempfile( 'spamassassin.focr.XXXXXX', TMPDIR => 1, UNLINK => 0 );
    };
    return unless defined $tmp_name;
    close $tmp_fh;

    # The pipeline needs a shell, so quote the output path defensively.
    ( my $quoted = $tmp_name ) =~ s/'/'\\''/g;
    my $command = "$converter - | /usr/bin/gocr -i - > '$quoted'";

    # If the converter exits early (e.g. on a corrupt image), writing to the
    # pipe would raise SIGPIPE; ignore it so print simply fails instead.
    local $SIG{PIPE} = 'IGNORE';

    my @lines;
    if ( open( my $ocr, '|-', $command ) ) {
        binmode $ocr;
        foreach my $chunk ( $part->decode() ) {
            next unless defined $chunk;
            print {$ocr} $chunk or last;
        }

        # close waits for the pipeline to finish; a failed pipeline just
        # leaves little or no text to read back.
        close $ocr;

        if ( open( my $result, '<', $tmp_name ) ) {
            @lines = <$result>;
            close $result;
        }
    }

    unlink $tmp_name;
    return @lines;
}

# Eval rule: OCR every image/gif and image/jpeg part of the message, count
# approximate occurrences of the configured words in $cnt, and report a
# FUZZY_OCR hit directly when at least $countreq were found.
# Always returns 0; the hit, if any, is reported through _handle_hit with a
# score of 4 plus one point per match beyond $countreq.
sub check_fuzzy_ocr {
    my ( $self, $pms ) = @_;
    $cnt = 0;

    foreach my $part ( $pms->{msg}->find_parts("image") ) {
        my ($ctype) = Mail::SpamAssassin::Util::parse_content_type(
            $part->get_header('content-type') );
        next unless defined $ctype;

        my $converter = $converter_for{$ctype} or next;

        foreach my $line ( _ocr_text_lines( $converter, $part ) ) {

            # Keep only letters and spaces, then compare case-insensitively.
            ( my $text = $line ) =~ s/[^a-zA-Z ]//g;
            $text = lc $text;
            next unless $text =~ /[a-z]/;    # nothing recognizable on this line

            foreach my $word (@words) {
                next unless defined $word && length $word;
                my $distance = adistr( lc $word, $text );
                $cnt++ if defined $distance && abs($distance) < $treshold;
            }
        }
    }

    if ( $cnt >= $countreq ) {
        my $score       = 4 + ( $cnt - $countreq );
        my $description = $pms->{conf}->{descriptions}->{FUZZY_OCR};
        $description = '' unless defined $description;
        $pms->_handle_hit( "FUZZY_OCR", $score, "BODY: ",
            $description . " ($cnt word occurrences found)" );
    }

    return 0;
}

1;