#!/usr/bin/perl
# $Id: extract-data 40 2012-03-04 20:02:13Z kdeugau $
#
# Extract relay IP, URI hosts from mail in folder
#
# Heavily reworked 2009/01/21 by Kris Deugau <kdeugau@deepnet.cx>
# from original learn-spam-from-IMAP script:
# dmz@dmzs.com - March 19, 2004
# http://www.dmzs.com/tools/files/spam.phtml
# LGPL
#
# Hack-n-patch'ed to convert to IP and URI extraction script:
# Kris Deugau <kdeugau@deepnet.cx> 2009/05/19
#
# Little bits to feed a lightweight package-creating tool:
##wrapreq libmail-imapclient-perl (>=3.11), libio-socket-ssl-perl, install-sa
##wrapver 0.1
##wrapsum Extract relay IP and URI hosts from mail in IMAP folder

use strict;
use warnings;
use Time::Local;
use POSIX qw(strftime);
use IO::Socket::SSL;
use Mail::IMAPClient;
use Getopt::Std;
use NetAddr::IP;

# hmm. can't make (enough) sense of the docs.  gonna have to parse headers for IPs myself...  :/
# but we did find enough to extract the URIs...
# only required if using a custom SA install
#use lib '/opt/spamassassin/share/perl/5.10.0';
use Mail::SpamAssassin;
use Mail::SpamAssassin::PerMsgStatus;

my %opts;
getopts("druv", \%opts);

my $debug = ($opts{d} ? 1 : 0);
my $sadebug = 0;	# sa-learn -D spits out a LOT of useless crap - better to only activate if specifically needed
my $imapdebug = 0;	# so does Mail::IMAPClient...  as in, the whole content of all the mail you look at.  O_o
my $delete_after_learning = 0;	# set to 1 if you want to delete mail right away after learning
				# - note this makes it rather harder to deep-scan the messages to create local rules
my $verbose = ($opts{v} ? 1 : 0);

my $tmpdir = '/var/tmp';
my $salearn = '/usr/local/bin/sa-learn';
my $learnargs = ($sadebug ? ' -D' : '').' --showdots ';

die "eeep!  $salearn doesn't exist!\n" if ! -e $salearn;

my $folder = 'reported.needsextraction';
#my $folder = 'confirmed';

# non-SSL IMAP settings:
#my $imap = Mail::IMAPClient->new( Server=> 'imapmailhost:143',
#                                  User => 'imapspamuser',
#                                  Password => 'imapspamuserpassword',
#                                  Debug => $imapdebug);

print "about to open IMAP connection\n" if $debug;

my $imap = Mail::IMAPClient->new(
	User => 'junkmail', 
	Password => 'k3c86z2',
	Socket   => IO::Socket::SSL->new(
		Proto    => 'tcp',
		PeerAddr => 'mail.company.com',
		PeerPort => 993, # IMAP over SSL standard port
		),
	Debug => $imapdebug,
	)

	or die "IMAP login failed: $@\n";

if (!defined($imap)) { die "IMAP Login Failed"; }

my $msgcount = $imap->message_count($folder);

print $msgcount, " message(s) to process\n";

## Process the spam mailbox
$imap->select($folder);
my @msgs = $imap->search("ALL");

# Since the data goes into files anyway, why not make a mockery of a maildir and let sa-learn iterate over it?
#my $spamtmp = "$tmpdir/spam.".time.".$$";  # this should give us a suitably pseudorandom directory
#mkdir $spamtmp or die "couldn't create temporary pen for spam: $!";

my $spamtest = Mail::SpamAssassin->new();
  # don't keep dereferencing this

my %iplist;
my %urilist;

# put together an array of netblocks we won't/can't list for various reasons
my @dontlistme = (
    # Microsoft/Hotmail/Windows Live Mail
#IP-Network                    207.68.128.0/18
#IP-Network                    207.68.192.0/20
#IP-Network-Block              207.068.128.000 - 207.068.207.255
	NetAddr::IP->new("65.52.0.0/14"),
	NetAddr::IP->new("207.68.176.96/27"),
#IP-Network                    157.54.0.0/15
#IP-Network                    157.56.0.0/14
#IP-Network                    157.60.0.0/16
	NetAddr::IP->new("157.55.0.192/26"),
	NetAddr::IP->new("157.55.1.128/26"),
	NetAddr::IP->new("157.55.2.0/24"),
#IP-Network                    207.46.0.0/16
	NetAddr::IP->new("207.46.66.0/28"),
	NetAddr::IP->new("213.199.144.0/20"),
	NetAddr::IP->new("216.32.180.0/22"),
# "Frontbridge"/"bigfish" (inherited)
#IP-Network                    204.231.192.0/24
	NetAddr::IP->new("204.231.192.41"),

    # AOL - note only some IPs show mail-ish rDNS
    #IP-Network                    64.12.0.0/16	
	NetAddr::IP->new("64.12.78.136/30"),
	NetAddr::IP->new("64.12.78.142"),
	NetAddr::IP->new("64.12.100.31"),
	NetAddr::IP->new("64.12.102.136/29"),
	NetAddr::IP->new("64.12.140.129"),
	NetAddr::IP->new("64.12.140.130"),
	NetAddr::IP->new("64.12.143.144/30"),
	NetAddr::IP->new("64.12.143.152/30"),
	NetAddr::IP->new("64.12.206.39"),
	NetAddr::IP->new("64.12.206.40/30"),
	NetAddr::IP->new("64.12.207.128/27"),
	NetAddr::IP->new("64.12.207.144/29"),
	NetAddr::IP->new("64.12.207.152/30"),
	NetAddr::IP->new("64.12.207.160/28"),
	NetAddr::IP->new("64.12.207.176/29"),
    #IP-Network                    205.188.0.0/16
	NetAddr::IP->new("205.188.58.0/28"),
	NetAddr::IP->new("205.188.91.94/31"),
	NetAddr::IP->new("205.188.91.96/31"),
	NetAddr::IP->new("205.188.105.143"),
	NetAddr::IP->new("205.188.105.144/30"),
	NetAddr::IP->new("205.188.169.196/29"),
	NetAddr::IP->new("205.188.249.64/29"),
	NetAddr::IP->new("205.188.249.128/29"),
	NetAddr::IP->new("205.188.249.148/30"),
	NetAddr::IP->new("205.188.169.200/23"),
	NetAddr::IP->new("205.188.255.0/28"),

    # Google/GMail
	NetAddr::IP->new("209.85.128.0/17"),
	NetAddr::IP->new("72.14.192.0/18"),
	NetAddr::IP->new("74.125.0.0/16"),

    # Yahoo!/Inktomi
	NetAddr::IP->new("98.136.0.0/14"),
	NetAddr::IP->new("66.196.64.0/18"),
	NetAddr::IP->new("67.195.0.0/16"),
	NetAddr::IP->new("69.147.64.0/18"),
	NetAddr::IP->new("206.190.32.0/18"),
	NetAddr::IP->new("68.142.192.0/18"),
	NetAddr::IP->new("216.252.96.0/19"),
	NetAddr::IP->new("124.83.128.0/17"),
	NetAddr::IP->new("217.146.184.0/21"),
	NetAddr::IP->new("124.108.96.0/20"),
	NetAddr::IP->new("76.13.0.0/16"),
	NetAddr::IP->new("68.180.128.0/17"),
	NetAddr::IP->new("209.191.64.0/18"),
	NetAddr::IP->new("212.82.104.0/21"),
	NetAddr::IP->new("66.163.160.0/19"),
#inetnum:        87.248.110.0 - 87.248.111.255
#route:          87.248.104.0/21
	NetAddr::IP->new("87.248.110.0/23"),
	NetAddr::IP->new("203.188.200.0/22"),
	NetAddr::IP->new("217.12.0.0/20"),
	NetAddr::IP->new("77.238.184.0/23"),
#IP-Network                    74.6.0.0/16
	NetAddr::IP->new("74.6.114.48/24"),
	NetAddr::IP->new("74.6.228.32/27"),
	NetAddr::IP->new("74.6.228.64/26"),
	NetAddr::IP->new("202.165.96.0/21"),
#route:          87.248.112.0/21
#inetnum:        87.248.114.0 - 87.248.115.255
	NetAddr::IP->new("87.248.114.0/24"),
#route:          116.214.0.0/20
	NetAddr::IP->new("116.214.12.0/24"),
	NetAddr::IP->new("202.86.4.0/22"),
	NetAddr::IP->new("77.238.188/23"),
	NetAddr::IP->new("217.146.182.0/23"),
	NetAddr::IP->new("114.111.64.0/18"),
	NetAddr::IP->new("115.178.12.0/23"),
	NetAddr::IP->new("121.101.151.212"),
#inetnum:      121.101.144.0 - 121.101.159.255
	NetAddr::IP->new("121.101.144.0/20"),
	NetAddr::IP->new("66.94.224.0/19"),
	NetAddr::IP->new("203.104.16.0/21"),
	NetAddr::IP->new("124.108.120.0/21"),
#inetnum:        180.222.112.0 - 180.222.119.255
	NetAddr::IP->new("180.222.112.0/21"),
	NetAddr::IP->new("72.30.0.0/16"),
	NetAddr::IP->new("217.146.176.0/21"),
	NetAddr::IP->new("106.10.128.0/18"),

    # MessageLabs - may add these to trusted_networks instead
	NetAddr::IP->new("85.158.139.0/24"),
# observed: .35, .51
	NetAddr::IP->new("194.106.220.0/23"),
	NetAddr::IP->new("193.109.254.0/23"),
	NetAddr::IP->new("119.161.0.0/19"),
	NetAddr::IP->new("216.82.240.0/20"),
# buh? rDNS says Yahoo, but...
#inetnum:        203.209.224.0 - 203.209.255.255
#descr:          Alibaba (Beijing) Technology Co., Ltd.
	NetAddr::IP->new("203.209.230.22"),

    # Bell Canada - note only some IPs show mail-ish rDNS
    #IP-Network                    209.226.0.0/16
    #IP-Network                    207.236.0.0/16
	NetAddr::IP->new("209.226.175.0/24"),
	NetAddr::IP->new("207.236.237.0/26"),

    # Craigslist
    #IP-Network                    208.82.236.0/22
	NetAddr::IP->new("208.82.236.0/22"),

    # Apple.com/mac.com - note only some IPs show mail-ish rDNS
    #IP-Network                    17.0.0.0/8
    # asmtpout0(11-30).mac.com
    # 17.148.16.	011 -> 86	030 -> 105
	NetAddr::IP->new("17.148.16.64/26"),

    # Vodafone - note only some IPs show mail-ish rDNS
    #route:        212.183.128.0/19
	NetAddr::IP->new("212.183.156.224/29"),

    # Facebook - only exclude mail-ish hostnames
    # used SPF record to pick blocks
#"v=spf1 ip4:69.63.179.25 ip4:69.63.178.128/25 ip4:69.63.184.0/25 ip4:66.220.144.128/25
# ip4:66.220.155.0/24 ip4:69.171.232.128/25 ip4:66.220.157.0/25 ip4:69.171.244.0/24 mx -all"
    #IP-Network                    69.63.176.0/20
	NetAddr::IP->new("69.63.178.128/25"),
	NetAddr::IP->new("69.63.184.0/25"),
    #IP-Network                    66.220.144.0/20
	NetAddr::IP->new("66.220.144.128/25"),
	NetAddr::IP->new("66.220.155.0/24"),
	NetAddr::IP->new("66.220.157.0/25"),
    #IP-Network                    69.171.224.0/19
	NetAddr::IP->new("69.171.232.128/25"),
	NetAddr::IP->new("69.171.244.0/24"),

    # IBM Lotus Live - rdns mostly mail
    #CIDR:       8.12.152.0/24
	NetAddr::IP->new("8.12.152.0/24"),

# ISPs
    # Eastlink (formerly Persona [Sudbury etc]) - only a few IPs observed with mail-ish rDNS
    #IP-Network                    24.222.0.0/16
	NetAddr::IP->new("24.222.0.30"),
	NetAddr::IP->new("24.224.136.0/27"),

    # Cogeco - only a few IPs observed with mail-ish rDNS
    #IP-Network                    216.221.64.0/19
	NetAddr::IP->new("216.221.81.192"),
	NetAddr::IP->new("216.221.81.25"),
	NetAddr::IP->new("216.221.81.28/30"),
	NetAddr::IP->new("216.221.81.96/30"),
    # seznam.cz
    #route:          77.75.72.0/23
	NetAddr::IP->new("77.75.72.44"),

    # T-Online - only mail-ish hostnames
#route:        194.25.0.0/16
#inetnum:      194.25.134.0 - 194.25.134.255
#netname:      DTOS-ULM-001
#descr:        www.t-online.de
#descr:        mail.t-online.de
#descr:        php.t-online.de
	NetAddr::IP->new("194.25.134.80/29"),
	NetAddr::IP->new("194.25.134.16/29"),

    # Telestra (Australia)
    #inetnum:      61.9.128.0 - 61.9.255.255
    # may miss one or two
	# .168.135-152
	NetAddr::IP->new("61.9.168.136/29"),
	NetAddr::IP->new("61.9.168.144/29"),
	# .189.132-152
	NetAddr::IP->new("61.9.189.132/30"),
	NetAddr::IP->new("61.9.189.136/29"),
	NetAddr::IP->new("61.9.189.144/29"),
    # "TelestraClear" - related to above?
    #inetnum:      203.97.0.0 - 203.97.127.255
	NetAddr::IP->new("203.97.33.64"),
	NetAddr::IP->new("203.97.33.68"),
	NetAddr::IP->new("203.97.37.64"),

    # Earthlink
    #IP-Network                    207.69.0.0/16
	NetAddr::IP->new("207.69.200.28"),
    #IP-Network                    209.86.0.0/16
	NetAddr::IP->new("209.86.89.60/30"),
	NetAddr::IP->new("209.86.89.64/28"),

    # Sprint
    #IP-Network                    68.24.0.0/13
	# actually their PCS mail relay, so sez rDNS
	NetAddr::IP->new("68.28.27.84"),

    # Vodafone (New Zealand)
    # inetnum:        203.109.128.0 - 203.109.159.255
	NetAddr::IP->new("203.109.136/28"),

    # Optus (Australia)
    # inetnum:        211.28.0.0 - 211.31.255.255
	NetAddr::IP->new("211.29.132.0/24"),

    #IP-Network                    204.209.192.0/20
    #Org-Name                      AGT Limited.
    # telus tentacle?
    # 204.209.205.13 -> defout.telus.net. -> 204.209.205.13
	NetAddr::IP->new("204.209.205.13"),

    #route:          195.188.0.0/16
    #descr:          Telewest Broadband
    # blueyonder.co.uk
	NetAddr::IP->new("195.188.213.0/28"),

    #inetnum:      210.50.0.0 - 210.50.63.255
    #descr:        Primus Telecommunications
    # (Australia)
	NetAddr::IP->new("210.50.30.224/28"),

    # iiNet Limited (Australia)
    #inetnum:        203.10.1.0 - 203.10.1.255
	NetAddr::IP->new("203.10.1.232/29"),
	NetAddr::IP->new("203.10.1.240/29"),
    #inetnum:      203.59.0.0 - 203.59.255.255
	NetAddr::IP->new("203.59.1.104/29"),
	NetAddr::IP->new("203.59.1.128/26"),

    # Road Runner
    #IP-Network                    75.176.0.0/12
	NetAddr::IP->new("75.180.132.120/29"),

    # Comcast
    #IP-Network                    76.96.0.0/17
	NetAddr::IP->new("76.96.62.0/25"),

    #IP-Network                    142.146.0.0/16
    #Org-Name                      Rogers Communications Inc.
	NetAddr::IP->new("142.146.31.20/30"),

    #address:        Claranet UK Ltd
    #route:          195.8.64.0/19
    #inetnum:        195.8.89.32 - 195.8.89.47
    #descr:          Claranet UK SMTP relay platform
	NetAddr::IP->new("195.8.89.37"),

    #inetnum:         80.8.0.0 - 80.15.255.255
    #descr:           France Telecom S.A.
    #inetnum:        80.12.242.0 - 80.12.242.255 (several ranges)
    #descr:          Mail Essentials Project
    # orange.fr SMTP cluster.  whois WTF
	NetAddr::IP->new("80.12.242.0/24"),
    #route:        193.252.0.0/18
    #descr:        France Telecom
    # more "mail essentials project"
	NetAddr::IP->new("193.252.22.208/29"),

    #inetnum:         212.216.0.0 - 212.216.255.255
    #descr:           Telecom Italia Net
    #inetnum:        212.216.172.0 - 212.216.177.255
    #descr:          Telecom Italia IDC  - ISP&VAS MNGT
	NetAddr::IP->new("212.216.176.0/24"),

    #inetnum:        151.189.0.0 - 151.189.255.255
    #descr:          Arcor Online GmbH
	NetAddr::IP->new("151.189.21.32/27"),

    #CIDR:           74.208.0.0/16
    #OrgName:        1&1 Internet Inc.
	NetAddr::IP->new("74.208.122.35"),

    #inetnum:     187.31/16
    #owner:       Internet Group do Brasil SA
	# .16 and .17 seem to be SMTP relay hosts
	NetAddr::IP->new("187.31.0.16/31"),

    #address:        1&1 Internet AG
    #route:          217.72.192.0/20
    #descr:          Web.de
	NetAddr::IP->new("217.72.192.227"),

    #descr:          France Telecom Espana
    #route:          62.36.0.0/16
    #inetnum:        62.36.0.0 - 62.36.23.255
	NetAddr::IP->new("62.36.20.205"),

    # Primus (Canada)
    # CIDR:           216.254.192.0/19, 216.254.128.0/18
	NetAddr::IP->new("216.254.180.38"),

## Edumactional places

    # UAlberta - only one IP observed with mail-ish rDNS
    #IP-Network                    129.128.0.0/16
	NetAddr::IP->new("129.128.5.19"),

    # Queens University
    #IP-Network                    130.15.0.0/16
	NetAddr::IP->new("130.15.241.183"),

    # University of Maryland Baltimore County - mail servers only
    #CIDR:       130.85.0.0/16
	NetAddr::IP->new("130.85.25.76/30"),

    # University of Florida
    #IP-Network                    128.227.0.0/16
	NetAddr::IP->new("128.227.74.70"),
	NetAddr::IP->new("128.227.74.149"),
	NetAddr::IP->new("128.227.74.165"),

    # University of Texas
    #IP-Network                    129.109.0.0/16
	NetAddr::IP->new("129.109.195.0/28"),
    #IP-Network                    129.106.0.0/16
	NetAddr::IP->new("129.106.148.58"),

    # Roxbury Community College
    #IP-Network                    209.104.233.0/24
	NetAddr::IP->new("209.104.233.247"),

    # Stanford University
    #IP-Network                    171.64.0.0/14
	NetAddr::IP->new("171.67.219.80/30"),

    # Cleveland State University
    #IP-Network                    137.148.0.0/16
	NetAddr::IP->new("137.148.18.13/32"),

    # University of Missouri - dba the Missouri Research and Education Network (MOREnet)
    #CIDR:       204.184.0.0/15
	NetAddr::IP->new("204.185.165.125"),

    # Yale University
    #IP-Network                    130.132.0.0/16
	NetAddr::IP->new("130.132.50.7"),
	NetAddr::IP->new("130.132.50.146"),

    # Rutgers University
    #IP-Network                    165.230.0.0/16
	NetAddr::IP->new("165.230.151.182"),

    # Clemson University
    #IP-Network                    130.127.0.0/16
	NetAddr::IP->new("130.127.235.21"),

    # Wilkes University
    #IP-Network                    146.94.0.0/16
	NetAddr::IP->new("146.94.192.152"),

    # Virginia Community College System
    #IP-Network                    164.106.0.0/16
	NetAddr::IP->new("164.106.130.251"),

    # University of Virginia
    #IP-Network                    199.111.0.0/16
    # Mary Washington College
    #IP-Network                    199.111.64.0/19
	NetAddr::IP->new("199.111.84.18"),

    #inetnum:        130.95.0.0 - 130.95.255.255
    #address:        The University of Western Australia
	NetAddr::IP->new("130.95.3.211"),

    #inetnum:      130.56.0.0 - 130.56.255.255
    #address:      IIS, Australian National University
	NetAddr::IP->new("130.56.64.134"),

    #IP-Network                    129.81.0.0/16
    #Org-Name                      Tulane University
	NetAddr::IP->new("129.81.224.84"),

    #Org-Name                      Texas A&M University
    #IP-Network                    192.195.88.0/21
	NetAddr::IP->new("192.195.88.20"),
    #IP-Network                    165.95.0.0/16
	NetAddr::IP->new("165.95.144.40"),

    #IP-Network                    162.82.0.0/16
    #Org-Name                      William Beaumont Hospital
    # rDNS on seen IP ends in .edu
	NetAddr::IP->new("162.82.215.18"),

    #IP-Network                    169.232.0.0/16
    #Org-Name                      University of California, Los Angeles
	NetAddr::IP->new("169.232.46.169"),

    #IP-Network                    128.138.0.0/16
    #Org-Name                      University of Colorado
	NetAddr::IP->new("128.138.128.231"),

    #IP-Network                    209.221.168.0/24
    #Org-Name                      City University
	NetAddr::IP->new("209.221.168.36"),

    #IP-Network                    163.120.0.0/16
    #Org-Name                      DePauw University

    #IP-Network                    205.137.240.0/20
    #Org-Name                      Shelby County Schools
	NetAddr::IP->new("205.137.241.81"),

    #IP-Network                    199.17.0.0/16
    #Org-Name                      Minnesota State Colleges and Universities
	NetAddr::IP->new("199.17.25.194"),

    #CIDR:           162.129.0.0/16
    #OrgName:        The Johns Hopkins Medical Institutions
	NetAddr::IP->new("162.129.8.151"),
    #CIDR:           128.220.0.0/16
	NetAddr::IP->new("128.220.161.140"),

    #CIDR:           144.167.0.0/16
    #OrgName:        University of Arkansas at Little Rock
	NetAddr::IP->new("144.167.3.152"),

# List servers
    # Debian listserver - relays spam, so we can't list it.
	NetAddr::IP->new("82.195.75.100"),

# ESPs
    # moderately abusable, but mostly legit
    # Org-Name	iContact (Broadwick Corp./Preation Inc.)
	NetAddr::IP->new("216.27.93.0/25"),
	NetAddr::IP->new("207.254.213.192/26"),

    # are these guys an ESP?  rDNS in many blocks shows secureserver.net, with SMTPish overtones
    #Org-Name                      GoDaddy.com, Inc.
    #IP-Network                    64.202.160.0/19
	NetAddr::IP->new("64.202.160.0/24"),
	NetAddr::IP->new("64.202.165.0/26"),
	NetAddr::IP->new("64.202.165.180/30"),
	NetAddr::IP->new("64.202.165.192/29"),
	NetAddr::IP->new("64.202.165.224/28"),
    #IP-Network                    72.167.0.0/16
	NetAddr::IP->new("72.167.82.80/29"),
	NetAddr::IP->new("72.167.82.90"),
	NetAddr::IP->new("72.167.224.0/28"),
	NetAddr::IP->new("72.167.234.224/27"),
    #IP-Network                    208.109.0.0/16
	NetAddr::IP->new("208.109.80.23"),
	NetAddr::IP->new("208.109.80.58"),
	NetAddr::IP->new("208.109.80.24"),
	NetAddr::IP->new("208.109.80.74"),
	NetAddr::IP->new("208.109.80.80"),
	NetAddr::IP->new("208.109.80.81"),
    #IP-Network                    68.178.128.0/17
	NetAddr::IP->new("68.178.232.18"),

    #IP-Network                    173.201.0.0/16
	NetAddr::IP->new("173.201.193.0/23"),

    # EmailBrain - note no actual netblocks of their own.  :(
	NetAddr::IP->new("66.100.171.162"),
#163.171.100.66.in-addr.arpa domain name pointer eb08.ebhost9.com.
#164.171.100.66.in-addr.arpa domain name pointer a.eb08.ebhost9.com.
#165.171.100.66.in-addr.arpa domain name pointer b.eb08.ebhost9.com.
#166.171.100.66.in-addr.arpa domain name pointer c.eb08.ebhost9.com.

    # Tucows - don't recall if they're affiliated with anyone else
	#CIDR:           64.96.0.0/14
	NetAddr::IP->new("64.98.42.0/24"),

    # not really an ESP, exactly, but best-fit
    #IP-Network                    208.47.184.0/23
    #Org-Name                      SYNACOR
	# mailrelay.embarq.synacor.com
	NetAddr::IP->new("208.47.184.3"),

    #CIDR:           208.75.120.0/22
    #OrgName:        Constant Contact, Inc


# Mailbox providers

    # Hushmail
	#IP-Network                    65.39.178.0/24
	# all existent rdns in this range are smtp, but not all exist
	NetAddr::IP->new("65.39.178.128/27"),

# eBay/PayPal
	NetAddr::IP->new("66.211.160.0/19"),

# Misc legit
    # Texas Instruments
    #IP-Network                    192.94.94.0/24
	NetAddr::IP->new("192.94.94.40"),

# Government tentacles
    #inetnum:         193.39.144.0 - 193.39.159.255
    #descr:           City of Edinburgh District Council
	NetAddr::IP->new("193.39.157.39"),

    ); # done def for @dontlistme

MSG: for (my $i=0; $i<$msgcount; $i++) {
  my $msg = $imap->message_string($msgs[$i]);

  print ".";

  my $mail = $spamtest->parse($msg);
  my $status = $spamtest->check($mail);

# stolen from SA plugin bits
#sub parsed_metadata {
#  my ($self, $opts) = @_;
#  my $scanner = $status->{permsgstatus};

  my $skip_domains = $status->{main}->{conf}->{uridnsbl_skip_domains};

  # Generate the full list of html-parsed domains.
  my $uris = $status->get_uri_detail_list();

#  print "$msgs[$i]\n";
  my %msguris;

  while (my($uri, $info) = each %{$uris}) {
    # we want to skip mailto: uris
    next if ($uri =~ /^mailto:/);

    # no domains were found via this uri, so skip
    next unless ($info->{domains});

    next if $info->{types}->{img};
    foreach ( keys %{ $info->{domains} } ) {
      if (exists $skip_domains->{$_}) {
        next;
      }
      $msguris{$_}++;
      #print "  ".$_."\n";
    }

  #  print "$uri ".$info->{anchor_text}."\n";
  }

  foreach (keys %msguris) {
#    print "  $_ $msguris{$_}\n";
    $urilist{$_}++;
  }

  # now, get the relay IP

  my $stmsg = $status->get_message();
  my @untrusted = @{$stmsg->{metadata}->{relays_untrusted}};

  my $relayip = new NetAddr::IP $untrusted[0]->{ip};

# sigh.  messages tagged as spam already make life difficult by hiding the
# old Received: headers.  We'll just handle them manually for now.
  if (!$relayip) {
    print "phtui: ";
    my %headerlist = %{$imap->parse_headers($msgs[$i], "Received", "Subject")};
    my $recvnum = 0;
    my $recv = $headerlist{'Received'}[$recvnum];
    print "$recv\n";
    print "   $headerlist{'Subject'}[0]\n";
    next MSG;
  }

  foreach my $block (@dontlistme) {
    next MSG if $relayip->within($block);
  }
  $iplist{$relayip->addr}++ if $relayip;

#  last if $i > 2;
  sleep 1;
} # IMAP message iteration

print " Done.\n";

# mm.  don't really need times on the IP lists
#if ($opts{r}) {
#  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
#}
foreach my $ip (sort keys %iplist) {
  $ip =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)/;
  if ($opts{r}) {
    print "+$4.$3.$2.$1.spamhosts.company.com:127.0.0.2:900:::\n";
  } else {
    print "$ip\t $iplist{$ip}\n";
  }
}

if ($opts{u}) {
  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
}
foreach my $uri (sort keys %urilist) {
  my @hout = qx { host '$uri.multi.uribl.com'; host '$uri.company.dnsbl.' };
  if ($hout[0] =~ /NXDOMAIN/ && $hout[1] =~ /NXDOMAIN/) {
    if ($opts{u}) {
      print "+$uri.uribl.company.com:127.0.0.2:900:::\n";
    } else {
      # URI plus count
      print "$uri\t$urilist{$uri}\n";
    }
  }
}

$imap->close();

# Close IMAP connection cleanly.
$imap->logout();

# integrate learned stuff - journal sync etc IFF bayes_journal is set AND you want to sync right away
# irrelevant for SQL Bayes
#my $sarebuild = `/usr/bin/sa-learn --rebuild`;
#print "-------\nRebuild: ",$sarebuild,"\n-------\n" if $debug;
