source: trunk/dnsbl/extract-data@ 39

Last change on this file since 39 was 39, checked in by Kris Deugau, 12 years ago

/trunk/dnsbl

Update extract-data with minor functional tweaks and IP exclusions
from active production code

  • Property svn:executable set to *
  • Property svn:keywords set to Date Rev Author Id
File size: 22.9 KB
Line 
1#!/usr/bin/perl
2# Extract relay IP, URI hosts from mail in folder
3# Hack-n-patch from:
4# Heavily reworked from original:
5# dmz@dmzs.com - March 19, 2004
6# http://www.dmzs.com/tools/files/spam.phtml
7# LGPL
8# by:
9# Kris Deugau <kdeugau@deepnet.cx> 2009/01/21
10# Kris Deugau <kdeugau@deepnet.cx> 2009/05/19
11##wrapreq libmail-imapclient-perl (>=3.11), libio-socket-ssl-perl, install-sa
12##wrapver 0.1
13##wrapsum Extract relay IP and URI hosts from mail in IMAP folder
14# $Id: extract-data 39 2012-03-04 18:13:00Z kdeugau $
15# $URL$
16
17use strict;
18use warnings;
19use Time::Local;
20use POSIX qw(strftime);
21use IO::Socket::SSL;
22use Mail::IMAPClient;
23use Getopt::Std;
24use NetAddr::IP;
25
26# hmm. can't make (enough) sense of the docs. gonna have to parse headers for IPs myself... :/
27# but we did find enough to extract the URIs...
28# only required if using a custom SA install
29#use lib '/opt/spamassassin/share/perl/5.10.0';
30use Mail::SpamAssassin;
31use Mail::SpamAssassin::PerMsgStatus;
32
# Command-line switches (all are simple flags):
#   -d  print debugging messages
#   -r  print relay IPs as "+<reversed-ip>.spamhosts.company.com:..." records
#       instead of "IP<tab>count"
#   -u  print URI hosts as "+<host>.uribl.company.com:..." records (preceded by
#       a timestamp comment) instead of "host<tab>count"
#   -v  verbose
my %opts;
# getopts() returns false when it meets a switch it doesn't know; stop there
# rather than silently running with a mistyped option ignored.
getopts("druv", \%opts)
  or die "Usage: $0 [-d] [-r] [-u] [-v]\n";

my $debug = ($opts{d} ? 1 : 0);
my $sadebug = 0;	# sa-learn -D spits out a LOT of useless crap - better to only activate if specifically needed
my $imapdebug = 0;	# so does Mail::IMAPClient...  as in, the whole content of all the mail you look at.  O_o
my $delete_after_learning = 0;	# set to 1 if you want to delete mail right away after learning
				# - note this makes it rather harder to deep-scan the messages to create local rules
my $verbose = ($opts{v} ? 1 : 0);

my $tmpdir = '/var/tmp';
my $salearn = '/usr/local/bin/sa-learn';
my $learnargs = ($sadebug ? ' -D' : '').' --showdots ';

# NOTE(review): nothing visible in this script actually runs $salearn; this
# check looks like a leftover from the learning script this was forked from.
# Kept so existing deployments behave the same - confirm before removing.
die "eeep!  $salearn doesn't exist!\n" if ! -e $salearn;

# IMAP folder holding the reported spam to extract data from
my $folder = 'reported.needsextraction';
#my $folder = 'confirmed';
51
# non-SSL IMAP settings:
#my $imap = Mail::IMAPClient->new( Server=> 'imapmailhost:143',
#				User => 'imapspamuser',
#				Password => 'imapspamuserpassword',
#				Debug => $imapdebug);

print "about to open IMAP connection\n" if $debug;

# Open the TLS connection first and check it on its own.  If this is done
# inline in the Mail::IMAPClient constructor, a failed connect just passes
# undef as the Socket argument and the real cause of the failure is lost.
my $imapsock = IO::Socket::SSL->new(
	Proto    => 'tcp',
	PeerAddr => 'mail.company.com',
	PeerPort => 993,	# IMAP over SSL standard port
  )
  or die "IMAP SSL connection failed: $! ".IO::Socket::SSL::errstr()."\n";

# FIXME: credentials are hard-coded in a file kept under version control;
# they should be moved to a config file or the environment.
my $imap = Mail::IMAPClient->new(
	User     => 'junkmail',
	Password => 'k3c86z2',
	Socket   => $imapsock,
	Debug    => $imapdebug,
  )
  or die "IMAP login failed: $@\n";
74
## Process the spam mailbox
# select() returns false if the folder doesn't exist or can't be opened;
# carrying on after that would only produce confusing errors further down.
$imap->select($folder)
  or die "couldn't select IMAP folder '$folder': ".($imap->LastError || 'unknown error')."\n";
my @msgs = $imap->search("ALL");

# Count what the search actually returned, since @msgs is what the message
# loop indexes into.
my $msgcount = scalar @msgs;

print $msgcount, " message(s) to process\n";

# Since the data goes into files anyway, why not make a mockery of a maildir and let sa-learn iterate over it?
#my $spamtmp = "$tmpdir/spam.".time.".$$";	# this should give us a suitably pseudorandom directory
#mkdir $spamtmp or die "couldn't create temporary pen for spam: $!";

# One SpamAssassin instance is reused to parse and check every message.
my $spamtest = Mail::SpamAssassin->new();

# relay IP => number of messages seen from it
my %iplist;
# URI host => number of messages it appeared in
my %urilist;
92
# put together an array of netblocks we won't/can't list for various reasons
# Each entry is a NetAddr::IP object; the message loop below skips any message
# whose relay IP falls within one of these blocks.
# Always write addresses as full dotted quads - abbreviated forms such as
# "a.b.c/len" are ambiguous (classic inet_aton rules read "a.b.c" as a.b.0.c).
my @dontlistme = (
  # Microsoft/Hotmail/Windows Live Mail
#IP-Network 207.68.128.0/18
#IP-Network 207.68.192.0/20
#IP-Network-Block 207.068.128.000 - 207.068.207.255
  NetAddr::IP->new("65.52.0.0/14"),
  NetAddr::IP->new("207.68.176.96/27"),
#IP-Network 157.54.0.0/15
#IP-Network 157.56.0.0/14
#IP-Network 157.60.0.0/16
  NetAddr::IP->new("157.55.0.192/26"),
  NetAddr::IP->new("157.55.1.128/26"),
  NetAddr::IP->new("157.55.2.0/24"),
#IP-Network 207.46.0.0/16
  NetAddr::IP->new("207.46.66.0/28"),
  NetAddr::IP->new("213.199.144.0/20"),
  NetAddr::IP->new("216.32.180.0/22"),
# "Frontbridge"/"bigfish" (inherited)
#IP-Network 204.231.192.0/24
  NetAddr::IP->new("204.231.192.41"),

  # AOL - note only some IPs show mail-ish rDNS
  #IP-Network 64.12.0.0/16
  NetAddr::IP->new("64.12.78.136/30"),
  NetAddr::IP->new("64.12.78.142"),
  NetAddr::IP->new("64.12.100.31"),
  NetAddr::IP->new("64.12.102.136/29"),
  NetAddr::IP->new("64.12.140.129"),
  NetAddr::IP->new("64.12.140.130"),
  NetAddr::IP->new("64.12.143.144/30"),
  NetAddr::IP->new("64.12.143.152/30"),
  NetAddr::IP->new("64.12.206.39"),
  NetAddr::IP->new("64.12.206.40/30"),
  NetAddr::IP->new("64.12.207.128/27"),
  NetAddr::IP->new("64.12.207.144/29"),
  NetAddr::IP->new("64.12.207.152/30"),
  NetAddr::IP->new("64.12.207.160/28"),
  NetAddr::IP->new("64.12.207.176/29"),
  #IP-Network 205.188.0.0/16
  NetAddr::IP->new("205.188.58.0/28"),
  NetAddr::IP->new("205.188.91.94/31"),
  NetAddr::IP->new("205.188.91.96/31"),
  NetAddr::IP->new("205.188.105.143"),
  NetAddr::IP->new("205.188.105.144/30"),
  NetAddr::IP->new("205.188.169.196/29"),
  NetAddr::IP->new("205.188.249.64/29"),
  NetAddr::IP->new("205.188.249.128/29"),
  NetAddr::IP->new("205.188.249.148/30"),
  # NOTE(review): a /23 on an address with host bits set, far wider than its
  # neighbours and overlapping 205.188.169.196/29 above - possibly a typo for
  # a /29.  Left as-is; confirm against production data.
  NetAddr::IP->new("205.188.169.200/23"),
  NetAddr::IP->new("205.188.255.0/28"),

  # Google/GMail
  NetAddr::IP->new("209.85.128.0/17"),
  NetAddr::IP->new("72.14.192.0/18"),
  NetAddr::IP->new("74.125.0.0/16"),

  # Yahoo!/Inktomi
  NetAddr::IP->new("98.136.0.0/14"),
  NetAddr::IP->new("66.196.64.0/18"),
  NetAddr::IP->new("67.195.0.0/16"),
  NetAddr::IP->new("69.147.64.0/18"),
  NetAddr::IP->new("206.190.32.0/18"),
  NetAddr::IP->new("68.142.192.0/18"),
  NetAddr::IP->new("216.252.96.0/19"),
  NetAddr::IP->new("124.83.128.0/17"),
  NetAddr::IP->new("217.146.184.0/21"),
  NetAddr::IP->new("124.108.96.0/20"),
  NetAddr::IP->new("76.13.0.0/16"),
  NetAddr::IP->new("68.180.128.0/17"),
  NetAddr::IP->new("209.191.64.0/18"),
  NetAddr::IP->new("212.82.104.0/21"),
  NetAddr::IP->new("66.163.160.0/19"),
#inetnum: 87.248.110.0 - 87.248.111.255
#route: 87.248.104.0/21
  NetAddr::IP->new("87.248.110.0/23"),
  NetAddr::IP->new("203.188.200.0/22"),
  NetAddr::IP->new("217.12.0.0/20"),
  NetAddr::IP->new("77.238.184.0/23"),
#IP-Network 74.6.0.0/16
  NetAddr::IP->new("74.6.114.48/24"),
  NetAddr::IP->new("74.6.228.32/27"),
  NetAddr::IP->new("74.6.228.64/26"),
  NetAddr::IP->new("202.165.96.0/21"),
#route: 87.248.112.0/21
#inetnum: 87.248.114.0 - 87.248.115.255
  NetAddr::IP->new("87.248.114.0/24"),
#route: 116.214.0.0/20
  NetAddr::IP->new("116.214.12.0/24"),
  NetAddr::IP->new("202.86.4.0/22"),
  NetAddr::IP->new("77.238.188.0/23"),
  NetAddr::IP->new("217.146.182.0/23"),
  NetAddr::IP->new("114.111.64.0/18"),
  NetAddr::IP->new("115.178.12.0/23"),
  NetAddr::IP->new("121.101.151.212"),
#inetnum: 121.101.144.0 - 121.101.159.255
  NetAddr::IP->new("121.101.144.0/20"),
  NetAddr::IP->new("66.94.224.0/19"),
  NetAddr::IP->new("203.104.16.0/21"),
  NetAddr::IP->new("124.108.120.0/21"),
#inetnum: 180.222.112.0 - 180.222.119.255
  NetAddr::IP->new("180.222.112.0/21"),
  NetAddr::IP->new("72.30.0.0/16"),
  NetAddr::IP->new("217.146.176.0/21"),
  NetAddr::IP->new("106.10.128.0/18"),

  # MessageLabs - may add these to trusted_networks instead
  NetAddr::IP->new("85.158.139.0/24"),
# observed: .35, .51
  NetAddr::IP->new("194.106.220.0/23"),
  NetAddr::IP->new("193.109.254.0/23"),
  NetAddr::IP->new("119.161.0.0/19"),
  NetAddr::IP->new("216.82.240.0/20"),
# buh? rDNS says Yahoo, but...
#inetnum: 203.209.224.0 - 203.209.255.255
#descr: Alibaba (Beijing) Technology Co., Ltd.
  NetAddr::IP->new("203.209.230.22"),

  # Bell Canada - note only some IPs show mail-ish rDNS
  #IP-Network 209.226.0.0/16
  #IP-Network 207.236.0.0/16
  NetAddr::IP->new("209.226.175.0/24"),
  NetAddr::IP->new("207.236.237.0/26"),

  # Craigslist
  #IP-Network 208.82.236.0/22
  NetAddr::IP->new("208.82.236.0/22"),

  # Apple.com/mac.com - note only some IPs show mail-ish rDNS
  #IP-Network 17.0.0.0/8
  # asmtpout0(11-30).mac.com
  # 17.148.16. 011 -> 86 030 -> 105
  NetAddr::IP->new("17.148.16.64/26"),

  # Vodafone - note only some IPs show mail-ish rDNS
  #route: 212.183.128.0/19
  NetAddr::IP->new("212.183.156.224/29"),

  # Facebook - only exclude mail-ish hostnames
  # used SPF record to pick blocks
#"v=spf1 ip4:69.63.179.25 ip4:69.63.178.128/25 ip4:69.63.184.0/25 ip4:66.220.144.128/25
# ip4:66.220.155.0/24 ip4:69.171.232.128/25 ip4:66.220.157.0/25 ip4:69.171.244.0/24 mx -all"
  #IP-Network 69.63.176.0/20
  NetAddr::IP->new("69.63.178.128/25"),
  NetAddr::IP->new("69.63.184.0/25"),
  #IP-Network 66.220.144.0/20
  NetAddr::IP->new("66.220.144.128/25"),
  NetAddr::IP->new("66.220.155.0/24"),
  NetAddr::IP->new("66.220.157.0/25"),
  #IP-Network 69.171.224.0/19
  NetAddr::IP->new("69.171.232.128/25"),
  NetAddr::IP->new("69.171.244.0/24"),

  # IBM Lotus Live - rdns mostly mail
  #CIDR: 8.12.152.0/24
  NetAddr::IP->new("8.12.152.0/24"),

# ISPs
  # Eastlink (formerly Persona [Sudbury etc]) - only a few IPs observed with mail-ish rDNS
  #IP-Network 24.222.0.0/16
  NetAddr::IP->new("24.222.0.30"),
  NetAddr::IP->new("24.224.136.0/27"),

  # Cogeco - only a few IPs observed with mail-ish rDNS
  #IP-Network 216.221.64.0/19
  NetAddr::IP->new("216.221.81.192"),
  NetAddr::IP->new("216.221.81.25"),
  NetAddr::IP->new("216.221.81.28/30"),
  NetAddr::IP->new("216.221.81.96/30"),
  # seznam.cz
  #route: 77.75.72.0/23
  NetAddr::IP->new("77.75.72.44"),

  # T-Online - only mail-ish hostnames
#route: 194.25.0.0/16
#inetnum: 194.25.134.0 - 194.25.134.255
#netname: DTOS-ULM-001
#descr: www.t-online.de
#descr: mail.t-online.de
#descr: php.t-online.de
  NetAddr::IP->new("194.25.134.80/29"),
  NetAddr::IP->new("194.25.134.16/29"),

  # Telstra (Australia)
  #inetnum: 61.9.128.0 - 61.9.255.255
  # may miss one or two
  # .168.135-152
  NetAddr::IP->new("61.9.168.136/29"),
  NetAddr::IP->new("61.9.168.144/29"),
  # .189.132-152
  NetAddr::IP->new("61.9.189.132/30"),
  NetAddr::IP->new("61.9.189.136/29"),
  NetAddr::IP->new("61.9.189.144/29"),
  # "TelstraClear" - related to above?
  #inetnum: 203.97.0.0 - 203.97.127.255
  NetAddr::IP->new("203.97.33.64"),
  NetAddr::IP->new("203.97.33.68"),
  NetAddr::IP->new("203.97.37.64"),

  # Earthlink
  #IP-Network 207.69.0.0/16
  NetAddr::IP->new("207.69.200.28"),
  #IP-Network 209.86.0.0/16
  NetAddr::IP->new("209.86.89.60/30"),
  NetAddr::IP->new("209.86.89.64/28"),

  # Sprint
  #IP-Network 68.24.0.0/13
  # actually their PCS mail relay, so sez rDNS
  NetAddr::IP->new("68.28.27.84"),

  # Vodafone (New Zealand)
  # inetnum: 203.109.128.0 - 203.109.159.255
  NetAddr::IP->new("203.109.136.0/28"),

  # Optus (Australia)
  # inetnum: 211.28.0.0 - 211.31.255.255
  NetAddr::IP->new("211.29.132.0/24"),

  #IP-Network 204.209.192.0/20
  #Org-Name AGT Limited.
  # telus tentacle?
  # 204.209.205.13 -> defout.telus.net. -> 204.209.205.13
  NetAddr::IP->new("204.209.205.13"),

  #route: 195.188.0.0/16
  #descr: Telewest Broadband
  # blueyonder.co.uk
  NetAddr::IP->new("195.188.213.0/28"),

  #inetnum: 210.50.0.0 - 210.50.63.255
  #descr: Primus Telecommunications
  # (Australia)
  NetAddr::IP->new("210.50.30.224/28"),

  # iiNet Limited (Australia)
  #inetnum: 203.10.1.0 - 203.10.1.255
  NetAddr::IP->new("203.10.1.232/29"),
  NetAddr::IP->new("203.10.1.240/29"),
  #inetnum: 203.59.0.0 - 203.59.255.255
  NetAddr::IP->new("203.59.1.104/29"),
  NetAddr::IP->new("203.59.1.128/26"),

  # Road Runner
  #IP-Network 75.176.0.0/12
  NetAddr::IP->new("75.180.132.120/29"),

  # Comcast
  #IP-Network 76.96.0.0/17
  NetAddr::IP->new("76.96.62.0/25"),

  #IP-Network 142.146.0.0/16
  #Org-Name Rogers Communications Inc.
  NetAddr::IP->new("142.146.31.20/30"),

  #address: Claranet UK Ltd
  #route: 195.8.64.0/19
  #inetnum: 195.8.89.32 - 195.8.89.47
  #descr: Claranet UK SMTP relay platform
  NetAddr::IP->new("195.8.89.37"),

  #inetnum: 80.8.0.0 - 80.15.255.255
  #descr: France Telecom S.A.
  #inetnum: 80.12.242.0 - 80.12.242.255 (several ranges)
  #descr: Mail Essentials Project
  # orange.fr SMTP cluster. whois WTF
  NetAddr::IP->new("80.12.242.0/24"),
  #route: 193.252.0.0/18
  #descr: France Telecom
  # more "mail essentials project"
  NetAddr::IP->new("193.252.22.208/29"),

  #inetnum: 212.216.0.0 - 212.216.255.255
  #descr: Telecom Italia Net
  #inetnum: 212.216.172.0 - 212.216.177.255
  #descr: Telecom Italia IDC - ISP&VAS MNGT
  NetAddr::IP->new("212.216.176.0/24"),

  #inetnum: 151.189.0.0 - 151.189.255.255
  #descr: Arcor Online GmbH
  NetAddr::IP->new("151.189.21.32/27"),

  #CIDR: 74.208.0.0/16
  #OrgName: 1&1 Internet Inc.
  NetAddr::IP->new("74.208.122.35"),

  #inetnum: 187.31/16
  #owner: Internet Group do Brasil SA
  # .16 and .17 seem to be SMTP relay hosts
  NetAddr::IP->new("187.31.0.16/31"),

  #address: 1&1 Internet AG
  #route: 217.72.192.0/20
  #descr: Web.de
  NetAddr::IP->new("217.72.192.227"),

  #descr: France Telecom Espana
  #route: 62.36.0.0/16
  #inetnum: 62.36.0.0 - 62.36.23.255
  NetAddr::IP->new("62.36.20.205"),

  # Primus (Canada)
  # CIDR: 216.254.192.0/19, 216.254.128.0/18
  NetAddr::IP->new("216.254.180.38"),

## Edumactional places

  # UAlberta - only one IP observed with mail-ish rDNS
  #IP-Network 129.128.0.0/16
  NetAddr::IP->new("129.128.5.19"),

  # Queens University
  #IP-Network 130.15.0.0/16
  NetAddr::IP->new("130.15.241.183"),

  # University of Maryland Baltimore County - mail servers only
  #CIDR: 130.85.0.0/16
  NetAddr::IP->new("130.85.25.76/30"),

  # University of Florida
  #IP-Network 128.227.0.0/16
  NetAddr::IP->new("128.227.74.70"),
  NetAddr::IP->new("128.227.74.149"),
  NetAddr::IP->new("128.227.74.165"),

  # University of Texas
  #IP-Network 129.109.0.0/16
  NetAddr::IP->new("129.109.195.0/28"),
  #IP-Network 129.106.0.0/16
  NetAddr::IP->new("129.106.148.58"),

  # Roxbury Community College
  #IP-Network 209.104.233.0/24
  NetAddr::IP->new("209.104.233.247"),

  # Stanford University
  #IP-Network 171.64.0.0/14
  NetAddr::IP->new("171.67.219.80/30"),

  # Cleveland State University
  #IP-Network 137.148.0.0/16
  NetAddr::IP->new("137.148.18.13/32"),

  # University of Missouri - dba the Missouri Research and Education Network (MOREnet)
  #CIDR: 204.184.0.0/15
  NetAddr::IP->new("204.185.165.125"),

  # Yale University
  #IP-Network 130.132.0.0/16
  NetAddr::IP->new("130.132.50.7"),
  NetAddr::IP->new("130.132.50.146"),

  # Rutgers University
  #IP-Network 165.230.0.0/16
  NetAddr::IP->new("165.230.151.182"),

  # Clemson University
  #IP-Network 130.127.0.0/16
  NetAddr::IP->new("130.127.235.21"),

  # Wilkes University
  #IP-Network 146.94.0.0/16
  NetAddr::IP->new("146.94.192.152"),

  # Virginia Community College System
  #IP-Network 164.106.0.0/16
  NetAddr::IP->new("164.106.130.251"),

  # University of Virginia
  #IP-Network 199.111.0.0/16
  # Mary Washington College
  #IP-Network 199.111.64.0/19
  NetAddr::IP->new("199.111.84.18"),

  #inetnum: 130.95.0.0 - 130.95.255.255
  #address: The University of Western Australia
  NetAddr::IP->new("130.95.3.211"),

  #inetnum: 130.56.0.0 - 130.56.255.255
  #address: IIS, Australian National University
  NetAddr::IP->new("130.56.64.134"),

  #IP-Network 129.81.0.0/16
  #Org-Name Tulane University
  NetAddr::IP->new("129.81.224.84"),

  #Org-Name Texas A&M University
  #IP-Network 192.195.88.0/21
  NetAddr::IP->new("192.195.88.20"),
  #IP-Network 165.95.0.0/16
  NetAddr::IP->new("165.95.144.40"),

  #IP-Network 162.82.0.0/16
  #Org-Name William Beaumont Hospital
  # rDNS on seen IP ends in .edu
  NetAddr::IP->new("162.82.215.18"),

  #IP-Network 169.232.0.0/16
  #Org-Name University of California, Los Angeles
  NetAddr::IP->new("169.232.46.169"),

  #IP-Network 128.138.0.0/16
  #Org-Name University of Colorado
  NetAddr::IP->new("128.138.128.231"),

  #IP-Network 209.221.168.0/24
  #Org-Name City University
  NetAddr::IP->new("209.221.168.36"),

  #IP-Network 163.120.0.0/16
  #Org-Name DePauw University

  #IP-Network 205.137.240.0/20
  #Org-Name Shelby County Schools
  NetAddr::IP->new("205.137.241.81"),

  #IP-Network 199.17.0.0/16
  #Org-Name Minnesota State Colleges and Universities
  NetAddr::IP->new("199.17.25.194"),

  #CIDR: 162.129.0.0/16
  #OrgName: The Johns Hopkins Medical Institutions
  NetAddr::IP->new("162.129.8.151"),
  #CIDR: 128.220.0.0/16
  NetAddr::IP->new("128.220.161.140"),

  #CIDR: 144.167.0.0/16
  #OrgName: University of Arkansas at Little Rock
  NetAddr::IP->new("144.167.3.152"),

# List servers
  # Debian listserver - relays spam, so we can't list it.
  NetAddr::IP->new("82.195.75.100"),

# ESPs
  # moderately abusable, but mostly legit
  # Org-Name iContact (Broadwick Corp./Preation Inc.)
  NetAddr::IP->new("216.27.93.0/25"),
  NetAddr::IP->new("207.254.213.192/26"),

  # are these guys an ESP? rDNS in many blocks shows secureserver.net, with SMTPish overtones
  #Org-Name GoDaddy.com, Inc.
  #IP-Network 64.202.160.0/19
  NetAddr::IP->new("64.202.160.0/24"),
  NetAddr::IP->new("64.202.165.0/26"),
  NetAddr::IP->new("64.202.165.180/30"),
  NetAddr::IP->new("64.202.165.192/29"),
  NetAddr::IP->new("64.202.165.224/28"),
  #IP-Network 72.167.0.0/16
  NetAddr::IP->new("72.167.82.80/29"),
  NetAddr::IP->new("72.167.82.90"),
  NetAddr::IP->new("72.167.224.0/28"),
  NetAddr::IP->new("72.167.234.224/27"),
  #IP-Network 208.109.0.0/16
  NetAddr::IP->new("208.109.80.23"),
  NetAddr::IP->new("208.109.80.58"),
  NetAddr::IP->new("208.109.80.24"),
  NetAddr::IP->new("208.109.80.74"),
  NetAddr::IP->new("208.109.80.80"),
  NetAddr::IP->new("208.109.80.81"),
  #IP-Network 68.178.128.0/17
  NetAddr::IP->new("68.178.232.18"),

  #IP-Network 173.201.0.0/16
  NetAddr::IP->new("173.201.193.0/23"),

  # EmailBrain - note no actual netblocks of their own. :(
  NetAddr::IP->new("66.100.171.162"),
#163.171.100.66.in-addr.arpa domain name pointer eb08.ebhost9.com.
#164.171.100.66.in-addr.arpa domain name pointer a.eb08.ebhost9.com.
#165.171.100.66.in-addr.arpa domain name pointer b.eb08.ebhost9.com.
#166.171.100.66.in-addr.arpa domain name pointer c.eb08.ebhost9.com.

  # Tucows - don't recall if they're affiliated with anyone else
  #CIDR: 64.96.0.0/14
  NetAddr::IP->new("64.98.42.0/24"),

  # not really an ESP, exactly, but best-fit
  #IP-Network 208.47.184.0/23
  #Org-Name SYNACOR
  # mailrelay.embarq.synacor.com
  NetAddr::IP->new("208.47.184.3"),

  #CIDR: 208.75.120.0/22
  #OrgName: Constant Contact, Inc


# Mailbox providers

  # Hushmail
  #IP-Network 65.39.178.0/24
  # all existent rdns in this range are smtp, but not all exist
  NetAddr::IP->new("65.39.178.128/27"),

# eBay/PayPal
  NetAddr::IP->new("66.211.160.0/19"),

# Misc legit
  # Texas Instruments
  #IP-Network 192.94.94.0/24
  NetAddr::IP->new("192.94.94.40"),

# Government tentacles
  #inetnum: 193.39.144.0 - 193.39.159.255
  #descr: City of Edinburgh District Council
  NetAddr::IP->new("193.39.157.39"),

  ); # done def for @dontlistme
601
# Walk every message found in the folder.  For each one:
#  - run it through SpamAssassin,
#  - count the non-image, non-mailto URI domains it contains (each domain is
#    counted once per message in %urilist),
#  - take the IP of the first untrusted relay and, unless it falls inside one
#    of the @dontlistme blocks, count it in %iplist.
MSG: foreach my $msgid (@msgs) {
  my $msg = $imap->message_string($msgid);

  print ".";

  # message_string() gives undef if the fetch failed; don't feed that to SA.
  if (!defined $msg) {
    warn "couldn't fetch message $msgid: ".($imap->LastError || 'unknown error')."\n";
    next MSG;
  }

  my $mail = $spamtest->parse($msg);
  my $status = $spamtest->check($mail);

  # stolen from SA plugin bits
  # Domains SA is configured not to look up; may be unset, so default to an
  # empty hash.
  my $skip_domains = $status->{main}->{conf}->{uridnsbl_skip_domains} || {};

  # Generate the full list of html-parsed domains.
  my $uris = $status->get_uri_detail_list();

  my %msguris;
  foreach my $uri (keys %{$uris}) {
    my $info = $uris->{$uri};

    # we want to skip mailto: uris
    next if $uri =~ /^mailto:/;

    # no domains were found via this uri, so skip
    next unless $info->{domains};

    # image URIs are skipped as well
    next if $info->{types}->{img};

    foreach my $domain (keys %{ $info->{domains} }) {
      next if exists $skip_domains->{$domain};
      $msguris{$domain}++;
    }
  }

  # one hit per message per domain, however often the message repeats it
  $urilist{$_}++ foreach keys %msguris;

  # now, get the relay IP
  my $untrusted = $status->get_message()->{metadata}->{relays_untrusted};
  my $firstip = ($untrusted && @{$untrusted}) ? $untrusted->[0]->{ip} : undef;
  my $relayip = defined $firstip ? NetAddr::IP->new($firstip) : undef;

  # Everything needed has been copied out of the SA objects; release them now
  # so their resources don't pile up across a large folder.
  $status->finish();
  $mail->finish();

# sigh.  messages tagged as spam already make life difficult by hiding the
# old Received: headers.  We'll just handle them manually for now.
  if (!$relayip) {
    print "phtui: ";
    # parse_headers() can fail and return undef; fall back to empty output
    my $headers = $imap->parse_headers($msgid, "Received", "Subject") || {};
    my $recv = $headers->{'Received'} ? $headers->{'Received'}[0] : undef;
    my $subj = $headers->{'Subject'} ? $headers->{'Subject'}[0] : undef;
    $recv = '' unless defined $recv;
    $subj = '' unless defined $subj;
    print "$recv\n";
    print " $subj\n";
    next MSG;
  }

  foreach my $block (@dontlistme) {
    next unless defined $block;	# an entry NetAddr::IP couldn't parse
    next MSG if $relayip->within($block);
  }
  $iplist{$relayip->addr}++;

# last if $i > 2;
  sleep 1;
} # IMAP message iteration
674
print " Done.\n";

# Returns 1 if `host` reports NXDOMAIN for $name, 0 otherwise (including when
# `host` can't be run).  The command is run without a shell, so nothing in
# $name is ever interpreted as shell syntax.
sub _host_says_nxdomain {
  my ($name) = @_;
  my $pid = open(my $hostfh, '-|', 'host', $name);
  if (!$pid) {
    warn "couldn't run host for $name: $!\n";
    return 0;
  }
  my @answer = <$hostfh>;
  close $hostfh;	# host exits non-zero on NXDOMAIN; that's expected
  return scalar(grep { /NXDOMAIN/ } @answer) ? 1 : 0;
}

# mm.  don't really need times on the IP lists
#if ($opts{r}) {
#  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
#}
# Relay IPs: with -r one DNS data record per IP (octets reversed), otherwise
# "IP<tab> count".
foreach my $ip (sort keys %iplist) {
  if ($opts{r}) {
    # Only dotted-quad addresses can be turned into a reversed-octet record;
    # never reuse captures left over from an earlier match.
    if (my @octets = $ip =~ /\A(\d+)\.(\d+)\.(\d+)\.(\d+)\z/) {
      print "+".join('.', reverse @octets).".spamhosts.company.com:127.0.0.2:900:::\n";
    } else {
      warn "skipping non-IPv4 relay address $ip\n";
    }
  } else {
    print "$ip\t $iplist{$ip}\n";
  }
}

if ($opts{u}) {
  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
}
# URI hosts: only report hosts that neither uribl.com nor the local list
# already knows about (both lookups come back NXDOMAIN).
foreach my $uri (sort keys %urilist) {
  # These names come straight out of spam.  A leading '-' would be read by
  # `host` as an option, and an empty name is meaningless.
  if ($uri eq '' || $uri =~ /\A-/) {
    warn "skipping suspicious URI host '$uri'\n";
    next;
  }
  # Each lookup is checked against its own output, so a multi-line answer
  # from the first lookup can't be mistaken for the second one's.
  next unless _host_says_nxdomain("$uri.multi.uribl.com");
  next unless _host_says_nxdomain("$uri.company.dnsbl.");
  if ($opts{u}) {
    print "+$uri.uribl.company.com:127.0.0.2:900:::\n";
  } else {
    # URI plus count
    print "$uri\t$urilist{$uri}\n";
  }
}

# Close the selected folder...
$imap->close();

# Close IMAP connection cleanly.
$imap->logout();

# integrate learned stuff - journal sync etc IFF bayes_journal is set AND you want to sync right away
# irrelevant for SQL Bayes
#my $sarebuild = `/usr/bin/sa-learn --rebuild`;
#print "-------\nRebuild: ",$sarebuild,"\n-------\n" if $debug;
Note: See TracBrowser for help on using the repository browser.