source: trunk/dnsbl/extract-data@ 64

Last change on this file since 64 was 40, checked in by Kris Deugau, 12 years ago

/trunk/dnsbl

Minor cleanups to prepare for semirelease
GPL-tag executables and Perl module from Makefile MANIFEST

  • Property svn:executable set to *
  • Property svn:keywords set to Date Rev Author Id
File size: 23.0 KB
Line 
1#!/usr/bin/perl
2# $Id: extract-data 40 2012-03-04 20:02:13Z kdeugau $
3#
4# Extract relay IP, URI hosts from mail in folder
5#
6# Heavily reworked 2009/01/21 by Kris Deugau <kdeugau@deepnet.cx>
7# from original learn-spam-from-IMAP script:
8# dmz@dmzs.com - March 19, 2004
9# http://www.dmzs.com/tools/files/spam.phtml
10# LGPL
11#
12# Hack-n-patch'ed to convert to IP and URI extraction script:
13# Kris Deugau <kdeugau@deepnet.cx> 2009/05/19
14#
15# Little bits to feed a lightweight package-creating tool:
16##wrapreq libmail-imapclient-perl (>=3.11), libio-socket-ssl-perl, install-sa
17##wrapver 0.1
18##wrapsum Extract relay IP and URI hosts from mail in IMAP folder
19
20use strict;
21use warnings;
22use Time::Local;
23use POSIX qw(strftime);
24use IO::Socket::SSL;
25use Mail::IMAPClient;
26use Getopt::Std;
27use NetAddr::IP;
28
29# hmm. can't make (enough) sense of the docs. gonna have to parse headers for IPs myself... :/
30# but we did find enough to extract the URIs...
31# only required if using a custom SA install
32#use lib '/opt/spamassassin/share/perl/5.10.0';
33use Mail::SpamAssassin;
34use Mail::SpamAssassin::PerMsgStatus;
35
# Command-line switches (all optional, may be combined):
#   -d  print debugging chatter
#   -r  print relay IPs as DNSBL zone-data lines instead of "IP <tab> count"
#   -u  print URI hosts as DNSBL zone-data lines (preceded by a timestamp
#       comment) instead of "host <tab> count"
#   -v  verbose (accepted; nothing in this script currently reads it)
my %opts;
getopts("druv", \%opts)
  or die "usage: $0 [-d] [-r] [-u] [-v]\n";

my $debug = ($opts{d} ? 1 : 0);
my $imapdebug = 0;	# Mail::IMAPClient debugging dumps the whole content of all the mail you look at. O_o
my $verbose = ($opts{v} ? 1 : 0);

my $tmpdir = '/var/tmp';

# The sa-learn settings inherited from the original learn-spam-from-IMAP
# script ($salearn, $learnargs, $sadebug, $delete_after_learning) were never
# used here: this script calls the Mail::SpamAssassin library directly and
# never runs sa-learn.  The old "die if sa-learn is missing" check therefore
# only caused spurious failures on hosts with sa-learn installed elsewhere,
# and has been dropped along with those settings.

# IMAP folder holding the reported spam to extract data from
my $folder = 'reported.needsextraction';
#my $folder = 'confirmed';
54
# non-SSL IMAP settings:
#my $imap = Mail::IMAPClient->new( Server=> 'imapmailhost:143',
#				User => 'imapspamuser',
#				Password => 'imapspamuserpassword',
#				Debug => $imapdebug);

print "about to open IMAP connection\n" if $debug;

# Open the SSL socket separately so a connection failure is reported as such,
# instead of handing an undefined Socket to Mail::IMAPClient.
my $imapsock = IO::Socket::SSL->new(
	Proto => 'tcp',
	PeerAddr => 'mail.company.com',
	PeerPort => 993,	# IMAP over SSL standard port
  )
  or die "IMAP connection failed: ".($IO::Socket::SSL::SSL_ERROR || $!)."\n";

# FIXME(security): the account name and password are hard-coded in the
# source; they should be moved to a protected config file or the environment.
my $imap = Mail::IMAPClient->new(
	User => 'junkmail',
	Password => 'k3c86z2',
	Socket => $imapsock,
	Debug => $imapdebug,
  )
  or die "IMAP login failed: $@\n";

## Process the spam mailbox
$imap->select($folder)
  or die "couldn't select IMAP folder '$folder': ".($imap->LastError || 'unknown error')."\n";

# Drop undefined results so a failed search yields an empty list.
my @msgs = grep { defined } $imap->search("ALL");

# Count what the search actually returned: the processing loop uses this
# number to index @msgs, so the two must agree.
my $msgcount = scalar @msgs;

print $msgcount, " message(s) to process\n";

# Since the data goes into files anyway, why not make a mockery of a maildir and let sa-learn iterate over it?
#my $spamtmp = "$tmpdir/spam.".time.".$$";	# this should give us a suitably pseudorandom directory
#mkdir $spamtmp or die "couldn't create temporary pen for spam: $!";

# One SpamAssassin instance is reused to parse and check every message.
my $spamtest = Mail::SpamAssassin->new();
	# don't keep dereferencing this

# Accumulators filled by the message loop:
my %iplist;	# relay IP => number of messages it was seen on
my %urilist;	# URI host => number of messages it appeared in
95
# put together an array of netblocks we won't/can't list for various reasons
#
# @dontlistme_cidrs holds the blocks as plain strings (single IPs or CIDR
# ranges); the comment lines between entries record the whois/rDNS data each
# entry was picked from.  @dontlistme, built at the bottom, holds the matching
# NetAddr::IP objects that relay IPs are tested against.
my @dontlistme_cidrs = (
  # Microsoft/Hotmail/Windows Live Mail
#IP-Network 207.68.128.0/18
#IP-Network 207.68.192.0/20
#IP-Network-Block 207.068.128.000 - 207.068.207.255
  "65.52.0.0/14",
  "207.68.176.96/27",
#IP-Network 157.54.0.0/15
#IP-Network 157.56.0.0/14
#IP-Network 157.60.0.0/16
  "157.55.0.192/26",
  "157.55.1.128/26",
  "157.55.2.0/24",
#IP-Network 207.46.0.0/16
  "207.46.66.0/28",
  "213.199.144.0/20",
  "216.32.180.0/22",
# "Frontbridge"/"bigfish" (inherited)
#IP-Network 204.231.192.0/24
  "204.231.192.41",

  # AOL - note only some IPs show mail-ish rDNS
  #IP-Network 64.12.0.0/16
  "64.12.78.136/30",
  "64.12.78.142",
  "64.12.100.31",
  "64.12.102.136/29",
  "64.12.140.129",
  "64.12.140.130",
  "64.12.143.144/30",
  "64.12.143.152/30",
  "64.12.206.39",
  "64.12.206.40/30",
  "64.12.207.128/27",
  "64.12.207.144/29",
  "64.12.207.152/30",
  "64.12.207.160/28",
  "64.12.207.176/29",
  #IP-Network 205.188.0.0/16
  "205.188.58.0/28",
  "205.188.91.94/31",
  "205.188.91.96/31",
  "205.188.105.143",
  "205.188.105.144/30",
  "205.188.169.196/29",
  "205.188.249.64/29",
  "205.188.249.128/29",
  "205.188.249.148/30",
  # NOTE(review): a /23 on this address covers 205.188.168.0 - 205.188.169.255;
  # a /29 may have been intended.  Left unchanged - confirm.
  "205.188.169.200/23",
  "205.188.255.0/28",

  # Google/GMail
  "209.85.128.0/17",
  "72.14.192.0/18",
  "74.125.0.0/16",

  # Yahoo!/Inktomi
  "98.136.0.0/14",
  "66.196.64.0/18",
  "67.195.0.0/16",
  "69.147.64.0/18",
  "206.190.32.0/18",
  "68.142.192.0/18",
  "216.252.96.0/19",
  "124.83.128.0/17",
  "217.146.184.0/21",
  "124.108.96.0/20",
  "76.13.0.0/16",
  "68.180.128.0/17",
  "209.191.64.0/18",
  "212.82.104.0/21",
  "66.163.160.0/19",
#inetnum: 87.248.110.0 - 87.248.111.255
#route: 87.248.104.0/21
  "87.248.110.0/23",
  "203.188.200.0/22",
  "217.12.0.0/20",
  "77.238.184.0/23",
#IP-Network 74.6.0.0/16
  "74.6.114.48/24",
  "74.6.228.32/27",
  "74.6.228.64/26",
  "202.165.96.0/21",
#route: 87.248.112.0/21
#inetnum: 87.248.114.0 - 87.248.115.255
  "87.248.114.0/24",
#route: 116.214.0.0/20
  "116.214.12.0/24",
  "202.86.4.0/22",
  # was written in the abbreviated form "77.238.188/23"; spelled out in full
  # so the result does not depend on how short addresses are expanded
  "77.238.188.0/23",
  "217.146.182.0/23",
  "114.111.64.0/18",
  "115.178.12.0/23",
  "121.101.151.212",
#inetnum: 121.101.144.0 - 121.101.159.255
  "121.101.144.0/20",
  "66.94.224.0/19",
  "203.104.16.0/21",
  "124.108.120.0/21",
#inetnum: 180.222.112.0 - 180.222.119.255
  "180.222.112.0/21",
  "72.30.0.0/16",
  "217.146.176.0/21",
  "106.10.128.0/18",

  # MessageLabs - may add these to trusted_networks instead
  "85.158.139.0/24",
# observed: .35, .51
  "194.106.220.0/23",
  "193.109.254.0/23",
  "119.161.0.0/19",
  "216.82.240.0/20",
# buh? rDNS says Yahoo, but...
#inetnum: 203.209.224.0 - 203.209.255.255
#descr: Alibaba (Beijing) Technology Co., Ltd.
  "203.209.230.22",

  # Bell Canada - note only some IPs show mail-ish rDNS
  #IP-Network 209.226.0.0/16
  #IP-Network 207.236.0.0/16
  "209.226.175.0/24",
  "207.236.237.0/26",

  # Craigslist
  #IP-Network 208.82.236.0/22
  "208.82.236.0/22",

  # Apple.com/mac.com - note only some IPs show mail-ish rDNS
  #IP-Network 17.0.0.0/8
  # asmtpout0(11-30).mac.com
  # 17.148.16. 011 -> 86 030 -> 105
  "17.148.16.64/26",

  # Vodafone - note only some IPs show mail-ish rDNS
  #route: 212.183.128.0/19
  "212.183.156.224/29",

  # Facebook - only exclude mail-ish hostnames
  # used SPF record to pick blocks
#"v=spf1 ip4:69.63.179.25 ip4:69.63.178.128/25 ip4:69.63.184.0/25 ip4:66.220.144.128/25
# ip4:66.220.155.0/24 ip4:69.171.232.128/25 ip4:66.220.157.0/25 ip4:69.171.244.0/24 mx -all"
  #IP-Network 69.63.176.0/20
  "69.63.178.128/25",
  "69.63.184.0/25",
  #IP-Network 66.220.144.0/20
  "66.220.144.128/25",
  "66.220.155.0/24",
  "66.220.157.0/25",
  #IP-Network 69.171.224.0/19
  "69.171.232.128/25",
  "69.171.244.0/24",

  # IBM Lotus Live - rdns mostly mail
  #CIDR: 8.12.152.0/24
  "8.12.152.0/24",

# ISPs
  # Eastlink (formerly Persona [Sudbury etc]) - only a few IPs observed with mail-ish rDNS
  #IP-Network 24.222.0.0/16
  "24.222.0.30",
  "24.224.136.0/27",

  # Cogeco - only a few IPs observed with mail-ish rDNS
  #IP-Network 216.221.64.0/19
  "216.221.81.192",
  "216.221.81.25",
  "216.221.81.28/30",
  "216.221.81.96/30",
  # seznam.cz
  #route: 77.75.72.0/23
  "77.75.72.44",

  # T-Online - only mail-ish hostnames
#route: 194.25.0.0/16
#inetnum: 194.25.134.0 - 194.25.134.255
#netname: DTOS-ULM-001
#descr: www.t-online.de
#descr: mail.t-online.de
#descr: php.t-online.de
  "194.25.134.80/29",
  "194.25.134.16/29",

  # Telstra (Australia)
  #inetnum: 61.9.128.0 - 61.9.255.255
  # may miss one or two
  # .168.135-152
  "61.9.168.136/29",
  "61.9.168.144/29",
  # .189.132-152
  "61.9.189.132/30",
  "61.9.189.136/29",
  "61.9.189.144/29",
  # "TelstraClear" - related to above?
  #inetnum: 203.97.0.0 - 203.97.127.255
  "203.97.33.64",
  "203.97.33.68",
  "203.97.37.64",

  # Earthlink
  #IP-Network 207.69.0.0/16
  "207.69.200.28",
  #IP-Network 209.86.0.0/16
  "209.86.89.60/30",
  "209.86.89.64/28",

  # Sprint
  #IP-Network 68.24.0.0/13
  # actually their PCS mail relay, so sez rDNS
  "68.28.27.84",

  # Vodafone (New Zealand)
  # inetnum: 203.109.128.0 - 203.109.159.255
  # was written in the abbreviated form "203.109.136/28"; spelled out in full
  "203.109.136.0/28",

  # Optus (Australia)
  # inetnum: 211.28.0.0 - 211.31.255.255
  "211.29.132.0/24",

  #IP-Network 204.209.192.0/20
  #Org-Name AGT Limited.
  # telus tentacle?
  # 204.209.205.13 -> defout.telus.net. -> 204.209.205.13
  "204.209.205.13",

  #route: 195.188.0.0/16
  #descr: Telewest Broadband
  # blueyonder.co.uk
  "195.188.213.0/28",

  #inetnum: 210.50.0.0 - 210.50.63.255
  #descr: Primus Telecommunications
  # (Australia)
  "210.50.30.224/28",

  # iiNet Limited (Australia)
  #inetnum: 203.10.1.0 - 203.10.1.255
  "203.10.1.232/29",
  "203.10.1.240/29",
  #inetnum: 203.59.0.0 - 203.59.255.255
  "203.59.1.104/29",
  "203.59.1.128/26",

  # Road Runner
  #IP-Network 75.176.0.0/12
  "75.180.132.120/29",

  # Comcast
  #IP-Network 76.96.0.0/17
  "76.96.62.0/25",

  #IP-Network 142.146.0.0/16
  #Org-Name Rogers Communications Inc.
  "142.146.31.20/30",

  #address: Claranet UK Ltd
  #route: 195.8.64.0/19
  #inetnum: 195.8.89.32 - 195.8.89.47
  #descr: Claranet UK SMTP relay platform
  "195.8.89.37",

  #inetnum: 80.8.0.0 - 80.15.255.255
  #descr: France Telecom S.A.
  #inetnum: 80.12.242.0 - 80.12.242.255 (several ranges)
  #descr: Mail Essentials Project
  # orange.fr SMTP cluster. whois WTF
  "80.12.242.0/24",
  #route: 193.252.0.0/18
  #descr: France Telecom
  # more "mail essentials project"
  "193.252.22.208/29",

  #inetnum: 212.216.0.0 - 212.216.255.255
  #descr: Telecom Italia Net
  #inetnum: 212.216.172.0 - 212.216.177.255
  #descr: Telecom Italia IDC - ISP&VAS MNGT
  "212.216.176.0/24",

  #inetnum: 151.189.0.0 - 151.189.255.255
  #descr: Arcor Online GmbH
  "151.189.21.32/27",

  #CIDR: 74.208.0.0/16
  #OrgName: 1&1 Internet Inc.
  "74.208.122.35",

  #inetnum: 187.31/16
  #owner: Internet Group do Brasil SA
  # .16 and .17 seem to be SMTP relay hosts
  "187.31.0.16/31",

  #address: 1&1 Internet AG
  #route: 217.72.192.0/20
  #descr: Web.de
  "217.72.192.227",

  #descr: France Telecom Espana
  #route: 62.36.0.0/16
  #inetnum: 62.36.0.0 - 62.36.23.255
  "62.36.20.205",

  # Primus (Canada)
  # CIDR: 216.254.192.0/19, 216.254.128.0/18
  "216.254.180.38",

## Edumactional places

  # UAlberta - only one IP observed with mail-ish rDNS
  #IP-Network 129.128.0.0/16
  "129.128.5.19",

  # Queens University
  #IP-Network 130.15.0.0/16
  "130.15.241.183",

  # University of Maryland Baltimore County - mail servers only
  #CIDR: 130.85.0.0/16
  "130.85.25.76/30",

  # University of Florida
  #IP-Network 128.227.0.0/16
  "128.227.74.70",
  "128.227.74.149",
  "128.227.74.165",

  # University of Texas
  #IP-Network 129.109.0.0/16
  "129.109.195.0/28",
  #IP-Network 129.106.0.0/16
  "129.106.148.58",

  # Roxbury Community College
  #IP-Network 209.104.233.0/24
  "209.104.233.247",

  # Stanford University
  #IP-Network 171.64.0.0/14
  "171.67.219.80/30",

  # Cleveland State University
  #IP-Network 137.148.0.0/16
  "137.148.18.13/32",

  # University of Missouri - dba the Missouri Research and Education Network (MOREnet)
  #CIDR: 204.184.0.0/15
  "204.185.165.125",

  # Yale University
  #IP-Network 130.132.0.0/16
  "130.132.50.7",
  "130.132.50.146",

  # Rutgers University
  #IP-Network 165.230.0.0/16
  "165.230.151.182",

  # Clemson University
  #IP-Network 130.127.0.0/16
  "130.127.235.21",

  # Wilkes University
  #IP-Network 146.94.0.0/16
  "146.94.192.152",

  # Virginia Community College System
  #IP-Network 164.106.0.0/16
  "164.106.130.251",

  # University of Virginia
  #IP-Network 199.111.0.0/16
  # Mary Washington College
  #IP-Network 199.111.64.0/19
  "199.111.84.18",

  #inetnum: 130.95.0.0 - 130.95.255.255
  #address: The University of Western Australia
  "130.95.3.211",

  #inetnum: 130.56.0.0 - 130.56.255.255
  #address: IIS, Australian National University
  "130.56.64.134",

  #IP-Network 129.81.0.0/16
  #Org-Name Tulane University
  "129.81.224.84",

  #Org-Name Texas A&M University
  #IP-Network 192.195.88.0/21
  "192.195.88.20",
  #IP-Network 165.95.0.0/16
  "165.95.144.40",

  #IP-Network 162.82.0.0/16
  #Org-Name William Beaumont Hospital
  # rDNS on seen IP ends in .edu
  "162.82.215.18",

  #IP-Network 169.232.0.0/16
  #Org-Name University of California, Los Angeles
  "169.232.46.169",

  #IP-Network 128.138.0.0/16
  #Org-Name University of Colorado
  "128.138.128.231",

  #IP-Network 209.221.168.0/24
  #Org-Name City University
  "209.221.168.36",

  #IP-Network 163.120.0.0/16
  #Org-Name DePauw University

  #IP-Network 205.137.240.0/20
  #Org-Name Shelby County Schools
  "205.137.241.81",

  #IP-Network 199.17.0.0/16
  #Org-Name Minnesota State Colleges and Universities
  "199.17.25.194",

  #CIDR: 162.129.0.0/16
  #OrgName: The Johns Hopkins Medical Institutions
  "162.129.8.151",
  #CIDR: 128.220.0.0/16
  "128.220.161.140",

  #CIDR: 144.167.0.0/16
  #OrgName: University of Arkansas at Little Rock
  "144.167.3.152",

# List servers
  # Debian listserver - relays spam, so we can't list it.
  "82.195.75.100",

# ESPs
  # moderately abusable, but mostly legit
  # Org-Name iContact (Broadwick Corp./Preation Inc.)
  "216.27.93.0/25",
  "207.254.213.192/26",

  # are these guys an ESP? rDNS in many blocks shows secureserver.net, with SMTPish overtones
  #Org-Name GoDaddy.com, Inc.
  #IP-Network 64.202.160.0/19
  "64.202.160.0/24",
  "64.202.165.0/26",
  "64.202.165.180/30",
  "64.202.165.192/29",
  "64.202.165.224/28",
  #IP-Network 72.167.0.0/16
  "72.167.82.80/29",
  "72.167.82.90",
  "72.167.224.0/28",
  "72.167.234.224/27",
  #IP-Network 208.109.0.0/16
  "208.109.80.23",
  "208.109.80.58",
  "208.109.80.24",
  "208.109.80.74",
  "208.109.80.80",
  "208.109.80.81",
  #IP-Network 68.178.128.0/17
  "68.178.232.18",

  #IP-Network 173.201.0.0/16
  "173.201.193.0/23",

  # EmailBrain - note no actual netblocks of their own. :(
  "66.100.171.162",
#163.171.100.66.in-addr.arpa domain name pointer eb08.ebhost9.com.
#164.171.100.66.in-addr.arpa domain name pointer a.eb08.ebhost9.com.
#165.171.100.66.in-addr.arpa domain name pointer b.eb08.ebhost9.com.
#166.171.100.66.in-addr.arpa domain name pointer c.eb08.ebhost9.com.

  # Tucows - don't recall if they're affiliated with anyone else
  #CIDR: 64.96.0.0/14
  "64.98.42.0/24",

  # not really an ESP, exactly, but best-fit
  #IP-Network 208.47.184.0/23
  #Org-Name SYNACOR
  # mailrelay.embarq.synacor.com
  "208.47.184.3",

  #CIDR: 208.75.120.0/22
  #OrgName: Constant Contact, Inc


# Mailbox providers

  # Hushmail
  #IP-Network 65.39.178.0/24
  # all existent rdns in this range are smtp, but not all exist
  "65.39.178.128/27",

# eBay/PayPal
  "66.211.160.0/19",

# Misc legit
  # Texas Instruments
  #IP-Network 192.94.94.0/24
  "192.94.94.40",

# Government tentacles
  #inetnum: 193.39.144.0 - 193.39.159.255
  #descr: City of Edinburgh District Council
  "193.39.157.39",

  ); # done def for @dontlistme_cidrs

# Turn the strings into NetAddr::IP objects.  An entry NetAddr::IP cannot
# parse comes back undefined; report and drop it here rather than letting it
# crash the within() check in the middle of a mailbox run.
my @dontlistme;
foreach my $cidr (@dontlistme_cidrs) {
  my $block = NetAddr::IP->new($cidr);
  if (defined $block) {
    push @dontlistme, $block;
  } else {
    warn "ignoring unparseable netblock '$cidr' in the don't-list table\n";
  }
}
604
# Walk every message the IMAP search returned.  For each one:
#  - run it through SpamAssassin and count each URI host it mentions once
#    (in %urilist), skipping mailto: URIs, image URIs and any domain in
#    SpamAssassin's uridnsbl_skip_domains setting;
#  - take the first untrusted relay IP and count it in %iplist, unless it
#    falls inside one of the @dontlistme netblocks.
# Messages with no usable relay IP are reported on stdout ("phtui:") with
# their first Received: header and Subject: so they can be handled manually.
MSG: for my $msgid (@msgs) {
  # iterate over the search results themselves rather than a separately
  # obtained count, which may not match the number of results
  next MSG unless defined $msgid;

  my $msg = $imap->message_string($msgid);

  print ".";

  if (!defined $msg) {
    warn "couldn't fetch message $msgid, skipping\n";
    next MSG;
  }

  my $mail = $spamtest->parse($msg);
  my $status = $spamtest->check($mail);

  # stolen from SA plugin bits (parsed_metadata)
  my $skip_domains = $status->{main}->{conf}->{uridnsbl_skip_domains} || {};

  # Generate the full list of html-parsed domains.
  my $uris = $status->get_uri_detail_list() || {};

  # hosts seen in this message; each counts once per message
  my %msguris;

  while (my ($uri, $info) = each %{$uris}) {
    # we want to skip mailto: uris
    next if ($uri =~ /^mailto:/);

    # no domains were found via this uri, so skip
    next unless ($info->{domains});

    next if $info->{types}->{img};

    foreach my $domain ( keys %{ $info->{domains} } ) {
      next if exists $skip_domains->{$domain};
      $msguris{$domain}++;
    }
  }

  $urilist{$_}++ foreach keys %msguris;

  # now, get the relay IP

  my $stmsg = $status->get_message();
  my $untrusted = $stmsg->{metadata}->{relays_untrusted} || [];

  # Only build an address object when a relay was really found: handing
  # NetAddr::IP an undefined value may not come back as "no address" in
  # every version of the module.
  my $firstip = @{$untrusted} ? $untrusted->[0]->{ip} : undef;
  my $relayip = defined $firstip ? NetAddr::IP->new($firstip) : undef;

  if (!defined $relayip) {
    # sigh. messages tagged as spam already make life difficult by hiding the
    # old Received: headers. We'll just handle them manually for now.
    print "phtui: ";
    my $headers = $imap->parse_headers($msgid, "Received", "Subject") || {};
    my $recv = $headers->{'Received'} ? $headers->{'Received'}[0] : undef;
    my $subj = $headers->{'Subject'} ? $headers->{'Subject'}[0] : undef;
    $recv = '' unless defined $recv;
    $subj = '' unless defined $subj;
    print "$recv\n";
    print " $subj\n";
  }
  else {
    my $exempt = 0;
    foreach my $block (@dontlistme) {
      next unless defined $block;	# tolerate an unparseable table entry
      if ($relayip->within($block)) {
        $exempt = 1;
        last;
      }
    }

    if (!$exempt) {
      $iplist{$relayip->addr}++;

#      last if $i > 2;
      # pause after each counted message, as the original loop did
      # (presumably to go easy on the IMAP server)
      sleep 1;
    }
  }

  # Release SpamAssassin's per-message state; without this the memory used
  # for each message is held for the rest of the run.
  $status->finish();
  $mail->finish();
} # IMAP message iteration
677
print " Done.\n";

# Report the relay IPs: with -r as zone-data lines keyed on the reversed
# address, otherwise as "IP <tab> count".
# mm. don't really need times on the IP lists
#if ($opts{r}) {
#  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
#}
foreach my $ip (sort keys %iplist) {
  if ($opts{r}) {
    # only a dotted-quad address can be reversed this way; reusing stale
    # captures from a failed match would print a bogus record
    if ($ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
      print "+$4.$3.$2.$1.spamhosts.company.com:127.0.0.2:900:::\n";
    } else {
      warn "skipping non-IPv4 relay address '$ip'\n";
    }
  } else {
    print "$ip\t $iplist{$ip}\n";
  }
}

# Report the URI hosts, but only those that neither multi.uribl.com nor the
# local company.dnsbl zone already knows about (both lookups say NXDOMAIN).
if ($opts{u}) {
  print strftime("# %Y/%m/%d %H:%M", localtime())."\n";
}
foreach my $uri (sort keys %urilist) {
  # These names were pulled out of spam bodies, so treat them as hostile:
  # accept only plain hostname characters (which also rules out anything
  # that "host" could take for an option).
  if ($uri !~ /^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$/) {
    warn "skipping suspicious URI host '$uri'\n";
    next;
  }

  next unless _dns_nxdomain("$uri.multi.uribl.com")
	&& _dns_nxdomain("$uri.company.dnsbl.");

  if ($opts{u}) {
    print "+$uri.uribl.company.com:127.0.0.2:900:::\n";
  } else {
    # URI plus count
    print "$uri\t$urilist{$uri}\n";
  }
}

$imap->close();

# Close IMAP connection cleanly.
$imap->logout();

# integrate learned stuff - journal sync etc IFF bayes_journal is set AND you want to sync right away
# irrelevant for SQL Bayes
#my $sarebuild = `/usr/bin/sa-learn --rebuild`;
#print "-------\nRebuild: ",$sarebuild,"\n-------\n" if $debug;

# _dns_nxdomain($name)
# Runs "host $name" and returns true if its output reports NXDOMAIN.
# The command is started in list form, so no shell ever sees $name.
# Returns false if "host" cannot be run at all.
sub _dns_nxdomain {
  my ($name) = @_;

  open(my $hostfh, '-|', 'host', $name)
    or do {
      warn "couldn't run host for '$name': $!\n";
      return 0;
    };
  my @answer = <$hostfh>;
  # the close status is not checked: only the NXDOMAIN text in the output
  # decides the result
  close $hostfh;

  return scalar(grep { /NXDOMAIN/ } @answer) ? 1 : 0;
}
Note: See TracBrowser for help on using the repository browser.