source: trunk/dnsbl/DNSBL.pm@ 2

Last change on this file since 2 was 2, checked in by Kris Deugau, 15 years ago

/trunk/dnsbl

Import work to date

File size: 12.8 KB
Line 
1# DNSBL
2# Functions for interacting with the DNSBL database
3
4package DNSBL;
5
6use strict;
7use warnings;
8use Exporter;
9use DBI;
10use NetAddr::IP;
11
12use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
13
14$VERSION = 2.0;
15@ISA = qw(Exporter);
16@EXPORT_OK = qw(
17 );
18
19@EXPORT = (); # Export nothing by default.
20%EXPORT_TAGS = ( ALL => [qw(
21 )]
22 );
23
24## "constants"
25
26# 8 bits available
27# 128 is per-IP shitlist
28# 2 is IP hitlist
29# 1 not available so we don't $self->shoot(foot)
30our %bitfields = (
31 # block levels
32 0 => 16,
33 1 => 8,
34 2 => 4,
35 # ip
36 ip => 2,
37 # OOB
38 org => 32,
39 block => 64,
40 # "I'm a total spamming moron!" - per-IP only!
41 slist => 128
42);
43
44# probably needs some tuning; even 7 hits in a /24 is a pretty small percentage
45# number of IPs in a block of the given masklength needed to have that block automatically listed
46our %autolist = (
47 30 => 1,
48 29 => 2,
49 28 => 3,
50 27 => 4,
51 26 => 5,
52 25 => 6,
53 24 => 7,
54 23 => 8,
55 22 => 10,
56 21 => 12,
57 20 => 14,
58 19 => 16,
59 18 => 18,
60 17 => 20,
61 16 => 22,
62 15 => 24,
63 14 => 26,
64 13 => 28,
65 12 => 30,
66 11 => 32,
67 10 => 34,
68 9 => 36,
69 8 => 38,
70 7 => 2**31,
71 6 => 2**31,
72 5 => 2**31,
73 4 => 2**31,
74 3 => 2**31,
75 2 => 2**31,
76 1 => 2**31,
77 0 => 2**31
78);
79
80# le sigh. constants for masklength iterationing
81our @howmany = (1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2);
82
83# variables
84our $dbh;
85
86our $err;
87our $errstr;
88
89# basic object subs
90sub new {
91# iff we want to start taking arguments, or doing other things on instantiation
92# my $self = {};
93# bless $self, "DNSBL";
94# return $self;
95 bless {};
96}
97
98sub DESTROY {
99 my $self = shift;
100 $self->dbclose();
101}
102
103# JIC someone wants to close the db but not finish the script
104sub dbclose {
105 $dbh->rollback;
106 $dbh->disconnect;
107}
108
109## specific object subs:
110
111sub connect {
112 my $DSN = "DBI:Pg:host=dbhost;dbname=dnsbl";
113# my $DSN = "DBI:Pg:dbname=dnsbl";
114 my $user = "dnsbl";
115 my $pass = "spambgone";
116 ## want to NOT autocommit everything, it's unlikely we'll step on our own toes but...
117 $dbh = DBI->connect($DSN, $user, $pass, {
118 AutoCommit => 0,
119 PrintError => 1
120 })
121 or die "database inaccessible: ".$DBI::errstr;
122 return $dbh;
123}
124
125# return true if the IP has been reported
126sub ipexists {
127 my $self = shift;
128 my $ip = shift;
129 my $sth = $dbh->prepare("SELECT count(*) FROM iplist WHERE ip=?");
130 $sth->execute($ip);
131 my ($ret) = $sth->fetchrow_array();
132 return $ret;
133}
134
135
136# report an IP or URI to the db
137# increments a hit counter iff the reported IP or URI exists, otherwise it adds it
138sub report {
139 my $self = shift;
140 my $rep = shift;
141 my $sth;
142 my $rows = 0;
143 if ($rep =~ /^[\d.]+$/) {
144 # weesa gonna ASS-U-ME IP addresses are sanely formatted.
145 $sth = $dbh->prepare("SELECT count FROM iplist WHERE ip=?");
146 $sth->execute($rep) or die "eep? ".$dbh->errstr."\n";
147 $rows = $sth->rows;
148 if ($rows == 0) {
149 $sth = $dbh->prepare("INSERT INTO iplist (ip) VALUES (?)");
150 } elsif ($rows == 1) {
151 $sth = $dbh->prepare("UPDATE iplist SET count=count+1 WHERE ip=?");
152 } else {
153 die "db corrupt: found $rows matches on $rep\n";
154 }
155 $sth->execute($rep) or die "couldn't update listing for $rep: ".$dbh->errstr."\n";
156 } else {
157 return;
158 }
159 $dbh->commit;
160 return $rows;
161} # end report()
162
163
164# add a new org
165# return the orgid
166# if the org exists, return the orgid anyway
167sub addorg {
168 my $self = shift;
169 my $orgname = shift;
170 my $listme = shift || 'n';
171 my $ret = $self->orgexists($orgname);
172 return $ret if $ret;
173 my $sth = $dbh->prepare("INSERT INTO orgs (orgname,listme) VALUES (?,?)");
174 $sth->execute($orgname,$listme) or die "couldn't add org $orgname: ".$dbh->errstr."\n";
175 $dbh->commit;
176 $sth = $dbh->prepare("SELECT orgid FROM orgs WHERE orgname=?");
177 $sth->execute($orgname);
178 my ($orgid) = $sth->fetchrow_array();
179 return $orgid;
180} # end addorg
181
182
183# checks for existence - nb, exact match! No way to really handle anything else. :/
184sub orgexists {
185 my $self = shift;
186 my $org = shift;
187 my $sth = $dbh->prepare("SELECT orgid FROM orgs WHERE orgname=?");
188 $sth->execute($org);
189 my ($ret) = $sth->fetchrow_array();
190 return $ret;
191} # end orgexists();
192
193
194# add a block. requires the orgid
195##fixme needs error handling
196sub addblock {
197 my $self = shift;
198 my $blockin = shift;
199 my $orgid = shift;
200 my $level = shift;
201 $blockin =~ s/^\s+//;
202 $blockin =~ s/\s+$//;
203 my $block = new NetAddr::IP "$blockin"; # need this to clean up messes like ranges. sigh.
204
205 local $dbh->{AutoCommit} = 1; # force autocommit
206
207 my $sth = $dbh->prepare("INSERT INTO blocks (block,orgid,level) VALUES (?,?,?)");
208 $sth->execute("$block",$orgid,$level);
209 # nb: no need to return anything, since the CIDR block is the key
210}
211
212
213sub blockexists {
214 my $self = shift;
215 my $block = shift;
216 my $sth = $dbh->prepare("SELECT count(*) FROM blocks WHERE block=?");
217 $sth->execute($block);
218 my ($ret) = $sth->fetchrow_array();
219 return $ret;
220}
221
222
223# returns list (block,orgname) for the block that contains the passed IP.
224# accepts a level argument if you don't want the top-level registrar allocation block
225sub getcontainer {
226 my $self = shift;
227 my $ip = shift;
228 my $level = shift || 0;
229 my $sth = $dbh->prepare("SELECT b.block,o.orgname FROM blocks b INNER JOIN orgs o ".
230 "ON b.orgid=o.orgid WHERE b.block >> ? AND b.level = ?");
231 $sth->execute($ip,$level);
232 return $sth->fetchrow_array();
233} # end getcontainer()
234
235
236# whee! Recursion is Fun!
237# Call ourself to dig down through the layers of blocks from registar-allocation
238# (level 0) to final block (level n, not to exceed 2)
239# Take a reference to a hash, and stuff it full of blacklisting goodness.
240# Optionally accept a level, block-container, and OOB block and org arguments for
241# the container to check and return
242# Returns no value directly
243# Calls itself to walk down the tree of containers
244sub export {
245 my $self = shift;
246 my $listhosts = shift;
247
248# Export data as CIDR netblocks or classful (A/B/C) blocks
249# Assume classful as it's more compatible with different DNS servers
250 my $mode = shift || 'class';
251
252# Assume we're checking the whole enchilada if we don't get told where to look.
253 my $level = shift || 0;
254 my $container = shift || '0.0.0.0/0';
255 my $oobblock = shift || 0;
256 my $ooborg = shift || 0;
257
258 if ($level > 3) {
259 warn "getting too deep, breaking off! ($container, $level)\n";
260 return;
261 }
262
263# fiddle $container into a sane state.
264 if ($container =~ m|^\d+\.\d+\.\d+/\d+$|) {
265 $container =~ s|/(\d+)$|.0/$1|;
266 } elsif ($container =~ m|^\d+\.\d+/\d+$|) {
267 $container =~ s|/(\d+)$|.0.0/$1|;
268 } elsif ($container =~ m|^\d+/(\d+)$|) {
269 $container =~ s|/(\d+)$|.0.0.0/$1|;
270 }
271
272 my $sth = $dbh->prepare("SELECT count(*) FROM blocks WHERE block << ?");
273 $sth->execute($container);
274 my ($nblocks) = $sth->fetchrow_array();
275
276 if ($nblocks > 0) {
277 my $sql = "SELECT b.block,b.listme,o.orgname,o.listme ".
278 "FROM blocks b INNER JOIN orgs o ON b.orgid=o.orgid ".
279 "WHERE b.level=$level and b.block << '$container' ORDER BY b.block, masklen(b.block) DESC";
280 $sth = $dbh->prepare($sql);
281 $sth->execute();
282 while (my ($cidr,$listblock,$org,$listorg) = $sth->fetchrow_array()) {
283 $self->export($listhosts,$mode,$level+1,$cidr,$listblock,$listorg);
284 }
285 } # avoid checking content of subs if we don't have any
286
287 # don't check all 4.2 billion IPs individually if we're looking at all teh Intarwebs
288 return if $container eq '0.0.0.0/0';
289
290##fixme: need a way to dig out orphan IPs at all levels - IPs not found in a
291# subblock of the current container when the current container *has* subblocks
292# NB: this may be better handled as an out-of-band data-integrity-checker
293
294 # decrement level here so the right bitfield setting gets picked. this segment
295 # is inherently off-by-one from the block-recursion loop, and I can't see a
296 # better way to work around that. >:(
297 $level--;
298
299# Snag all parent block "is-it-listed?" data, and stuff it into a single
300# variable we can use later. Much faster than retrieving this data
301# individually, for each octet iteration.
302
303 my $sql = "SELECT count(i.ip),b.block,b.level,b.listme AS oobblock,o.listme AS ooborg ".
304 "FROM iplist i INNER JOIN blocks b ON i.ip << b.block INNER JOIN orgs o ON b.orgid = o.orgid ".
305 "WHERE b.block >>= ? ".
306 "GROUP BY b.block,b.level,b.listme,o.listme ORDER BY b.block";
307
308 my $parsth = $dbh->prepare($sql);
309 $parsth->execute($container);
310 my $pdata = 0;
311 while (my ($pcount,$p,$plev,$pblock,$porg) = $parsth->fetchrow_array) {
312 my ($pmasklen) = ($p =~ m|\d+/(\d+)$|);
313 $pdata |= $bitfields{$plev} if $pcount >= $autolist{$pmasklen};
314 $pdata |= $bitfields{block} if $pblock;
315 $pdata |= $bitfields{org} if $porg;
316 }
317
318 if ($mode eq 'cidr') {
319 $listhosts->{$container} |= $pdata if $pdata;
320 } else {
321
322 # if $cidr->masklen is <= 24, iterate on /24 boundaries for bulk sublisting
323 # if $cidr->masklen is <= 16, iterate on /16 boundaries for bulk sublisting
324 # if $cidr->masklen is <= 8, iterate on /8 boundaries for bulk sublisting
325
326 my ($masklen) = ($container =~ m|/(\d+)$|);
327 if ($pdata) {
328 my @blocksubs;
329 if ($masklen <= 30 && $masklen > 24) {
330 my ($net,$octet) = ($container =~ m|^(\d+\.\d+\.\d+\.)(\d+)/|);
331 for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
332 my $host = "$net$entry";
333 $listhosts->{$host} = 0 if !defined($listhosts->{$host});
334 $listhosts->{$host} |= $pdata;
335 }
336 } elsif ($masklen <= 24 && $masklen > 16) {
337 my ($net,$octet) = ($container =~ m|^(\d+\.\d+\.)(\d+)\.\d+/|);
338 for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
339 my $twofour = "$net$entry.*";
340 $listhosts->{$twofour} |= $pdata;
341 }
342 } elsif ($masklen <= 16 && $masklen > 8) {
343 my ($net,$octet) = ($container =~ m|^(\d+\.)(\d+)\.\d+\.\d+/|);
344 for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
345 my $sixteen = "$net$entry.*";
346 $listhosts->{$sixteen} |= $pdata;
347 }
348 } elsif ($masklen <= 8) {
349 my ($octet) = ($container =~ m|^(\d+)\.\d+\.\d+\.\d+/|);
350 for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
351 my $eight = "$entry.*";
352 $listhosts->{$eight} |= $pdata;
353 }
354 }
355
356#print "DEBUG1: $container, ".(@blocksubs + 0)."\n";
357# this seems to be a BIG timesink... execution time ~1:30 without, ~4:30 with
358#if (0){
359# $sth = $dbh->prepare("select block,level,listme from blocks where block >> ?");
360# my $sth2 = $dbh->prepare("select count(*) from iplist where ip << ?");
361# foreach (@blocksubs) {
362#print " DEBUG: $_ container-is-listed check\n";
363# collect info on container block(s)
364# $sth->execute($container);
365# while (my ($parent, $plev, $listme) = $sth->fetchrow_array()) {
366# $sth2->execute($parent);
367# my ($parlen) = ($parent =~ m|/(\d+)|);
368# my ($parcount) = $sth2->fetchrow_array();
369#print " DEBUG: $parent: $parlen, $parcount, $plev\n";
370# $listhosts->{$_} |= $bitfields{$plev} if $parcount >= $autolist{$parlen}; #hmm.
371# $listhosts->{$_} |= $bitfields{block} if $listme;
372# }
373# }
374#}
375
376 } # generate autolist entries for ips/octets not (yet) seen in reports
377
378 } # cidr vs classful mode
379
380 $sth = $dbh->prepare("SELECT ip,s4list FROM iplist WHERE ip << ? ORDER BY ip");
381 $sth->execute($container);
382 while (my ($ip,$moron) = $sth->fetchrow_array()) {
383 $listhosts->{$ip} |= $pdata;
384 if ($moron) {
385 $listhosts->{$ip} = $bitfields{slist};
386 } else {
387 $listhosts->{$ip} |= $bitfields{ip};
388 }
389 }
390
391# get IPs which for reasons unknown are apparently allocated directly from the
392# parent registry (and so do not have containing netblocks in this system) O_o
393# select * from iplist where not (select count(*) from blocks where ip << block) > 0;
394
395 return;
396} # end export()
397
398
399sub export_alt {
400 my $self = shift;
401 my $listhosts = shift;
402 my $level = shift || 0;
403 my $container = shift || '0.0.0.0/0';
404 my $oobblock = shift || 0;
405 my $ooborg = shift || 0;
406
407#print "\nDEBUG: called with $level, $container, $oobblock, $ooborg\n";
408# if $level > 2 or $container =~ /^64\.76\./;
409# my %listhosts;
410
411# $level = 0 if !$level;
412 if ($level > 3) {
413 warn "getting too deep, breaking off!\n";
414 return;
415 }
416
417 my $sth = $dbh->prepare("select ip,s4list from iplist order by ip");
418 my $bsth = $dbh->prepare("select b.block,b.listme,b.level,o.listme ".
419 "from blocks b inner join orgs o on b.orgid=o.orgid ".
420 "where b.block >> ?");
421 while (my ($ip,$s4list) = $sth->fetchrow_array) {
422 $bsth->execute($ip);
423 while (my ($block,$blisted,$blevel,$olisted) = $bsth->fetchrow_array) {
424 $listhosts->{$ip} |= 0;
425 }
426 }
427
428} # end export_alt()
429
430
431## DNSBL::autolist_block()
432# check if a block should be autolisted
433sub autolist_block {
434 my $self = shift;
435 my $block = shift;
436
437 my $cidr = new NetAddr::IP "$block";
438 my $sth = $dbh->prepare("select count(*) from iplist where ip << ?");
439 $sth->execute("$cidr");
440 my ($count) = $sth->fetchrow_array;
441
442 return 1 if $count >= $autolist{$cidr->masklen};
443 return 0;
444} # end autolist_block()
445
446
447# make Perl happy
4481;
Note: See TracBrowser for help on using the repository browser.