| [799] | 1 | #!/usr/bin/perl
 | 
|---|
 | 2 | # Convert a BIND zone file to a hosts file
 | 
|---|
 | 3 | ##
 | 
|---|
 | 4 | # Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
 | 
|---|
 | 5 | # 
 | 
|---|
 | 6 | #    This program is free software: you can redistribute it and/or modify
 | 
|---|
 | 7 | #    it under the terms of the GNU General Public License as published by
 | 
|---|
 | 8 | #    the Free Software Foundation, either version 3 of the License, or
 | 
|---|
 | 9 | #    (at your option) any later version. 
 | 
|---|
 | 10 | # 
 | 
|---|
 | 11 | #    This program is distributed in the hope that it will be useful,
 | 
|---|
 | 12 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 13 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 14 | #    GNU General Public License for more details.
 | 
|---|
 | 15 | # 
 | 
|---|
 | 16 | #    You should have received a copy of the GNU General Public License
 | 
|---|
 | 17 | #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
|---|
 | 18 | ##
 | 
|---|
 | 19 | 
 | 
|---|
 | 20 | use strict;
 | 
|---|
 | 21 | use warnings;
 | 
|---|
| [805] | 22 | use Getopt::Long;
 | 
|---|
 | 23 | 
 | 
|---|
| [799] | 24 | use Data::Dumper;
 | 
|---|
 | 25 | 
 | 
|---|
 | 26 | # push "the directory the script is in" into @INC
 | 
|---|
 | 27 | use FindBin;
 | 
|---|
 | 28 | use lib "$FindBin::RealBin/";
 | 
|---|
 | 29 | 
 | 
|---|
 | 30 | use DNSDB;
 | 
|---|
 | 31 | 
 | 
|---|
| [805] | 32 | my @skipdefs;
 | 
|---|
 | 33 | my $skipfile;
 | 
|---|
 | 34 | my $dryrun = 0;
 | 
|---|
| [807] | 35 | # CNAME chain depth
 | 
|---|
 | 36 | my $maxdepth = 3;
 | 
|---|
| [799] | 37 | 
 | 
|---|
| [805] | 38 | GetOptions(
 | 
|---|
 | 39 |         "skip=s" => \@skipdefs,
 | 
|---|
 | 40 |         "skipfile=s" => \$skipfile,
 | 
|---|
 | 41 |         "test|dry-run" => \$dryrun,
 | 
|---|
 | 42 | );
 | 
|---|
| [799] | 43 | 
 | 
|---|
 | 44 | my $zname = shift @ARGV;
 | 
|---|
| [805] | 45 | 
 | 
|---|
 | 46 | my $usage = "usage: bind2hosts zone [--skip pattern [--skip pattern2 ...]] [--skipfile file]
 | 
|---|
 | 47 |         zonename < zonefile
 | 
|---|
 | 48 | 
 | 
|---|
 | 49 |         --skip
 | 
|---|
 | 50 |                 Specify a string to skip in the records.  If an IP-like string is
 | 
|---|
 | 51 |                 used, and the zone is a reverse zone, it will also search for the
 | 
|---|
 | 52 |                 octet-reversed form.  Specify multiple times to skip multiple
 | 
|---|
 | 53 |                 different record patterns.
 | 
|---|
| [807] | 54 |         --skipfile
 | 
|---|
| [805] | 55 |                 A file containing patterns to skip.  Patterns from the file and
 | 
|---|
 | 56 |                 any --skip arguments are merged.
 | 
|---|
 | 57 |         zonename
 | 
|---|
 | 58 |                 The name of the zone to import.  Required.
 | 
|---|
 | 59 | 
 | 
|---|
 | 60 |         Zone data will be read from STDIN.
 | 
|---|
 | 61 | ";
 | 
|---|
 | 62 | if (!$zname) {
 | 
|---|
 | 63 |   die $usage;
 | 
|---|
 | 64 | }
 | 
|---|
 | 65 | 
 | 
|---|
 | 66 | if ($skipfile) {
 | 
|---|
 | 67 |   if (-f $skipfile) {
 | 
|---|
 | 68 |     open SKIP, "<$skipfile";
 | 
|---|
 | 69 |     while (<SKIP>) {
 | 
|---|
 | 70 |       chomp;
 | 
|---|
 | 71 |       push @skipdefs, $_;
 | 
|---|
 | 72 |     }
 | 
|---|
 | 73 |     close SKIP;
 | 
|---|
 | 74 |   } else {
 | 
|---|
 | 75 |     warn "skipfile $skipfile requested but it doesn't seem to exist.  Continuing.\n";
 | 
|---|
 | 76 |   } 
 | 
|---|
 | 77 | }  
 | 
|---|
 | 78 | 
 | 
|---|
| [799] | 79 | my $rev = 'n';
 | 
|---|
 | 80 | my $zid;
 | 
|---|
 | 81 | 
 | 
|---|
 | 82 | my %amap;
 | 
|---|
 | 83 | my %namemap;
 | 
|---|
| [801] | 84 | my %cmap;
 | 
|---|
| [799] | 85 | 
 | 
|---|
| [805] | 86 | my $dnsdb = new DNSDB;
 | 
|---|
| [799] | 87 | 
 | 
|---|
| [801] | 88 | # need an ultimate fallback for this one
 | 
|---|
 | 89 | my $defttl = 900;
 | 
|---|
 | 90 | my $origin = "$zname.";   # to append to unqualified names
 | 
|---|
 | 91 | my $curlabel;
 | 
|---|
 | 92 | my $prevlabel;
 | 
|---|
| [799] | 93 | 
 | 
|---|
| [801] | 94 | my $i = 0;
 | 
|---|
 | 95 | 
 | 
|---|
| [799] | 96 | # need to spin up a full state machine-ish thing, because BIND zone files are all about context
 | 
|---|
| [801] | 97 | while (my $rec = <>) {
 | 
|---|
 | 98 |   chomp $rec;
 | 
|---|
 | 99 |   next if $rec =~ /^\s*$/;
 | 
|---|
 | 100 |   next if $rec =~ /^\s*;/;
 | 
|---|
 | 101 |   next if $rec =~ /^\s*\)/;     # SOA closing (possibly other records too?)
 | 
|---|
| [807] | 102 |                                 # arguably should do some more targeted voodoo when parsing the SOA details
 | 
|---|
| [800] | 103 | 
 | 
|---|
| [805] | 104 |   my $skipflag = 0;
 | 
|---|
 | 105 |   foreach (@skipdefs) {
 | 
|---|
 | 106 |     if ($rec =~ /\Q$_\E/) {
 | 
|---|
 | 107 |       $skipflag = 1;
 | 
|---|
| [807] | 108 |       # might want to do something with the skipped records someday
 | 
|---|
| [805] | 109 |     }
 | 
|---|
 | 110 |   }
 | 
|---|
 | 111 |   next if $skipflag;
 | 
|---|
 | 112 | 
 | 
|---|
| [801] | 113 |   if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) {
 | 
|---|
| [799] | 114 |     # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
 | 
|---|
 | 115 |     if ($macro eq 'TTL') {
 | 
|---|
| [800] | 116 |       # irrelevant for a hosts file
 | 
|---|
| [799] | 117 |     } elsif ($macro eq 'ORIGIN') {
 | 
|---|
| [800] | 118 |       # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.
 | 
|---|
 | 119 |       if ($mdetail =~ /\.$/) {
 | 
|---|
 | 120 |         $origin = $mdetail;
 | 
|---|
| [799] | 121 |       } else {
 | 
|---|
| [800] | 122 |         # append current origin to unqualified origin
 | 
|---|
 | 123 |         $origin = "$mdetail.$origin";
 | 
|---|
| [799] | 124 |       }
 | 
|---|
 | 125 |     }
 | 
|---|
| [800] | 126 | ##fixme: should arguably handle $INCLUDE
 | 
|---|
 | 127 | # probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS
 | 
|---|
| [799] | 128 |     next;
 | 
|---|
 | 129 |   }
 | 
|---|
| [800] | 130 | 
 | 
|---|
| [806] | 131 |   # yay for special cases
 | 
|---|
 | 132 |   $origin = '' if $origin eq '.';
 | 
|---|
 | 133 | 
 | 
|---|
| [801] | 134 |   my $origrec = $rec;
 | 
|---|
| [800] | 135 | 
 | 
|---|
 | 136 |   # leading whitespace indicates "same label as last record"
 | 
|---|
| [801] | 137 |   if ($rec =~ /^\s/) {
 | 
|---|
| [800] | 138 |     $curlabel = $prevlabel;
 | 
|---|
 | 139 |   } else {
 | 
|---|
| [801] | 140 |     ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
 | 
|---|
| [800] | 141 |   }
 | 
|---|
 | 142 | 
 | 
|---|
 | 143 |   # magic name!
 | 
|---|
 | 144 |   $curlabel = "$zname." if $curlabel eq '@';
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 |   # append $ORIGIN if name is not fully qualified.
 | 
|---|
 | 147 |   if ($curlabel !~ /\.$/) {
 | 
|---|
| [801] | 148 |     $curlabel .= ".$origin";
 | 
|---|
| [800] | 149 |   }
 | 
|---|
 | 150 | 
 | 
|---|
 | 151 |   # check for zone scope.  skip bad records.
 | 
|---|
 | 152 |   if ($curlabel !~ /$zname.$/) {
 | 
|---|
 | 153 |     warn "bad record $origrec, maybe bad \$ORIGIN?\n";
 | 
|---|
 | 154 |     next;
 | 
|---|
 | 155 |   }
 | 
|---|
 | 156 | 
 | 
|---|
| [801] | 157 |   # trim the label, if any
 | 
|---|
 | 158 |   $rec =~ s/^([\w\@_.-]*)\s+//;
 | 
|---|
 | 159 | 
 | 
|---|
| [807] | 160 |   # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
 | 
|---|
 | 161 |   # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
 | 
|---|
| [801] | 162 |   my $nc = 0;
 | 
|---|
 | 163 |   my %seenatoms;
 | 
|---|
| [805] | 164 |   # we don't actually use these but we have to recognize them
 | 
|---|
 | 165 |   my $class = 'IN';
 | 
|---|
 | 166 |   # not preset as we need to detect whether it's present in the record
 | 
|---|
 | 167 |   my $ttl;
 | 
|---|
 | 168 |   my $type;
 | 
|---|
| [801] | 169 |   my $badrec;
 | 
|---|
 | 170 |   my $curatom = 'class';
 | 
|---|
| [805] | 171 |   eval {
 | 
|---|
 | 172 |     for (; $nc < 3; $nc++) {
 | 
|---|
 | 173 |       last if $type;    # short-circuit if we've got a type, further data is record-specific.
 | 
|---|
 | 174 |       my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
 | 
|---|
 | 175 |       # should be safe?
 | 
|---|
 | 176 |       last if !$atom;
 | 
|---|
 | 177 |       if ($atom =~ /^\d+$/) {
 | 
|---|
 | 178 |         if (defined($ttl)) {
 | 
|---|
 | 179 |           # we already have a TTL, so another all-numeric field is invalid.
 | 
|---|
| [803] | 180 |           die "bad record ($origrec)\n";
 | 
|---|
| [805] | 181 |         } else {
 | 
|---|
 | 182 |           if ($curatom ne 'class' && $curatom ne 'ttl') {
 | 
|---|
 | 183 |             die "bad record ($origrec)\n";
 | 
|---|
 | 184 |           }
 | 
|---|
 | 185 |           $curatom = 'ttl';
 | 
|---|
 | 186 |           $ttl = $atom;
 | 
|---|
| [801] | 187 |         }
 | 
|---|
 | 188 |       }
 | 
|---|
| [805] | 189 |       elsif ($atom =~ /^IN|CS|CH|HS$/) {
 | 
|---|
 | 190 |         if ($atom =~ /CS|CH|HS/) {
 | 
|---|
 | 191 |           die "unsupported class $atom in record ($origrec)\n";
 | 
|---|
 | 192 |         }
 | 
|---|
 | 193 |         $curatom = 'class';
 | 
|---|
 | 194 |         $class = $atom;
 | 
|---|
| [801] | 195 |       }
 | 
|---|
| [807] | 196 |       elsif ($atom =~ /^[A-Z\d-]+/) {
 | 
|---|
| [805] | 197 |         # check against dnsadmin's internal list of known DNS types.
 | 
|---|
 | 198 |         if ($reverse_typemap{$atom}) {
 | 
|---|
 | 199 |           $type = $atom;
 | 
|---|
 | 200 |         } else {
 | 
|---|
 | 201 |           die "unknown type $atom in record ($origrec)\n";
 | 
|---|
 | 202 |         }
 | 
|---|
 | 203 |         $curatom = 'type';
 | 
|---|
| [801] | 204 |       }
 | 
|---|
| [805] | 205 |       $rec =~ s/^$atom\s*//;
 | 
|---|
 | 206 |     } # class/type/TTL loop
 | 
|---|
 | 207 |   };
 | 
|---|
 | 208 |   if ($@) {
 | 
|---|
 | 209 |     warn $@;
 | 
|---|
 | 210 |     next;
 | 
|---|
 | 211 |   }
 | 
|---|
| [801] | 212 | 
 | 
|---|
| [805] | 213 |   $ttl = $defttl if !defined($ttl);
 | 
|---|
| [801] | 214 | 
 | 
|---|
| [807] | 215 |   # Just In Case we need the original rdata after we've sliced off more pieces
 | 
|---|
| [801] | 216 |   my $rdata = $rec;
 | 
|---|
 | 217 |   $prevlabel = $curlabel;
 | 
|---|
| [799] | 218 | 
 | 
|---|
| [801] | 219 | ##fixme:  squish this down for this script since SOA records are irrelevant
 | 
|---|
 | 220 |   if ($type eq 'SOA') {
 | 
|---|
 | 221 |     my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
 | 
|---|
| [805] | 222 |     die "Can't parse gibberish SOAish record: '$rdata'/'$origrec'\n" if !$ns;
 | 
|---|
| [801] | 223 |     $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;
 | 
|---|
| [799] | 224 | 
 | 
|---|
| [801] | 225 |     # There are probably more efficient ways to do this but the SOA record
 | 
|---|
 | 226 |     # format is essentially character based, not line-based.
 | 
|---|
| [807] | 227 |     # In theory the SOA serial etc may be spread over "many" lines, bounded by ().
 | 
|---|
| [800] | 228 | 
 | 
|---|
| [801] | 229 |     # Parse fields from $rdata if present
 | 
|---|
 | 230 |     my @soabits;
 | 
|---|
 | 231 |     my @soafirst = split /\s+/, $rdata;
 | 
|---|
 | 232 |     while (my $f = shift @soafirst) {
 | 
|---|
 | 233 |       last if $f !~ /^\d/;
 | 
|---|
 | 234 |       push @soabits, $f;
 | 
|---|
 | 235 |     }
 | 
|---|
| [799] | 236 | 
 | 
|---|
| [801] | 237 |     # Read more lines if we don't have enough SOA fields filled
 | 
|---|
 | 238 |     while (scalar(@soabits) < 5) {
 | 
|---|
 | 239 |       my $tmp = <>;
 | 
|---|
 | 240 |       $tmp =~ s/^\s*//;
 | 
|---|
 | 241 |       my @tmpsoa = split /\s+/, $tmp;
 | 
|---|
 | 242 |       while (my $f = shift @tmpsoa) {
 | 
|---|
 | 243 |         last if $f !~ /^\d/;
 | 
|---|
 | 244 |         push @soabits, $f;
 | 
|---|
 | 245 |       }
 | 
|---|
 | 246 |       if (scalar(@soabits) == 5) {
 | 
|---|
 | 247 |         last;
 | 
|---|
 | 248 |       }
 | 
|---|
 | 249 |     }
 | 
|---|
 | 250 |   } # SOA
 | 
|---|
| [799] | 251 | 
 | 
|---|
| [801] | 252 | ##fixme:  trim dupes if possible
 | 
|---|
| [805] | 253 | 
 | 
|---|
| [801] | 254 |   elsif ($type eq 'A') {
 | 
|---|
 | 255 |     # need the name->IP map so we can reverse-map the CNAMEs on output
 | 
|---|
| [805] | 256 |     push @{$amap{$curlabel}}, $rdata;
 | 
|---|
| [801] | 257 |     $namemap{$rdata}{$curlabel}++;
 | 
|---|
 | 258 |   } # A record
 | 
|---|
 | 259 | 
 | 
|---|
 | 260 |   elsif ($type eq 'CNAME') {
 | 
|---|
| [802] | 261 |     $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
 | 
|---|
| [801] | 262 |   } # CNAME record
 | 
|---|
 | 263 | 
 | 
|---|
| [805] | 264 |   # all other record types are irrelevant for a hosts file
 | 
|---|
| [801] | 265 | 
 | 
|---|
 | 266 | } # <STDIN>
 | 
|---|
 | 267 | 
 | 
|---|
| [807] | 268 | #print Dumper \%cmap;
 | 
|---|
| [801] | 269 | 
 | 
|---|
| [807] | 270 | # Walk the CNAME list and see if we can match the targets in-zone.
 | 
|---|
 | 271 | # Out-of-zone CNAMEs are out of scope for this conversion.
 | 
|---|
 | 272 | foreach my $cn (sort keys %cmap) {
 | 
|---|
 | 273 |   my $targ = $cmap{$cn};
 | 
|---|
 | 274 | #print "dbg: ".Dumper($targ);
 | 
|---|
 | 275 |   my @targlist;
 | 
|---|
 | 276 | #  push @targlist, $targ;  # mostly for error reporting
 | 
|---|
 | 277 | my $dangle = 0;
 | 
|---|
| [805] | 278 | 
 | 
|---|
| [807] | 279 | my $depth = 1;  # CNAME -> A
 | 
|---|
| [805] | 280 | 
 | 
|---|
| [807] | 281 | # check this as a loop for consistent fail/break conditions.  bonus:  may allow user choice for CNAME depth?
 | 
|---|
 | 282 |   for (; $dangle == 0; $depth++) {
 | 
|---|
| [805] | 283 | 
 | 
|---|
| [807] | 284 | #print "d:$depth  checking $cn -> $targ\n";
 | 
|---|
 | 285 | push @targlist, $targ;
 | 
|---|
 | 286 | 
 | 
|---|
 | 287 |   # Depth limit.  If made user-selectable should arguably set a hard
 | 
|---|
 | 288 |   # limit because deeply chained CNAMEs are Baaaaad, mmkaay?
 | 
|---|
 | 289 |   if ($depth >= $maxdepth) {
 | 
|---|
 | 290 |     warn "CNAMEs too deeply chained, skipping: $cn => ".join(' => ', @targlist)."\n";
 | 
|---|
 | 291 |     last;
 | 
|---|
| [805] | 292 |   }
 | 
|---|
 | 293 | 
 | 
|---|
| [807] | 294 | # break if CNAME target is in the A record list
 | 
|---|
 | 295 |   last if $amap{$targ};
 | 
|---|
 | 296 |   if ($cmap{$targ}) {
 | 
|---|
 | 297 | #     note the new target
 | 
|---|
 | 298 |     my $tmpcn = $targ;
 | 
|---|
 | 299 |     $targ = $cmap{$tmpcn};
 | 
|---|
 | 300 | #print "    chaining $tmpcn to new $targ\n";
 | 
|---|
 | 301 |   } else {
 | 
|---|
 | 302 | #     target is either out of zone or doesn't exist
 | 
|---|
 | 303 |     $dangle = 1;
 | 
|---|
 | 304 |     last;
 | 
|---|
 | 305 |   }
 | 
|---|
 | 306 | 
 | 
|---|
 | 307 | 
 | 
|---|
 | 308 | #warn "chained cname $cn => $targ\n";
 | 
|---|
 | 309 |     # CNAME to another CNAME
 | 
|---|
 | 310 | #$tmpcn => $targ\n";
 | 
|---|
 | 311 | 
 | 
|---|
 | 312 | #  last if $dangle;
 | 
|---|
 | 313 | 
 | 
|---|
 | 314 | #      if (!$amap{$targ}) {
 | 
|---|
 | 315 | #        if ($cmap{$targ}) {
 | 
|---|
 | 316 | #          $tmpcn = $targ;
 | 
|---|
 | 317 | #          $targ = $cmap{$tmpcn};
 | 
|---|
 | 318 | #push @targlist, $targ;
 | 
|---|
 | 319 | #warn "  chain target $cn => ".join(' => ', @targlist).
 | 
|---|
 | 320 | #       "\n";
 | 
|---|
 | 321 | #        } else {
 | 
|---|
 | 322 | #          warn "skipping dangling CNAME $cn => $targlist[0] => $targlist[1]\n";
 | 
|---|
 | 323 | #          next;
 | 
|---|
 | 324 | #        }
 | 
|---|
 | 325 | #      }
 | 
|---|
 | 326 | #    } else {
 | 
|---|
 | 327 | #      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway
 | 
|---|
 | 328 | #      warn "skipping dangling CNAME $cn => $targ\n";
 | 
|---|
 | 329 | #      next;
 | 
|---|
 | 330 | #    }
 | 
|---|
 | 331 | #  }
 | 
|---|
 | 332 | 
 | 
|---|
 | 333 | 
 | 
|---|
 | 334 |   } # CNAME recursion loop
 | 
|---|
 | 335 | 
 | 
|---|
 | 336 | next if $dangle;
 | 
|---|
 | 337 | 
 | 
|---|
 | 338 | #print "    chain target $cn => ".join(' => ', @targlist)."\n";
 | 
|---|
 | 339 |   if ($amap{$targ}) {
 | 
|---|
 | 340 |     # absent any criteria, we use the first IP a name was associated with
 | 
|---|
 | 341 |     my $targip = $amap{$targ}[0];
 | 
|---|
 | 342 |     $namemap{$targip}{$cn}++;
 | 
|---|
 | 343 |   } else {
 | 
|---|
 | 344 |   }
 | 
|---|
 | 345 | } # each %cmap
 | 
|---|
 | 346 | 
 | 
|---|
| [799] | 347 | #print Dumper \%amap;
 | 
|---|
| [801] | 348 | #foreach my $n (keys %amap) {
 | 
|---|
| [805] | 349 | #  foreach my $ip (keys %{$amap{$n}}) {
 | 
|---|
 | 350 | #print "$ip\t$n\n";
 | 
|---|
| [801] | 351 | #    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
 | 
|---|
| [805] | 352 | #    $namemap{$ip}{$n}++;
 | 
|---|
| [801] | 353 | #  }
 | 
|---|
 | 354 | #}
 | 
|---|
| [799] | 355 | 
 | 
|---|
 | 356 | #print Dumper \%namemap;
 | 
|---|
| [805] | 357 | foreach my $ip (sort keys %namemap) {
 | 
|---|
 | 358 |   print "$ip\t".join(' ', sort keys %{$namemap{$ip}})."\n";
 | 
|---|
| [799] | 359 | }
 | 
|---|