| 1 | #!/usr/bin/perl
 | 
|---|
| 2 | # Convert a BIND zone file to a hosts file
 | 
|---|
| 3 | ##
 | 
|---|
| 4 | # Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
 | 
|---|
| 5 | # 
 | 
|---|
| 6 | #    This program is free software: you can redistribute it and/or modify
 | 
|---|
| 7 | #    it under the terms of the GNU General Public License as published by
 | 
|---|
| 8 | #    the Free Software Foundation, either version 3 of the License, or
 | 
|---|
| 9 | #    (at your option) any later version. 
 | 
|---|
| 10 | # 
 | 
|---|
| 11 | #    This program is distributed in the hope that it will be useful,
 | 
|---|
| 12 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 13 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 14 | #    GNU General Public License for more details.
 | 
|---|
| 15 | # 
 | 
|---|
| 16 | #    You should have received a copy of the GNU General Public License
 | 
|---|
| 17 | #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
|---|
| 18 | ##
 | 
|---|
| 19 | 
 | 
|---|
| 20 | use strict;
 | 
|---|
| 21 | use warnings;
 | 
|---|
| 22 | use Getopt::Long;
 | 
|---|
| 23 | 
 | 
|---|
| 24 | use Data::Dumper;
 | 
|---|
| 25 | 
 | 
|---|
| 26 | # push "the directory the script is in" into @INC
 | 
|---|
| 27 | use FindBin;
 | 
|---|
| 28 | use lib "$FindBin::RealBin/";
 | 
|---|
| 29 | 
 | 
|---|
| 30 | use DNSDB;
 | 
|---|
| 31 | 
 | 
|---|
| 32 | my @skipdefs;
 | 
|---|
| 33 | my $skipfile;
 | 
|---|
| 34 | my $dryrun = 0;
 | 
|---|
| 35 | # CNAME chain depth
 | 
|---|
| 36 | my $maxdepth = 3;
 | 
|---|
| 37 | 
 | 
|---|
| 38 | GetOptions(
 | 
|---|
| 39 |         "skip=s" => \@skipdefs,
 | 
|---|
| 40 |         "skipfile=s" => \$skipfile,
 | 
|---|
| 41 |         "test|dry-run" => \$dryrun,
 | 
|---|
| 42 | );
 | 
|---|
| 43 | 
 | 
|---|
| 44 | my $zname = shift @ARGV;
 | 
|---|
| 45 | 
 | 
|---|
| 46 | my $usage = "usage: bind2hosts zone [--skip pattern [--skip pattern2 ...]] [--skipfile file]
 | 
|---|
| 47 |         zonename < zonefile
 | 
|---|
| 48 | 
 | 
|---|
| 49 |         --skip
 | 
|---|
| 50 |                 Specify a string to skip in the records.  If an IP-like string is
 | 
|---|
| 51 |                 used, and the zone is a reverse zone, it will also search for the
 | 
|---|
| 52 |                 octet-reversed form.  Specify multiple times to skip multiple
 | 
|---|
| 53 |                 different record patterns.
 | 
|---|
| 54 |         --skipfile
 | 
|---|
| 55 |                 A file containing patterns to skip.  Patterns from the file and
 | 
|---|
| 56 |                 any --skip arguments are merged.
 | 
|---|
| 57 |         zonename
 | 
|---|
| 58 |                 The name of the zone to import.  Required.
 | 
|---|
| 59 | 
 | 
|---|
| 60 |         Zone data will be read from STDIN.
 | 
|---|
| 61 | ";
 | 
|---|
| 62 | if (!$zname) {
 | 
|---|
| 63 |   die $usage;
 | 
|---|
| 64 | }
 | 
|---|
| 65 | 
 | 
|---|
| 66 | if ($skipfile) {
 | 
|---|
| 67 |   if (-f $skipfile) {
 | 
|---|
| 68 |     open SKIP, "<$skipfile";
 | 
|---|
| 69 |     while (<SKIP>) {
 | 
|---|
| 70 |       chomp;
 | 
|---|
| 71 |       push @skipdefs, $_;
 | 
|---|
| 72 |     }
 | 
|---|
| 73 |     close SKIP;
 | 
|---|
| 74 |   } else {
 | 
|---|
| 75 |     warn "skipfile $skipfile requested but it doesn't seem to exist.  Continuing.\n";
 | 
|---|
| 76 |   } 
 | 
|---|
| 77 | }  
 | 
|---|
| 78 | 
 | 
|---|
| 79 | my $rev = 'n';
 | 
|---|
| 80 | my $zid;
 | 
|---|
| 81 | 
 | 
|---|
| 82 | my %amap;
 | 
|---|
| 83 | my %namemap;
 | 
|---|
| 84 | my %cmap;
 | 
|---|
| 85 | 
 | 
|---|
| 86 | my $dnsdb = new DNSDB;
 | 
|---|
| 87 | 
 | 
|---|
| 88 | # need an ultimate fallback for this one
 | 
|---|
| 89 | my $defttl = 900;
 | 
|---|
| 90 | my $origin = "$zname.";   # to append to unqualified names
 | 
|---|
| 91 | my $curlabel;
 | 
|---|
| 92 | my $prevlabel;
 | 
|---|
| 93 | 
 | 
|---|
| 94 | my $i = 0;
 | 
|---|
| 95 | 
 | 
|---|
| 96 | # need to spin up a full state machine-ish thing, because BIND zone files are all about context
 | 
|---|
| 97 | while (my $rec = <>) {
 | 
|---|
| 98 |   chomp $rec;
 | 
|---|
| 99 |   next if $rec =~ /^\s*$/;
 | 
|---|
| 100 |   next if $rec =~ /^\s*;/;
 | 
|---|
| 101 |   next if $rec =~ /^\s*\)/;     # SOA closing (possibly other records too?)
 | 
|---|
| 102 |                                 # arguably should do some more targeted voodoo when parsing the SOA details
 | 
|---|
| 103 | 
 | 
|---|
| 104 |   my $skipflag = 0;
 | 
|---|
| 105 |   foreach (@skipdefs) {
 | 
|---|
| 106 |     if ($rec =~ /\Q$_\E/) {
 | 
|---|
| 107 |       $skipflag = 1;
 | 
|---|
| 108 |       # might want to do something with the skipped records someday
 | 
|---|
| 109 |     }
 | 
|---|
| 110 |   }
 | 
|---|
| 111 |   next if $skipflag;
 | 
|---|
| 112 | 
 | 
|---|
| 113 |   if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) {
 | 
|---|
| 114 |     # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
 | 
|---|
| 115 |     if ($macro eq 'TTL') {
 | 
|---|
| 116 |       # irrelevant for a hosts file
 | 
|---|
| 117 |     } elsif ($macro eq 'ORIGIN') {
 | 
|---|
| 118 |       # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.
 | 
|---|
| 119 |       if ($mdetail =~ /\.$/) {
 | 
|---|
| 120 |         $origin = $mdetail;
 | 
|---|
| 121 |       } else {
 | 
|---|
| 122 |         # append current origin to unqualified origin
 | 
|---|
| 123 |         $origin = "$mdetail.$origin";
 | 
|---|
| 124 |       }
 | 
|---|
| 125 |     }
 | 
|---|
| 126 | ##fixme: should arguably handle $INCLUDE
 | 
|---|
| 127 | # probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS
 | 
|---|
| 128 |     next;
 | 
|---|
| 129 |   }
 | 
|---|
| 130 | 
 | 
|---|
| 131 |   # yay for special cases
 | 
|---|
| 132 |   $origin = '' if $origin eq '.';
 | 
|---|
| 133 | 
 | 
|---|
| 134 |   my $origrec = $rec;
 | 
|---|
| 135 | 
 | 
|---|
| 136 |   # leading whitespace indicates "same label as last record"
 | 
|---|
| 137 |   if ($rec =~ /^\s/) {
 | 
|---|
| 138 |     $curlabel = $prevlabel;
 | 
|---|
| 139 |   } else {
 | 
|---|
| 140 |     ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
 | 
|---|
| 141 |   }
 | 
|---|
| 142 | 
 | 
|---|
| 143 |   # magic name!
 | 
|---|
| 144 |   $curlabel = "$zname." if $curlabel eq '@';
 | 
|---|
| 145 | 
 | 
|---|
| 146 |   # append $ORIGIN if name is not fully qualified.
 | 
|---|
| 147 |   if ($curlabel !~ /\.$/) {
 | 
|---|
| 148 |     $curlabel .= ".$origin";
 | 
|---|
| 149 |   }
 | 
|---|
| 150 | 
 | 
|---|
| 151 |   # check for zone scope.  skip bad records.
 | 
|---|
| 152 |   if ($curlabel !~ /$zname.$/) {
 | 
|---|
| 153 |     warn "bad record $origrec, maybe bad \$ORIGIN?\n";
 | 
|---|
| 154 |     next;
 | 
|---|
| 155 |   }
 | 
|---|
| 156 | 
 | 
|---|
| 157 |   # trim the label, if any
 | 
|---|
| 158 |   $rec =~ s/^([\w\@_.-]*)\s+//;
 | 
|---|
| 159 | 
 | 
|---|
| 160 |   # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
 | 
|---|
| 161 |   # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
 | 
|---|
| 162 |   my $nc = 0;
 | 
|---|
| 163 |   my %seenatoms;
 | 
|---|
| 164 |   # we don't actually use these but we have to recognize them
 | 
|---|
| 165 |   my $class = 'IN';
 | 
|---|
| 166 |   # not preset as we need to detect whether it's present in the record
 | 
|---|
| 167 |   my $ttl;
 | 
|---|
| 168 |   my $type;
 | 
|---|
| 169 |   my $badrec;
 | 
|---|
| 170 |   my $curatom = 'class';
 | 
|---|
| 171 |   eval {
 | 
|---|
| 172 |     for (; $nc < 3; $nc++) {
 | 
|---|
| 173 |       last if $type;    # short-circuit if we've got a type, further data is record-specific.
 | 
|---|
| 174 |       my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
 | 
|---|
| 175 |       # should be safe?
 | 
|---|
| 176 |       last if !$atom;
 | 
|---|
| 177 |       if ($atom =~ /^\d+$/) {
 | 
|---|
| 178 |         if (defined($ttl)) {
 | 
|---|
| 179 |           # we already have a TTL, so another all-numeric field is invalid.
 | 
|---|
| 180 |           die "bad record ($origrec)\n";
 | 
|---|
| 181 |         } else {
 | 
|---|
| 182 |           if ($curatom ne 'class' && $curatom ne 'ttl') {
 | 
|---|
| 183 |             die "bad record ($origrec)\n";
 | 
|---|
| 184 |           }
 | 
|---|
| 185 |           $curatom = 'ttl';
 | 
|---|
| 186 |           $ttl = $atom;
 | 
|---|
| 187 |         }
 | 
|---|
| 188 |       }
 | 
|---|
| 189 |       elsif ($atom =~ /^IN|CS|CH|HS$/) {
 | 
|---|
| 190 |         if ($atom =~ /CS|CH|HS/) {
 | 
|---|
| 191 |           die "unsupported class $atom in record ($origrec)\n";
 | 
|---|
| 192 |         }
 | 
|---|
| 193 |         $curatom = 'class';
 | 
|---|
| 194 |         $class = $atom;
 | 
|---|
| 195 |       }
 | 
|---|
| 196 |       elsif ($atom =~ /^[A-Z\d-]+/) {
 | 
|---|
| 197 |         # check against dnsadmin's internal list of known DNS types.
 | 
|---|
| 198 |         if ($reverse_typemap{$atom}) {
 | 
|---|
| 199 |           $type = $atom;
 | 
|---|
| 200 |         } else {
 | 
|---|
| 201 |           die "unknown type $atom in record ($origrec)\n";
 | 
|---|
| 202 |         }
 | 
|---|
| 203 |         $curatom = 'type';
 | 
|---|
| 204 |       }
 | 
|---|
| 205 |       $rec =~ s/^$atom\s*//;
 | 
|---|
| 206 |     } # class/type/TTL loop
 | 
|---|
| 207 |   };
 | 
|---|
| 208 |   if ($@) {
 | 
|---|
| 209 |     warn $@;
 | 
|---|
| 210 |     next;
 | 
|---|
| 211 |   }
 | 
|---|
| 212 | 
 | 
|---|
| 213 |   $ttl = $defttl if !defined($ttl);
 | 
|---|
| 214 | 
 | 
|---|
| 215 |   # Just In Case we need the original rdata after we've sliced off more pieces
 | 
|---|
| 216 |   my $rdata = $rec;
 | 
|---|
| 217 |   $prevlabel = $curlabel;
 | 
|---|
| 218 | 
 | 
|---|
| 219 | ##fixme:  squish this down for this script since SOA records are irrelevant
 | 
|---|
| 220 |   if ($type eq 'SOA') {
 | 
|---|
| 221 |     my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
 | 
|---|
| 222 |     die "Can't parse gibberish SOAish record: '$rdata'/'$origrec'\n" if !$ns;
 | 
|---|
| 223 |     $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;
 | 
|---|
| 224 | 
 | 
|---|
| 225 |     # There are probably more efficient ways to do this but the SOA record
 | 
|---|
| 226 |     # format is essentially character based, not line-based.
 | 
|---|
| 227 |     # In theory the SOA serial etc may be spread over "many" lines, bounded by ().
 | 
|---|
| 228 | 
 | 
|---|
| 229 |     # Parse fields from $rdata if present
 | 
|---|
| 230 |     my @soabits;
 | 
|---|
| 231 |     my @soafirst = split /\s+/, $rdata;
 | 
|---|
| 232 |     while (my $f = shift @soafirst) {
 | 
|---|
| 233 |       last if $f !~ /^\d/;
 | 
|---|
| 234 |       push @soabits, $f;
 | 
|---|
| 235 |     }
 | 
|---|
| 236 | 
 | 
|---|
| 237 |     # Read more lines if we don't have enough SOA fields filled
 | 
|---|
| 238 |     while (scalar(@soabits) < 5) {
 | 
|---|
| 239 |       my $tmp = <>;
 | 
|---|
| 240 |       $tmp =~ s/^\s*//;
 | 
|---|
| 241 |       my @tmpsoa = split /\s+/, $tmp;
 | 
|---|
| 242 |       while (my $f = shift @tmpsoa) {
 | 
|---|
| 243 |         last if $f !~ /^\d/;
 | 
|---|
| 244 |         push @soabits, $f;
 | 
|---|
| 245 |       }
 | 
|---|
| 246 |       if (scalar(@soabits) == 5) {
 | 
|---|
| 247 |         last;
 | 
|---|
| 248 |       }
 | 
|---|
| 249 |     }
 | 
|---|
| 250 |   } # SOA
 | 
|---|
| 251 | 
 | 
|---|
| 252 | ##fixme:  trim dupes if possible
 | 
|---|
| 253 | 
 | 
|---|
| 254 |   elsif ($type eq 'A') {
 | 
|---|
| 255 |     # need the name->IP map so we can reverse-map the CNAMEs on output
 | 
|---|
| 256 |     push @{$amap{$curlabel}}, $rdata;
 | 
|---|
| 257 |     $namemap{$rdata}{$curlabel}++;
 | 
|---|
| 258 |   } # A record
 | 
|---|
| 259 | 
 | 
|---|
| 260 |   elsif ($type eq 'CNAME') {
 | 
|---|
| 261 |     $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
 | 
|---|
| 262 |   } # CNAME record
 | 
|---|
| 263 | 
 | 
|---|
| 264 |   # all other record types are irrelevant for a hosts file
 | 
|---|
| 265 | 
 | 
|---|
| 266 | } # <STDIN>
 | 
|---|
| 267 | 
 | 
|---|
| 268 | #print Dumper \%cmap;
 | 
|---|
| 269 | 
 | 
|---|
| 270 | # Walk the CNAME list and see if we can match the targets in-zone.
 | 
|---|
| 271 | # Out-of-zone CNAMEs are out of scope for this conversion.
 | 
|---|
| 272 | foreach my $cn (sort keys %cmap) {
 | 
|---|
| 273 |   my $targ = $cmap{$cn};
 | 
|---|
| 274 | #print "dbg: ".Dumper($targ);
 | 
|---|
| 275 |   my @targlist;
 | 
|---|
| 276 | #  push @targlist, $targ;  # mostly for error reporting
 | 
|---|
| 277 | my $dangle = 0;
 | 
|---|
| 278 | 
 | 
|---|
| 279 | my $depth = 1;  # CNAME -> A
 | 
|---|
| 280 | 
 | 
|---|
| 281 | # check this as a loop for consistent fail/break conditions.  bonus:  may allow user choice for CNAME depth?
 | 
|---|
| 282 |   for (; $dangle == 0; $depth++) {
 | 
|---|
| 283 | 
 | 
|---|
| 284 | #print "d:$depth  checking $cn -> $targ\n";
 | 
|---|
| 285 | push @targlist, $targ;
 | 
|---|
| 286 | 
 | 
|---|
| 287 |   # Depth limit.  If made user-selectable should arguably set a hard
 | 
|---|
| 288 |   # limit because deeply chained CNAMEs are Baaaaad, mmkaay?
 | 
|---|
| 289 |   if ($depth >= $maxdepth) {
 | 
|---|
| 290 |     warn "CNAMEs too deeply chained, skipping: $cn => ".join(' => ', @targlist)."\n";
 | 
|---|
| 291 |     last;
 | 
|---|
| 292 |   }
 | 
|---|
| 293 | 
 | 
|---|
| 294 | # break if CNAME target is in the A record list
 | 
|---|
| 295 |   last if $amap{$targ};
 | 
|---|
| 296 |   if ($cmap{$targ}) {
 | 
|---|
| 297 | #     note the new target
 | 
|---|
| 298 |     my $tmpcn = $targ;
 | 
|---|
| 299 |     $targ = $cmap{$tmpcn};
 | 
|---|
| 300 | #print "    chaining $tmpcn to new $targ\n";
 | 
|---|
| 301 |   } else {
 | 
|---|
| 302 | #     target is either out of zone or doesn't exist
 | 
|---|
| 303 |     $dangle = 1;
 | 
|---|
| 304 |     last;
 | 
|---|
| 305 |   }
 | 
|---|
| 306 | 
 | 
|---|
| 307 | 
 | 
|---|
| 308 | #warn "chained cname $cn => $targ\n";
 | 
|---|
| 309 |     # CNAME to another CNAME
 | 
|---|
| 310 | #$tmpcn => $targ\n";
 | 
|---|
| 311 | 
 | 
|---|
| 312 | #  last if $dangle;
 | 
|---|
| 313 | 
 | 
|---|
| 314 | #      if (!$amap{$targ}) {
 | 
|---|
| 315 | #        if ($cmap{$targ}) {
 | 
|---|
| 316 | #          $tmpcn = $targ;
 | 
|---|
| 317 | #          $targ = $cmap{$tmpcn};
 | 
|---|
| 318 | #push @targlist, $targ;
 | 
|---|
| 319 | #warn "  chain target $cn => ".join(' => ', @targlist).
 | 
|---|
| 320 | #       "\n";
 | 
|---|
| 321 | #        } else {
 | 
|---|
| 322 | #          warn "skipping dangling CNAME $cn => $targlist[0] => $targlist[1]\n";
 | 
|---|
| 323 | #          next;
 | 
|---|
| 324 | #        }
 | 
|---|
| 325 | #      }
 | 
|---|
| 326 | #    } else {
 | 
|---|
| 327 | #      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway
 | 
|---|
| 328 | #      warn "skipping dangling CNAME $cn => $targ\n";
 | 
|---|
| 329 | #      next;
 | 
|---|
| 330 | #    }
 | 
|---|
| 331 | #  }
 | 
|---|
| 332 | 
 | 
|---|
| 333 | 
 | 
|---|
| 334 |   } # CNAME recursion loop
 | 
|---|
| 335 | 
 | 
|---|
| 336 | next if $dangle;
 | 
|---|
| 337 | 
 | 
|---|
| 338 | #print "    chain target $cn => ".join(' => ', @targlist)."\n";
 | 
|---|
| 339 |   if ($amap{$targ}) {
 | 
|---|
| 340 |     # absent any criteria, we use the first IP a name was associated with
 | 
|---|
| 341 |     my $targip = $amap{$targ}[0];
 | 
|---|
| 342 |     $namemap{$targip}{$cn}++;
 | 
|---|
| 343 |   } else {
 | 
|---|
| 344 |   }
 | 
|---|
| 345 | } # each %cmap
 | 
|---|
| 346 | 
 | 
|---|
| 347 | #print Dumper \%amap;
 | 
|---|
| 348 | #foreach my $n (keys %amap) {
 | 
|---|
| 349 | #  foreach my $ip (keys %{$amap{$n}}) {
 | 
|---|
| 350 | #print "$ip\t$n\n";
 | 
|---|
| 351 | #    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
 | 
|---|
| 352 | #    $namemap{$ip}{$n}++;
 | 
|---|
| 353 | #  }
 | 
|---|
| 354 | #}
 | 
|---|
| 355 | 
 | 
|---|
| 356 | #print Dumper \%namemap;
 | 
|---|
| 357 | foreach my $ip (sort keys %namemap) {
 | 
|---|
| 358 |   print "$ip\t".join(' ', sort keys %{$namemap{$ip}})."\n";
 | 
|---|
| 359 | }
 | 
|---|