| 1 | #!/usr/bin/perl | 
|---|
| 2 | # Convert a BIND zone file to a hosts file | 
|---|
| 3 | ## | 
|---|
| 4 | # Copyright 2020 Kris Deugau <kdeugau@deepnet.cx> | 
|---|
| 5 | # | 
|---|
| 6 | #    This program is free software: you can redistribute it and/or modify | 
|---|
| 7 | #    it under the terms of the GNU General Public License as published by | 
|---|
| 8 | #    the Free Software Foundation, either version 3 of the License, or | 
|---|
| 9 | #    (at your option) any later version. | 
|---|
| 10 | # | 
|---|
| 11 | #    This program is distributed in the hope that it will be useful, | 
|---|
| 12 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 13 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 14 | #    GNU General Public License for more details. | 
|---|
| 15 | # | 
|---|
| 16 | #    You should have received a copy of the GNU General Public License | 
|---|
| 17 | #    along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
|---|
| 18 | ## | 
|---|
| 19 |  | 
|---|
| 20 | use strict; | 
|---|
| 21 | use warnings; | 
|---|
| 22 | use Getopt::Long; | 
|---|
| 23 |  | 
|---|
| 24 | use Data::Dumper; | 
|---|
| 25 |  | 
|---|
| 26 | # push "the directory the script is in" into @INC | 
|---|
| 27 | use FindBin; | 
|---|
| 28 | use lib "$FindBin::RealBin/"; | 
|---|
| 29 |  | 
|---|
| 30 | use DNSDB; | 
|---|
| 31 |  | 
|---|
| 32 | my @skipdefs; | 
|---|
| 33 | my $skipfile; | 
|---|
| 34 | my $dryrun = 0; | 
|---|
| 35 | # CNAME chain depth | 
|---|
| 36 | my $maxdepth = 3; | 
|---|
| 37 |  | 
|---|
| 38 | GetOptions( | 
|---|
| 39 | "skip=s" => \@skipdefs, | 
|---|
| 40 | "skipfile=s" => \$skipfile, | 
|---|
| 41 | "test|dry-run" => \$dryrun, | 
|---|
| 42 | ); | 
|---|
| 43 |  | 
|---|
| 44 | my $zname = shift @ARGV; | 
|---|
| 45 |  | 
|---|
| 46 | my $usage = "usage: bind2hosts zone [--skip pattern [--skip pattern2 ...]] [--skipfile file] | 
|---|
| 47 | zonename < zonefile | 
|---|
| 48 |  | 
|---|
| 49 | --skip | 
|---|
| 50 | Specify a string to skip in the records.  If an IP-like string is | 
|---|
| 51 | used, and the zone is a reverse zone, it will also search for the | 
|---|
| 52 | octet-reversed form.  Specify multiple times to skip multiple | 
|---|
| 53 | different record patterns. | 
|---|
| 54 | --skipfile | 
|---|
| 55 | A file containing patterns to skip.  Patterns from the file and | 
|---|
| 56 | any --skip arguments are merged. | 
|---|
| 57 | zonename | 
|---|
| 58 | The name of the zone to import.  Required. | 
|---|
| 59 |  | 
|---|
| 60 | Zone data will be read from STDIN. | 
|---|
| 61 | "; | 
|---|
| 62 | if (!$zname) { | 
|---|
| 63 | die $usage; | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | if ($skipfile) { | 
|---|
| 67 | if (-f $skipfile) { | 
|---|
| 68 | open SKIP, "<$skipfile"; | 
|---|
| 69 | while (<SKIP>) { | 
|---|
| 70 | chomp; | 
|---|
| 71 | push @skipdefs, $_; | 
|---|
| 72 | } | 
|---|
| 73 | close SKIP; | 
|---|
| 74 | } else { | 
|---|
| 75 | warn "skipfile $skipfile requested but it doesn't seem to exist.  Continuing.\n"; | 
|---|
| 76 | } | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | my $rev = 'n'; | 
|---|
| 80 | my $zid; | 
|---|
| 81 |  | 
|---|
| 82 | my %amap; | 
|---|
| 83 | my %namemap; | 
|---|
| 84 | my %cmap; | 
|---|
| 85 |  | 
|---|
| 86 | my $dnsdb = new DNSDB; | 
|---|
| 87 |  | 
|---|
| 88 | # need an ultimate fallback for this one | 
|---|
| 89 | my $defttl = 900; | 
|---|
| 90 | my $origin = "$zname.";   # to append to unqualified names | 
|---|
| 91 | my $curlabel; | 
|---|
| 92 | my $prevlabel; | 
|---|
| 93 |  | 
|---|
| 94 | my $i = 0; | 
|---|
| 95 |  | 
|---|
| 96 | # need to spin up a full state machine-ish thing, because BIND zone files are all about context | 
|---|
| 97 | while (my $rec = <>) { | 
|---|
| 98 | chomp $rec; | 
|---|
| 99 | next if $rec =~ /^\s*$/; | 
|---|
| 100 | next if $rec =~ /^\s*;/; | 
|---|
| 101 | next if $rec =~ /^\s*\)/;     # SOA closing (possibly other records too?) | 
|---|
| 102 | # arguably should do some more targeted voodoo when parsing the SOA details | 
|---|
| 103 |  | 
|---|
| 104 | my $skipflag = 0; | 
|---|
| 105 | foreach (@skipdefs) { | 
|---|
| 106 | if ($rec =~ /\Q$_\E/) { | 
|---|
| 107 | $skipflag = 1; | 
|---|
| 108 | # might want to do something with the skipped records someday | 
|---|
| 109 | } | 
|---|
| 110 | } | 
|---|
| 111 | next if $skipflag; | 
|---|
| 112 |  | 
|---|
| 113 | if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) { | 
|---|
| 114 | # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho | 
|---|
| 115 | if ($macro eq 'TTL') { | 
|---|
| 116 | # irrelevant for a hosts file | 
|---|
| 117 | } elsif ($macro eq 'ORIGIN') { | 
|---|
| 118 | # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names. | 
|---|
| 119 | if ($mdetail =~ /\.$/) { | 
|---|
| 120 | $origin = $mdetail; | 
|---|
| 121 | } else { | 
|---|
| 122 | # append current origin to unqualified origin | 
|---|
| 123 | $origin = "$mdetail.$origin"; | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 | ##fixme: should arguably handle $INCLUDE | 
|---|
| 127 | # probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS | 
|---|
| 128 | next; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | # yay for special cases | 
|---|
| 132 | $origin = '' if $origin eq '.'; | 
|---|
| 133 |  | 
|---|
| 134 | my $origrec = $rec; | 
|---|
| 135 |  | 
|---|
| 136 | # leading whitespace indicates "same label as last record" | 
|---|
| 137 | if ($rec =~ /^\s/) { | 
|---|
| 138 | $curlabel = $prevlabel; | 
|---|
| 139 | } else { | 
|---|
| 140 | ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/); | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | # magic name! | 
|---|
| 144 | $curlabel = "$zname." if $curlabel eq '@'; | 
|---|
| 145 |  | 
|---|
| 146 | # append $ORIGIN if name is not fully qualified. | 
|---|
| 147 | if ($curlabel !~ /\.$/) { | 
|---|
| 148 | $curlabel .= ".$origin"; | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | # check for zone scope.  skip bad records. | 
|---|
| 152 | if ($curlabel !~ /$zname.$/) { | 
|---|
| 153 | warn "bad record $origrec, maybe bad \$ORIGIN?\n"; | 
|---|
| 154 | next; | 
|---|
| 155 | } | 
|---|
| 156 |  | 
|---|
| 157 | # trim the label, if any | 
|---|
| 158 | $rec =~ s/^([\w\@_.-]*)\s+//; | 
|---|
| 159 |  | 
|---|
| 160 | # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type. | 
|---|
| 161 | # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order. | 
|---|
| 162 | my $nc = 0; | 
|---|
| 163 | my %seenatoms; | 
|---|
| 164 | # we don't actually use these but we have to recognize them | 
|---|
| 165 | my $class = 'IN'; | 
|---|
| 166 | # not preset as we need to detect whether it's present in the record | 
|---|
| 167 | my $ttl; | 
|---|
| 168 | my $type; | 
|---|
| 169 | my $badrec; | 
|---|
| 170 | my $curatom = 'class'; | 
|---|
| 171 | eval { | 
|---|
| 172 | for (; $nc < 3; $nc++) { | 
|---|
| 173 | last if $type;    # short-circuit if we've got a type, further data is record-specific. | 
|---|
| 174 | my ($atom) = ($rec =~ /^([\w\d.]+)\s/); | 
|---|
| 175 | # should be safe? | 
|---|
| 176 | last if !$atom; | 
|---|
| 177 | if ($atom =~ /^\d+$/) { | 
|---|
| 178 | if (defined($ttl)) { | 
|---|
| 179 | # we already have a TTL, so another all-numeric field is invalid. | 
|---|
| 180 | die "bad record ($origrec)\n"; | 
|---|
| 181 | } else { | 
|---|
| 182 | if ($curatom ne 'class' && $curatom ne 'ttl') { | 
|---|
| 183 | die "bad record ($origrec)\n"; | 
|---|
| 184 | } | 
|---|
| 185 | $curatom = 'ttl'; | 
|---|
| 186 | $ttl = $atom; | 
|---|
| 187 | } | 
|---|
| 188 | } | 
|---|
| 189 | elsif ($atom =~ /^IN|CS|CH|HS$/) { | 
|---|
| 190 | if ($atom =~ /CS|CH|HS/) { | 
|---|
| 191 | die "unsupported class $atom in record ($origrec)\n"; | 
|---|
| 192 | } | 
|---|
| 193 | $curatom = 'class'; | 
|---|
| 194 | $class = $atom; | 
|---|
| 195 | } | 
|---|
| 196 | elsif ($atom =~ /^[A-Z\d-]+/) { | 
|---|
| 197 | # check against dnsadmin's internal list of known DNS types. | 
|---|
| 198 | if ($reverse_typemap{$atom}) { | 
|---|
| 199 | $type = $atom; | 
|---|
| 200 | } else { | 
|---|
| 201 | die "unknown type $atom in record ($origrec)\n"; | 
|---|
| 202 | } | 
|---|
| 203 | $curatom = 'type'; | 
|---|
| 204 | } | 
|---|
| 205 | $rec =~ s/^$atom\s*//; | 
|---|
| 206 | } # class/type/TTL loop | 
|---|
| 207 | }; | 
|---|
| 208 | if ($@) { | 
|---|
| 209 | warn $@; | 
|---|
| 210 | next; | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | $ttl = $defttl if !defined($ttl); | 
|---|
| 214 |  | 
|---|
| 215 | # Just In Case we need the original rdata after we've sliced off more pieces | 
|---|
| 216 | my $rdata = $rec; | 
|---|
| 217 | $prevlabel = $curlabel; | 
|---|
| 218 |  | 
|---|
| 219 | ##fixme:  squish this down for this script since SOA records are irrelevant | 
|---|
| 220 | if ($type eq 'SOA') { | 
|---|
| 221 | my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/); | 
|---|
| 222 | die "Can't parse gibberish SOAish record: '$rdata'/'$origrec'\n" if !$ns; | 
|---|
| 223 | $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//; | 
|---|
| 224 |  | 
|---|
| 225 | # There are probably more efficient ways to do this but the SOA record | 
|---|
| 226 | # format is essentially character based, not line-based. | 
|---|
| 227 | # In theory the SOA serial etc may be spread over "many" lines, bounded by (). | 
|---|
| 228 |  | 
|---|
| 229 | # Parse fields from $rdata if present | 
|---|
| 230 | my @soabits; | 
|---|
| 231 | my @soafirst = split /\s+/, $rdata; | 
|---|
| 232 | while (my $f = shift @soafirst) { | 
|---|
| 233 | last if $f !~ /^\d/; | 
|---|
| 234 | push @soabits, $f; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | # Read more lines if we don't have enough SOA fields filled | 
|---|
| 238 | while (scalar(@soabits) < 5) { | 
|---|
| 239 | my $tmp = <>; | 
|---|
| 240 | $tmp =~ s/^\s*//; | 
|---|
| 241 | my @tmpsoa = split /\s+/, $tmp; | 
|---|
| 242 | while (my $f = shift @tmpsoa) { | 
|---|
| 243 | last if $f !~ /^\d/; | 
|---|
| 244 | push @soabits, $f; | 
|---|
| 245 | } | 
|---|
| 246 | if (scalar(@soabits) == 5) { | 
|---|
| 247 | last; | 
|---|
| 248 | } | 
|---|
| 249 | } | 
|---|
| 250 | } # SOA | 
|---|
| 251 |  | 
|---|
| 252 | ##fixme:  trim dupes if possible | 
|---|
| 253 |  | 
|---|
| 254 | elsif ($type eq 'A') { | 
|---|
| 255 | # need the name->IP map so we can reverse-map the CNAMEs on output | 
|---|
| 256 | push @{$amap{$curlabel}}, $rdata; | 
|---|
| 257 | $namemap{$rdata}{$curlabel}++; | 
|---|
| 258 | } # A record | 
|---|
| 259 |  | 
|---|
| 260 | elsif ($type eq 'CNAME') { | 
|---|
| 261 | $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin"); | 
|---|
| 262 | } # CNAME record | 
|---|
| 263 |  | 
|---|
| 264 | # all other record types are irrelevant for a hosts file | 
|---|
| 265 |  | 
|---|
| 266 | } # <STDIN> | 
|---|
| 267 |  | 
|---|
| 268 | #print Dumper \%cmap; | 
|---|
| 269 |  | 
|---|
| 270 | # Walk the CNAME list and see if we can match the targets in-zone. | 
|---|
| 271 | # Out-of-zone CNAMEs are out of scope for this conversion. | 
|---|
| 272 | foreach my $cn (sort keys %cmap) { | 
|---|
| 273 | my $targ = $cmap{$cn}; | 
|---|
| 274 | #print "dbg: ".Dumper($targ); | 
|---|
| 275 | my @targlist; | 
|---|
| 276 | #  push @targlist, $targ;  # mostly for error reporting | 
|---|
| 277 | my $dangle = 0; | 
|---|
| 278 |  | 
|---|
| 279 | my $depth = 1;  # CNAME -> A | 
|---|
| 280 |  | 
|---|
| 281 | # check this as a loop for consistent fail/break conditions.  bonus:  may allow user choice for CNAME depth? | 
|---|
| 282 | for (; $dangle == 0; $depth++) { | 
|---|
| 283 |  | 
|---|
| 284 | #print "d:$depth  checking $cn -> $targ\n"; | 
|---|
| 285 | push @targlist, $targ; | 
|---|
| 286 |  | 
|---|
| 287 | # Depth limit.  If made user-selectable should arguably set a hard | 
|---|
| 288 | # limit because deeply chained CNAMEs are Baaaaad, mmkaay? | 
|---|
| 289 | if ($depth >= $maxdepth) { | 
|---|
| 290 | warn "CNAMEs too deeply chained, skipping: $cn => ".join(' => ', @targlist)."\n"; | 
|---|
| 291 | last; | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | # break if CNAME target is in the A record list | 
|---|
| 295 | last if $amap{$targ}; | 
|---|
| 296 | if ($cmap{$targ}) { | 
|---|
| 297 | #     note the new target | 
|---|
| 298 | my $tmpcn = $targ; | 
|---|
| 299 | $targ = $cmap{$tmpcn}; | 
|---|
| 300 | #print "    chaining $tmpcn to new $targ\n"; | 
|---|
| 301 | } else { | 
|---|
| 302 | #     target is either out of zone or doesn't exist | 
|---|
| 303 | $dangle = 1; | 
|---|
| 304 | last; | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 |  | 
|---|
| 308 | #warn "chained cname $cn => $targ\n"; | 
|---|
| 309 | # CNAME to another CNAME | 
|---|
| 310 | #$tmpcn => $targ\n"; | 
|---|
| 311 |  | 
|---|
| 312 | #  last if $dangle; | 
|---|
| 313 |  | 
|---|
| 314 | #      if (!$amap{$targ}) { | 
|---|
| 315 | #        if ($cmap{$targ}) { | 
|---|
| 316 | #          $tmpcn = $targ; | 
|---|
| 317 | #          $targ = $cmap{$tmpcn}; | 
|---|
| 318 | #push @targlist, $targ; | 
|---|
| 319 | #warn "  chain target $cn => ".join(' => ', @targlist). | 
|---|
| 320 | #       "\n"; | 
|---|
| 321 | #        } else { | 
|---|
| 322 | #          warn "skipping dangling CNAME $cn => $targlist[0] => $targlist[1]\n"; | 
|---|
| 323 | #          next; | 
|---|
| 324 | #        } | 
|---|
| 325 | #      } | 
|---|
| 326 | #    } else { | 
|---|
| 327 | #      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway | 
|---|
| 328 | #      warn "skipping dangling CNAME $cn => $targ\n"; | 
|---|
| 329 | #      next; | 
|---|
| 330 | #    } | 
|---|
| 331 | #  } | 
|---|
| 332 |  | 
|---|
| 333 |  | 
|---|
| 334 | } # CNAME recursion loop | 
|---|
| 335 |  | 
|---|
| 336 | next if $dangle; | 
|---|
| 337 |  | 
|---|
| 338 | #print "    chain target $cn => ".join(' => ', @targlist)."\n"; | 
|---|
| 339 | if ($amap{$targ}) { | 
|---|
| 340 | # absent any criteria, we use the first IP a name was associated with | 
|---|
| 341 | my $targip = $amap{$targ}[0]; | 
|---|
| 342 | $namemap{$targip}{$cn}++; | 
|---|
| 343 | } else { | 
|---|
| 344 | } | 
|---|
| 345 | } # each %cmap | 
|---|
| 346 |  | 
|---|
| 347 | #print Dumper \%amap; | 
|---|
| 348 | #foreach my $n (keys %amap) { | 
|---|
| 349 | #  foreach my $ip (keys %{$amap{$n}}) { | 
|---|
| 350 | #print "$ip\t$n\n"; | 
|---|
| 351 | #    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}}; | 
|---|
| 352 | #    $namemap{$ip}{$n}++; | 
|---|
| 353 | #  } | 
|---|
| 354 | #} | 
|---|
| 355 |  | 
|---|
| 356 | #print Dumper \%namemap; | 
|---|
| 357 | foreach my $ip (sort keys %namemap) { | 
|---|
| 358 | print "$ip\t".join(' ', sort keys %{$namemap{$ip}})."\n"; | 
|---|
| 359 | } | 
|---|