gram: build
> ./config/nushell/domain.nu
source ./grammar.nu
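# Assumed: `grammar.nu` supplies the `day`, `cloc`, and `retry`
# helpers used below (timestamps and a retrying runner);
# they are not Nushell builtins.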
# Map an online address to local disc.
# For dir URLs (ending in `/`),
# append `index.html`.
def "domain map" [addr: string, --base (-b): path] {
let call = $addr | url parse
let base = if not ($base | is-empty) { $base } else {
[ ~ # brea cache
domain $call.host ] | path join
} | path expand
if (($base | path type) != 'dir') { mkdir $base }
let node = $call.path
let node = if ($node | str ends-with "/") { $node + "index.html" } else { $node }
let node = if ($node | str starts-with "/") { $node | str substring 1.. } else { $node }
[ $base $node ] | path join | path expand
}
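# A sketch of the mapping, using a hypothetical path on this site's domain:
#   domain map "https://operand.online/posts/"
#   # => ~/domain/operand.online/posts/index.html (expanded)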
# Pull and cache an online address,
# or read a copy already cached on disc.
def "domain page" [
addr: string,
--dura (-d): duration = 0sec,
] {
let node = domain map $addr
if ($node | path type) == file { open $node
} else {
mkdir ($node | path dirname)
retry 3 { http get $addr | tee { save -f $node;
print $"(day) | (cloc) // ($node)"; sleep $dura } }
}
}
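# A sketch of the caching behaviour (same hypothetical address):
#   domain page "https://operand.online/posts/" -d 1sec
# The first call fetches, saves, logs, and sleeps for `--dura`;
# later calls just `open` the copy already on disc.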
# Cache and read an online page by address,
# mine link addresses.
def "domain links" [
addr: string,
--dura (-d): duration = 0sec,
] {
(domain page $addr -d $dura) | pup 'a attr{href}' | lines }
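# Note: `pup` is an external HTML parser, assumed to be on PATH;
# `a attr{href}` emits each anchor's raw href, one per line, e.g.
#   domain links "https://operand.online/" | first 3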
# Using a `queue` of addresses,
# pull each one and mine its page links
# before marking it as `seen`;
# incurs a spin-up delay
# in exchange for idempotency and completeness.
def "domain index" [
--dura (-d): duration = 2sec,
--seen (-s): list<string> = [],
...queue: string,
] {
mut q = $queue; mut s = $seen; mut prune = 0;
let domains = $queue | each { url parse } | get host
print $"(day) | (cloc) // begin:"
$queue | to yaml | print
while ($q | length ) > 0 { let addr = $q | first
# Unseen addresses
if not ($addr in $s) {
# Mine any HTML links
if (domain map $addr | path parse | get extension) == html {
let links = domain links $addr -d $dura |
where { ($in | url parse | get host) in $domains } |
where { not ($in in $s) }
$q = $q ++ $links
# ensure non-HTML on disc.
} else if (domain map $addr | path type) != 'file' {
domain page $addr }
$s = $s ++ [$addr]
}
# Pop queue.
$q = $q | slice 1..
# Prune!
if (($s | length) - $prune) >= 500 {
let l = $q | length; $q = $q | uniq
print $"(day) | (cloc) // seen ($s | length); prune queue: ($l) -> ($q | length)"
$prune = $s | length
}
} }
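# A sketch of a crawl rooted at this site (single seed address):
#   domain index "https://operand.online/"
# Links leaving the seed hosts are skipped; a re-run mostly
# re-reads the cache on disc, per the idempotency note above.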