Signal drop!
Relay (operand.online) is unreachable.
Usually, a dropped signal means an upgrade is happening. Hold on!
Sorry, no connection.
Hang in there while we get back on track
gram: nue
> ./domain.nu
Lenses
(coming soon!)
source ~/.config/nushell/nix.nu
source ~/.config/nushell/grammar.nu
source ~/.config/nushell/day.nu
# Domain clone commands:
# use on bulk records, displayed in simple html page hierarchies.
#
# Procedures:
# > domain addrs pull static.case.law
# > domain addrs describe static.case.law
# # here, you can choose the suffixes you need.
# > domain addrs static.case.law | first 4
# > domain sums pull --hash sha256 # or md5, or so on. depends on domain.
# pull all pages
# > domain pages pull static.case.law
# pull pages according to chosen suffixes
# > domain pages pull static.case.law -s [ html tar pdf ]
# Root directory for all domain artifacts; `mkcd` (sourced helper) ensures
# it exists. Returns the expanded path.
def "domain base" []: any -> string {
  "~/domain" | path expand | tee { mkcd }
}
# Locus on disc of domain's address index.
# Location on disk of the domain's crawled address index
# (dots in the domain name become underscores).
def "domain addrs index" [ domain: string, ] {
  let entry = $domain | str replace -a '.' '_'
  (domain base) | path join $entry | path expand
}
# Load the crawled address index for `domain`, peeling gospider's
# "[label] - url" prefixes into a `labels` list.
# Returns a table of { addr, labels }, optionally filtered so that only
# addresses whose path extension is in `--suffix` remain.
def "domain addrs" [
  domain: string,
  --suffix (-s): list<string>,
] {
  let addrs = (
    # open -r instead of external `cat`: consistent with how the same
    # index file is read in `domain sums pull` and `domain sums`.
    open -r (domain addrs index $domain)
    | lines
    | each {|l|
      mut line = $l
      mut labels = []
      mut len = 0
      # Keep stripping "[label] - " prefixes until the line stops shrinking.
      while ($line | str length) != $len {
        $len = $line | str length
        let parse = $line | addr spider label
        if ($parse | is-not-empty) {
          # print $"parsed '($line)' -> '($parse)'"
          $labels = $labels ++ [$parse.capture0.0]
          $line = $parse.capture1.0
      } }
      { addr: $line, labels: $labels }
    })
  if ($suffix | is-empty) { $addrs } else {
    $addrs | where {
      ($in.addr | path parse | get extension)
      in $suffix } }
}
# Split one gospider output line of the form "[label] - remainder".
# string -> table with columns capture0 (the label) and capture1 (the rest);
# an empty table when the line does not match.
def "addr spider label" [] {
  $in | parse --regex '\[([\-\w]+)\] - (.+)'
}
#
# Crawl `domain` with gospider (via the sourced `nsh` wrapper), writing the
# raw crawl output under (domain base); returns the path of the address index.
def "domain addrs pull" [
  domain: string,
] {
  ( nsh gospider gospider
    -s $"https://($domain)/"
    -o (domain base)
    --whitelist-domain $domain
    # --whitelist "\/$"
    --include-subs
    -c 40
    -d 0
    -v
  )
  domain addrs index $domain
}
# See numbers of domain addresses,
# per path extension.
# Count a domain's crawled addresses per path extension.
# Output: table of { shape: extension, num: count }.
def "domain addrs describe" [
  domain: string,
] {
  domain addrs $domain
  | get addr
  | path parse
  | group-by extension
  | transpose shape nodes
  | each {|row| { shape: $row.shape, num: ($row.nodes | length) } }
}
# Scan all domain addresses ending in a hash checksum extension,
# pulling all sums into a local file,
# in batches (normally 1000).
# ---
# Use when a static file domain
# includes `.md5`, `.sha256`, `.sha512`, or so on
# as route suffixes.
# Scan all domain addresses ending in `.$hash`, fetch each checksum body,
# and append { $hash, addr } rows to the local sums index CSV, in batches
# of `$number`, looping until a pass adds no new rows.
def "domain sums pull" [
  domain: string,
  --hash (-h): string = sha256,
  --number (-n): int = 1000,
] {
  print $"loading index @ (domain addrs index $domain);"
  let full = open -r (domain addrs index $domain) | lines | length
  print $"($full) lines..."
  let sum_addresses = domain addrs $domain
  # bug fix: rows are records { addr, labels } — parse the addr field,
  # not the record itself (matches the filters in `domain addrs`).
  | where { ($in.addr | path parse | get extension) == $hash } # 2min 8sec 486ms 693µs 116ns
  # | where { ($in | split row '.' | last) == $hash } # 2min 15sec 424ms 833µs 767ns
  # | where { ($in | str replace -r '.+\.' '') == $hash } # 2min 43sec 611ms 12µs 759ns
  mut begin = -1
  # bug fix: pass -h so progress tracks the requested hash's index,
  # not the default sha256 one.
  mut end = domain sums $domain -h $hash | length
  print $"loaded ($sum_addresses | length) hash addresses."
  print $"($end) already seen."
  while ($end != $begin) {
    $begin = $end
    # bug fix: `domain sums` renames its columns to [$hash, address].
    let seen = domain sums $domain -h $hash | get address
    $sum_addresses
    | where {|l| not ($l.addr in $seen) }
    | first $number
    | par-each {|a| $a
      | insert $hash { retry 4 { http get $a.addr } }
      | select $hash addr
      | to csv -n
      | save -a (domain sums index $domain -h $hash)
    }
    $end = domain sums $domain -h $hash | length # num of lines in (domain sums index $domain)
    print $"(clock) / pulled ($number);\t($begin) -> ($end) / ($full)"
  }
}
# Load all domain checksum hashes in a domain.
# [upgrade] scan for any addr ending in a common hash sequence
# [upgrade] ensure each response is one line, or process may break!
# Load all checksum hashes pulled so far for a domain.
# Returns a table with columns [$hash, address]; an empty list when the
# index file does not exist yet.
# [upgrade] scan for any addr ending in a common hash sequence
# [upgrade] ensure each response is one line, or process may break!
def "domain sums" [
  domain: string,
  --hash (-h): string = sha256,
] {
  # rename moved inside the try: the `[]` fallback has no columns to rename.
  try {
    open -r (domain sums index $domain -h $hash)
    | from csv --noheaders
    | rename $hash address
  } catch {[]}
}
# Path of the local CSV holding pulled checksums for (domain, hash).
def "domain sums index" [
  domain: string,
  --hash (-h): string = sha256,
] {
  (domain base) | path join $"($domain).($hash).index.csv" | path expand
}
# ---
# Pages; in progress.
# ---
# Pull page bodies for `domain`, optionally restricted to the given path
# suffixes. Work in progress: the batch loop below is still commented out,
# so currently this only resolves and counts the address list.
def "domain pages pull" [
  domain: string,
  --suffix (-s): list<string>,
  --number (-n): int = 400,
] {
  print $"loading index @ (domain addrs index $domain);"
  if ($suffix | is-not-empty) {
    print $"- suffix: ($suffix)" } else {
    print "- any suffix" }
  # bug fix: forward the requested suffixes to `domain pages local`
  # (they were printed above but never applied).
  let addrs = domain pages local $domain -s $suffix
  print $"($addrs | length) lines..."
  mut begin = -1
  mut end = $addrs | length
  # print $"loaded ($addrs | length) hash addresses."
  # print $"($end) already seen."
  # while ($end != $begin) {
  #   $begin = $end
  #   let seen = domain sums $domain -h $hash | get addr
  #   $addrs
  #   | where {|l| not ($l.address in $seen) }
  #   | first $number
  #   | par-each {|a|
  #     $a | insert $hash { retry 4 { http get $a.address } }
  #     | select $hash address
  #     | to csv -n | save -a (domain sums index $domain -h $hash)
  #   }
  #   $end = domain sums $domain | length
  #   print $"pulled ($number);\t($begin) -> ($end) / ($full)"
  # }
}
# Resolve the list of page addresses to pull for `domain`, optionally
# restricted to the given path suffixes. Returns the list of addresses.
def "domain pages local" [
  domain: string,
  --suffix (-s): list<string>
] {
  let addrs = domain addrs $domain -s $suffix | get addr
  print $"pulling ($addrs | length) pages..."
  # bug fix: the list was computed but never returned, so callers
  # (`domain pages pull`) received nothing.
  $addrs
}