Operand

pro bo? no.

gram: fiscal

> ./index.nu


# List candidate URLs of the Treasury `mts` report text files.
#
# Combines two zero-padded, two-digit fields (each running "01".."12") into
# the `mts<first><second>.txt` file name and returns all 144 URLs as one flat
# list of strings.
def "fiscal addrs" [] {
  # "01" .. "12": the same padded values feed both fields of the file name.
  let padded = 1..12 | each {|n| $n | fill -a right -c '0' -w 2 }
  $padded | each {|year|
    $padded | each {|mon|
      $"https://www.fiscal.treasury.gov/files/reports-statements/mts/mts($year)($mon).txt"
    }
  } | flatten
}

# Download every URL produced by `fiscal addrs` into ./cache.
#
# Each file is stored under the basename of its URL; a URL whose target file
# already exists is skipped. Downloads run in parallel via `par-each`.
def "fiscal index" [] {
  # Create the cache directory once, up front, instead of once per URL inside
  # the parallel closure.
  mkdir cache
  fiscal addrs | par-each {|addr|
    let node = [ . cache ($addr | path basename)] | path join | path expand
    if not ($node | path exists) { http get $addr | save $node }
  }
}

# Split each cached ./cache/*.txt statement into one file per report name.
#
# Every file is cut at the "REPORT ID:" marker and the final piece is
# discarded. The remaining pieces are grouped by a name taken from their first
# line (the text before the first run of two spaces). Each group is written to
# ./cache/<statement stem>/<report name>.txt unless that file already exists.
#
# NOTE(review): after `group-by`/`transpose` each group still holds whole
# {name, pages} records, so the joined body is built from records rather than
# from the page text alone. `fiscal report clean` matches "{name:" lines,
# presumably for that reason -- confirm before changing either side.
def "fiscal report slice" [] {
  glob ./cache/*.txt | each {|$src|
    print $src
    let chunks = open $src | decode | split row "REPORT ID:"
    let stem = $src | path parse | get stem

    let reports = (
      $chunks
      | slice 0..-2
      | each {|chunk|
          let rows = $chunk | lines
          {
            name: ($rows | first | str replace -r '  .+' '' | str trim),
            pages: ($rows | slice 1.. | str join "\n")
          }
        }
      | group-by name
      | transpose name pages
      | insert body { $in.pages | flatten | str join "\n" }
      | select name body
    )

    $reports | each {|entry|
      let target = [ . cache $stem $"($entry.name).txt" ] | path join | path expand
      if not ($target | path exists) {
        mkdir ($target | path dirname)
        $entry.body | save $target
      }
    }
  }
}

# Strip page-header lines from every ./cache/mts*/*.txt report.
#
# For each report a sibling file with the extension `noheader` is written;
# reports whose `noheader` file already exists are skipped. A header is taken
# to start at a line matching "{name:" and to end at a line matching
# 'ACCOUNTING DATE:'; start and end markers are paired positionally (first
# with first, second with second, ...). Header lines and every line of two
# characters or fewer are dropped, and the rest is joined with newlines.
def "fiscal report clean" [] {
  glob ./cache/mts*/*.txt
  | each {|$node|
    let name = $node | path parse | upsert extension {"noheader"} | path join
    if not ($name | path exists) {
      let body = open $node | decode
      let lines = $body | lines | enumerate

      let headers_begin = $lines
      | where item =~ "{name:"
      | get index

      let headers_end = $lines
      | where item =~ 'ACCOUNTING DATE:'
      | get index

      # Expand every (begin, end) pair into the full list of line indices it
      # covers; `enumerate | get item` turns each range into a plain list.
      let header_indices = $headers_begin
        | zip $headers_end
        | each {|idx| ($idx.0)..($idx.1) | enumerate }
        | get item
        | flatten

      # The former `pagination` local was computed here but never read, so it
      # has been removed.
      $lines
        | get item
        | reject ...($header_indices)
        | where { ($in | str length) > 2 }
        | str join "\n"
        | save $name
    }
  }
}

# Break the first six ./cache/mts*/*.noheader files into labelled pages.
#
# A "border" is a line matching "^\\+[ _]+$". For every border:
#   - the page body runs from the line after the border through the line two
#     above the next border (or through the last line of the file when there
#     is no next border);
#   - the header candidates are the border line and the two lines above it,
#     keeping only those without digits. The last candidate becomes
#     `column_key`, the others become `headers`.
# Each page is returned as a transposed {column_key, headers, page} record.
#
# A file is skipped when a sibling `.csv` already exists; nothing is written
# to that `.csv` path in this command.
def "fiscal report arrange" [] {
  glob ./cache/mts*/*.noheader | first 6 | each {|node|
    let name = $node | path parse | upsert extension {"csv"} | path join
    if not ($name | path exists) {
      let numbered = open -r $node | decode | lines | enumerate
      let border_rows = $numbered | where item =~ "^\\+[ _]+$" | get index
      let heading_rows = $border_rows | each {|row| $row - 2 }

      let bodies = $border_rows | enumerate | each {|border|
        let start = $border.item + 1
        let stop = try {
          $heading_rows | get ($border.index + 1)
        } catch {
          $numbered.index | last
        }
        let wanted = (($start)..($stop) | enumerate | get item)
        $numbered | select ...$wanted | get item | str join "\n"
      }

      let header_sets = $border_rows | each {|row|
        let around = (($row - 2)..$row | enumerate | where item >= 0 | get item)
        $numbered.item | select ...$around | where {$in !~ "\\d+"}
      }

      $header_sets | zip $bodies | each {|pair|
        {
          column_key: ($pair.0 | last),
          headers: ($pair.0 | slice ..-2),
          page: $pair.1
        } | transpose
      }
    }
  }
}