#!/usr/pkg/bin/ruby32
# $Id: scrape-MDAPI-2-geogroups.rb 7581 2021-12-31 18:17:15Z flaterco $
# API documented at: https://api.tidesandcurrents.noaa.gov/mdapi/prod/
# If it should go away, there is another one at https://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/
# The geogroups query is undocumented.

require 'json'

# Get the complete list of geogroups with parent pointers.
#
# The download is skipped when geogroups-tides.json already exists, so a
# failed fetch must not leave a file behind:  wget -O creates/truncates the
# output file before the transfer, and a leftover empty or partial file would
# suppress the download on every later run.
unless File.exist?("geogroups-tides.json")
  # Passing the arguments separately bypasses the shell, so the "?" in the
  # URL cannot be interpreted as a glob character.
  fetched = system("wget", "--progress=dot", "-O", "geogroups-tides.json",
                   "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/geogroups.json?type=ETIDES")
  unless fetched
    # system returns false on a non-zero exit and nil if wget could not be run.
    File.delete("geogroups-tides.json") if File.exist?("geogroups-tides.json")
    abort "wget failed to fetch geogroups-tides.json"
  end
end

# Make list of URLs to get the stations in each geogroup.
#
# The whole list is built in memory before the output file is created.  The
# file's existence is what suppresses regeneration, so it must only appear
# once the geogroups JSON has been read and parsed successfully.
unless File.exist?("geogroups-tides-urls.txt")
  geogroups = JSON.parse(File.read("geogroups-tides.json"))
  # fetch raises KeyError naming the missing key if the response has no
  # geoGroupList, instead of failing later on nil.
  url_lines = geogroups.fetch("geoGroupList").map do |g|
    "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/geogroups/#{g["geoGroupId"]}/children.json\n"
  end
  # An empty list would produce an empty URL file that blocks regeneration.
  abort "No geogroups found in geogroups-tides.json" if url_lines.empty?
  File.write("geogroups-tides-urls.txt", url_lines.join)
end

# The records all contain pointers to the parent geogroup.
# The seq field may be something to do with the order of entries inside a
# group when the HTML is generated.

# 2021-12-29
# Previously, I was doing this:
#   -O stations-by-group-tides.json    Append all to one file
# But I was getting blocks of binary garbage in stations-by-group-tides.json
# with OK status in the wget log.  Now I'm getting them all to separate files
# so the bad fetches can be retried easily, but the problem did not reproduce.
# Could be a file append glitch in wget, the lib, or the kernel.
#   -x -nH --cut-dirs=3   Gets geogroups/NNNN/children.json
# Fetch every URL in geogroups-tides-urls.txt into its own file.  wget's
# messages go to the log file (--append-output), so nothing shows on the
# console if a fetch fails; check the result and say so on stderr.  This only
# warns, because --no-clobber lets a rerun pick up whatever is still missing.
fetch_ok = system("wget", "-x", "-nH", "--cut-dirs=3",
                  "--wait=4", "--random-wait", "--no-clobber",
                  "--input-file=geogroups-tides-urls.txt",
                  "--append-output=scrape_log_geogroups_tides.txt")
unless fetch_ok
  warn "wget reported a problem (or could not be run); see scrape_log_geogroups_tides.txt"
end

# TODO:  Unfinished changes from 2021-12-29.
# If any of the fetched files are corrupt, delete them and loop (refetch).


# Commented-out dead code follows.
#
# 2017:  Geogroup children query (getting stations) does not work for
#        currents.  It retrieves only the group records.
# 2020:  Same.
# 2021:  Moved to test_currents_geogroups.rb.
=begin

unless File.exist?("geogroups-currents.json")
  system "wget --progress=dot -O geogroups-currents.json https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/geogroups.json?type=CURRENTS"
end

unless File.exist?("geogroups-currents-urls.txt")
  geogroups = JSON.parse(File.read("geogroups-currents.json"))
  File.open("geogroups-currents-urls.txt", "w") do |outf|
    geogroups["geoGroupList"].each {|g|
      outf.print "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/geogroups/", g["geoGroupId"], "/children.json\n"
    }
  end
end

# This retrieves nothing but the group records.
unless File.exist?("stations-by-group-currents.json")
  system "wget -O stations-by-group-currents.json --wait=4 --random-wait --no-clobber --input-file=geogroups-currents-urls.txt --append-output=scrape_log_geogroups_currents.txt"
end

=end
