#!/usr/bin/python

"""
Uses the USGS's web services to download all data from a given dataset
covering a given area.

Requires the Suds and BeautifulSoup python modules and wget.

The program needs a product key to select a dataset.  Look for product
keys at http://ags.cr.usgs.gov/index_service/Index_Service_SOAP.asmx .

After the first run for a dataset, the program will store all of the
download URLs for the dataset in a file in the current directory.  A URL
is removed from the file for every successful download.  If you want to
download data from the same dataset but a different area, you will need to
first delete the URL file.

"""

import re, sys, os.path, subprocess, urllib2, time
from optparse import OptionParser
from suds.client import Client
from BeautifulSoup import BeautifulStoneSoup

def download_urls(product, left, bottom, right, top):
    """Ask the USGS request validation service for a product's download URLs.

    Sends an area-of-interest request for the layer ``product`` covering the
    extent given by ``left``, ``bottom``, ``right`` and ``top``, and returns
    the text of every ``download_url`` element of the response as a sorted
    list.

    If the response has no ``status`` element, or the text of that element is
    not ``true``, the parsed response is printed and the program exits with
    status 1.
    """
    # Adjacent literals are concatenated, so the request is sent as a single
    # line; the leading spaces only mirror the nesting of the elements.
    request_template = (
        "<REQUEST_SERVICE_INPUT>"
        "   <AOI_GEOMETRY>"
        "      <EXTENT>"
        "         <TOP>{top}</TOP>"
        "         <BOTTOM>{bottom}</BOTTOM>"
        "         <LEFT>{left}</LEFT>"
        "         <RIGHT>{right}</RIGHT>"
        "      </EXTENT>"
        "      <SPATIALREFERENCE_WKID/>"
        "   </AOI_GEOMETRY>"
        "   <LAYER_INFORMATION>"
        "      <LAYER_IDS>{product}</LAYER_IDS>"
        "   </LAYER_INFORMATION>"
        "   <CHUNK_SIZE>250</CHUNK_SIZE>"
        "   <ORIGINATOR/>"
        "   <JSON></JSON>"
        "</REQUEST_SERVICE_INPUT>"
    )
    request = request_template.format(top=top, bottom=bottom, left=left,
                                      right=right, product=product)

    validation = Client('http://extract.cr.usgs.gov/requestValidationService/wsdl/RequestValidationService.wsdl')
    raw_response = validation.service.processAOI(request)

    # The XML response isn't proper XML, because ampersands in the URLs aren't
    # quoted.  BeautifulSoup, by default, turns "&foo=bar" into "&foo;=bar",
    # when we want "&amp;foo=bar", so we need to add some markupMassage.
    # Really, though, we need the final product to just have "&foo=bar", which
    # is accomplished by the convertEntities parameter.
    soup = BeautifulStoneSoup(raw_response,
                              markupMassage=[(re.compile('&'), lambda match: '&amp;')],
                              convertEntities=BeautifulStoneSoup.XML_ENTITIES)

    # A response without any <status> element is treated as a failure too,
    # rather than crashing with an AttributeError on None.
    status = soup.find('status')
    if status is None or status.text != 'true':
        print("Error.")
        print(soup.prettify())
        # sys.exit rather than the bare exit(), which only exists when the
        # site module has been loaded.
        sys.exit(1)

    return sorted(u.text for u in soup.findAll('download_url'))

def read_urls_from_file(product):
    """Return the URLs stored, one per line, in the file named ``product``.

    Surrounding whitespace is stripped from every line.  Lines that are empty
    after stripping are skipped, so a stray blank line in the file does not
    turn into an empty URL.
    """
    with open(product, 'r') as f:
        stripped = (line.strip() for line in f)
        return [url for url in stripped if url]

def write_urls_to_file(product, urls):
    """Overwrite the file named ``product`` with ``urls``, one per line."""
    with open(product, 'w') as url_file:
        # Build every line before writing anything to the file.
        lines = [url + '\n' for url in urls]
        url_file.writelines(lines)

def download_url(url):
    """Request one download, wait until it is ready, and fetch it with wget.

    ``url`` is opened to start the download, and the text of the
    ``ns:return`` element of the response is used as the download ID.  The
    download status is then polled, pausing 30 seconds between polls, until
    the number at the start of the status message is 400 or more.  After
    that wget fetches the data and the service is told that the download is
    complete.  The download ID and every service message are printed.

    Raises ``subprocess.CalledProcessError`` if wget exits with a non-zero
    status, so that a failed fetch is neither reported to the service as
    complete nor treated as done by the caller.
    """
    service = 'http://extract.cr.usgs.gov/axis2/services/DownloadService/'
    # Replace the "&#xd;\n" sequences in the service's messages with a single
    # space before parsing.
    massage = [(re.compile('&#xd;\n'), lambda match: ' ')]

    print('------------------------------------------------------------')
    response = urllib2.urlopen(url).read()
    download_id = BeautifulStoneSoup(response).find('ns:return').text
    print(download_id)

    # -1 means "not polled yet", so the first poll happens without a delay.
    status = -1
    while status < 400:
        if status != -1:
            time.sleep(30)
        response = urllib2.urlopen(service + 'getDownloadStatus?downloadID=' + download_id).read()
        message = BeautifulStoneSoup(response, markupMassage=massage).find('ns:return').text
        print(message)
        match = re.search(r'^(\d+),', message)
        # A message without a leading number counts as "not ready" (-2), so
        # the loop polls again after the usual pause.
        status = int(match.group(1)) if match else -2

    # check_call raises when wget fails, instead of silently ignoring its
    # exit status as subprocess.call would.
    subprocess.check_call(['wget', '--trust-server-names', service + 'getData?downloadID=' + download_id])
    response = urllib2.urlopen(service + 'setDownloadComplete?downloadID=' + download_id).read()
    print(BeautifulStoneSoup(response, markupMassage=massage).find('ns:return').text)

if __name__ == '__main__':
    # Command line: PRODUCT is required; --bbox is only needed when no file
    # named PRODUCT exists yet to supply the download URLs.
    parser = OptionParser(usage="Usage: %prog [--bbox=LEFT,BOTTOM,RIGHT,TOP] PRODUCT")
    parser.add_option('-b', '--bbox', dest="bbox",
                      help="Bounding box for the product query.  Only needed if no URLs have been downloaded already.")
    (options, args) = parser.parse_args()

    if not args:
        print("You must specify a product to download.")
        sys.exit(1)
    product = args[0]

    # The URL list is stored in a file named after the product.
    if os.path.exists(product):
        urls = read_urls_from_file(product)
    else:
        if not options.bbox:
            print("No URLs have been downloaded.  You must specify a bounding box.")
            sys.exit(1)
        try:
            (left, bottom, right, top) = [float(n) for n in options.bbox.split(',')]
        except ValueError:
            # Raised both for a non-numeric part and for the wrong number of
            # parts; parser.error prints the usage message and exits.
            parser.error("--bbox must be four comma-separated numbers: LEFT,BOTTOM,RIGHT,TOP")
        urls = download_urls(product, left, bottom, right, top)
        write_urls_to_file(product, urls)

    # Rewrite the URL file after every download so that a later run only
    # sees the URLs that are still outstanding.  Looping on the list itself,
    # rather than catching IndexError from pop(), keeps an IndexError raised
    # inside download_url from being silently swallowed.
    while urls:
        download_url(urls.pop())
        write_urls_to_file(product, urls)
