#!/usr/bin/python
"""Download automatically files from MegaUpload (using a free account)"""

# Copyright (c) 2008-2009 Arnau Sanchez <tokland@gmail.com>

# This file is part of Megaupload-dl.

# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this software.  If not, see <http://www.gnu.org/licenses/>

import PIL.Image
import StringIO
import optparse
import time
import math
import sys
import os
from itertools import ifilter

# Import our modules
from megaupload_dl import lib
#from megaupload_dl import captcha

# Global variables
# VERSION is the program version handed to optparse (printed by --version).
# LINK_ENABLE_TIME is the default value, in seconds, of the --wait-time option.
VERSION = "0.3.2" 
LINK_ENABLE_TIME = 45

# Default debug function, called throughout this module as
# debug(level, message). It is built here with level 0 and rebound in main()
# using the verbosity level requested on the command line.
debug = lib.get_debug_func(0)        

def get_share_path(filename):
    """Return the path where the shared data file `filename` is expected.

    The directory that contains the ``lib`` module is tried first. When
    `filename` is not a regular file there, the path below
    /usr/share/megaupload_dl is returned instead (its existence is not
    checked).
    """
    module_dir = os.path.dirname(lib.__file__)
    local_candidate = os.path.join(module_dir, filename)
    if not os.path.isfile(local_candidate):
        return os.path.join("/usr/share/megaupload_dl", filename)
    return local_candidate

def loop_with_retries(retries, func, *args, **kwargs):
    """Run func(*args, **kwargs) with a retry mechanism.

    retries -- maximum number of attempts.
    func -- callable to run; an attempt counts as failed when it raises
        AssertionError. Any other exception propagates immediately.

    Returns the value of the first successful call. When every attempt
    fails (or retries is lower than 1) a message is logged and None is
    returned.
    """
    current_try = 1
    while current_try <= retries:
        debug(1, "start try: %d/%d" % (current_try, retries))
        try:
            return func(*args, **kwargs)
        # "except X as e" is accepted by Python 2.6+ and Python 3, unlike
        # the older "except X, e" spelling.
        except AssertionError as details:
            debug(1, "error: %s" % details)
            current_try += 1
    debug(0, "Retries exhausted")
    return None

def process_image(captcha_image):
    """Binarize a PIL captcha image to help the OCR process.

    The image is converted to mode "L"; pixels whose value is 0 stay 0 and
    every other pixel becomes 255. The resulting image is returned.
    """
    greyscale = captcha_image.convert("L")
    return greyscale.point(lambda level: 255 if level != 0 else 0)

def get_captcha_url(index):
    """Return the URL of the captcha image found in the index soup.

    index -- parsed download page; it must provide findAll("img").

    The result is the "src" attribute, exactly as written in the page, of
    the first <img> whose source contains "/gencap.php". When no such image
    exists an error is logged and the program exits with status 2.
    """
    def holds_captcha(tag):
        # An <img> without a "src" attribute is skipped instead of raising
        # KeyError. Assumes tags offer a dict-style get(), as BeautifulSoup
        # tags do.
        return "/gencap.php" in (tag.get("src") or "")

    images = index.findAll("img")
    img = lib.first(tag for tag in images if holds_captcha(tag))
    if not img:
        debug(0, "No captcha found in download page.: incorrect link " +
            "or file not available")
        # Raise an application exception?
        sys.exit(2)
    return img["src"]

###
                                         
def get_download_form(url, index, captcha_image, image_callback=None):
    """Decode the captcha and build the data to post to the download form.

    url -- address of the main page; returned unchanged as the form action.
    index -- soup of the main page, holding the form and the captcha field.
    captcha_image -- file (or file-like object) that PIL.Image.open can read.
    image_callback -- accepted for interface compatibility; this function
        does not call it.

    Returns a (url, postdata) tuple, where postdata maps every hidden input
    of the form to its value, plus the captcha field to the decoded text.
    Raises AssertionError when the decoded text is not 4 characters long,
    so that loop_with_retries() can try again.
    """
    form = index.find("form")
    hidden = form.findAll("input", {'type': 'hidden'})
    postdata = dict((str(tag["name"]), str(tag["value"])) for tag in hidden)
    captcha_field_name = index.find("input", {"id": "captchafield"})["name"]
    text = lib.ocr(process_image(PIL.Image.open(captcha_image)))
    debug(2, "decoded captcha: %s" % text)
    # Explicit raise rather than an assert statement: asserts are removed
    # under "python -O", which would silently disable the retry mechanism.
    if len(text) != 4:
        raise AssertionError("captcha has not the expected length")
    postdata[captcha_field_name] = text
    return url, postdata

def process_download_page(soup):
    """Parse the download page to extract the file URL.

    soup -- parsed page returned after posting the captcha form.

    Returns the "href" of the first link inside the element whose id is
    "downloadlink", after passing it through lib.unescape_entities().
    Raises AssertionError when that element is missing, so that
    loop_with_retries() can try again.
    """
    download_link = soup.find("div", {"id": "downloadlink"})
    # Explicit raise rather than an assert statement: asserts are removed
    # under "python -O", which would silently disable the retry mechanism.
    if not download_link:
        raise AssertionError("This is not the download page")
    href = download_link.find("a")["href"]
    debug(2, "file URL: %s" % href)
    return lib.unescape_entities(href)
    
###
    
def get_download_url(main_url, image_callback=None):
    """Download, parse and OCR the captcha to return the file URL.

    main_url -- address of the MegaUpload page of the file.
    image_callback -- forwarded unchanged to get_download_form().

    Steps: download the main page, locate and download the captcha image,
    build the form data with the decoded captcha, post it and extract the
    file URL from the resulting page.

    AssertionError raised by get_download_form() or
    process_download_page() is not caught here.
    """
    debug(1, "downloading main html: %s" % main_url)
    index = lib.get_soup(lib.download(main_url))
    image_url = get_captcha_url(index)
    debug(1, "captcha image: %s" % image_url)
    captcha_image = StringIO.StringIO(lib.download(image_url))
    action, postdata = get_download_form(main_url, index,
        captcha_image, image_callback)
    request = lib.build_request(action, postdata)
    debug(3, "building POSTDATA: %s" % request.get_data())
    # Announce the download before performing it, so the log reflects the
    # real order of operations even when the request fails.
    debug(1, "downloading html page: %s" % action)
    download_page = lib.get_soup(lib.download(request))
    return process_download_page(download_page)

###

def main(args0):
    """Command-line entry point: output the file URL of a MegaUpload link.

    args0 -- list of command-line arguments, without the program name.
        Only the first positional argument (the URL) is processed.

    Returns the exit status: 0 on success, 1 when no URL was given (the
    help text is printed), 2 when the file URL could not be obtained.
    """
    global debug
    usage = """usage: megaupload_dl.py [options] URL\n\n    %s""" % __doc__
    parser = optparse.OptionParser(usage, version=VERSION)
    parser.add_option('-v', '--verbose', dest='debug_level', action="count",
        default=0, help='Increase verbose level')
    parser.add_option('-r', '--captcha-retries', dest='captcha_retries',
        default=100, metavar="TIMES", type="int",
        help='Maximum captcha decoding retries before giving up')
    parser.add_option('-w', '--wait-time', dest='wait_time',
        default=LINK_ENABLE_TIME, metavar="SECONDS", type="int",
        help='Time to wait before exiting')
    options, args = parser.parse_args(args0)
    if not args:
        parser.print_help()
        return 1
    # Rebind the module-level debug function to the requested verbosity.
    debug = lib.get_debug_func(options.debug_level)
    if options.debug_level >= 3:
        image_callback = lambda image: image.show()
    else:
        image_callback = None
    url = loop_with_retries(options.captcha_retries, get_download_url,
        args[0], image_callback)
    if not url:
        debug(1, "operation unsuccessful")
        return 2
    if options.wait_time:
        debug(1, "waiting the link to be enabled: %d secs" % options.wait_time)
        time.sleep(options.wait_time)
    debug(1, "operation successful")
    lib.output(url)
    return 0
       
# Script entry point: run main() with the command-line arguments (program
# name stripped) and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
