from mod_python import apache, util
from PyLucene import QueryParser, IndexSearcher, StandardAnalyzer, FSDirectory

# Sanitising tables applied, in this order, by fixstring() to every
# user-supplied value before it is placed in the query string.
replace = ["_"] # these chars will be replaced by a space
escape = ["\\", "+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":"] # these strings get a backslash put in front of them; the backslash itself comes first so later prefixes are not escaped again
lower = ["AND", "OR", "NOT"] # these words will be forced to lowercase (plain substring replacement)
fncts = ["\\", "/", "_", "-", "+", ":", ".", ",", "|"] # characters in filename we should convert to spaces (NOTE: not referenced by the code visible in this file)
maxhits = 100 # upper bound for the "limit" request parameter
maxdurationdiff = 250000 # upper bound for the "durationdiff" request parameter (milliseconds)

# MetaSearch instance, created by handler() on the first request and reused afterwards.
metasearch = None

def _parselimit(fs):
    """Return the requested number of hits, clamped to 1..maxhits.

    Falls back to 50 when the "limit" field is missing or is not a plain
    non-negative integer.
    """
    raw = fs.getfirst("limit", "")
    if not raw.isdigit():
        return 50
    return min(max(int(raw), 1), maxhits)


def handler(req):
    """mod_python entry point: answer a metadata search request.

    The form fields of ``req`` are turned into a query string by
    createquery().  When no usable search field was supplied an XHTML help
    page listing the accepted parameters is written; otherwise the query is
    run and the hits are written as an XML document.

    Always returns ``apache.OK``.
    """
    global metasearch

    # The searcher is created once and then reused for later requests.
    if not metasearch:
        metasearch = MetaSearch()

    fs = util.FieldStorage(req)
    search = createquery(fs)

    if search == "":
        page = (
            "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd\">",
            "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">",
            "<head>",
            "<title>Error</title>",
            "</head>",
            "<body>",
            "<h4>Hi there, apparently you're new to this :)</h4>",
            "<p>You'll have to specify the parameters, the valid ones are:</p>",
            "<ul>",
            # Ranges are taken from the module constants so the help text
            # cannot drift away from the limits that are actually enforced.
            "<li>limit (amount of tracks returned, 1-%d, default 50)</li>" % maxhits,
            "<li>durationdiff (max duration difference given in milliseconds, 1000-%d, default 10000)</li>" % maxdurationdiff,
            "<li>artistid</li>",
            "<li>releaseid</li>",
            "<li>trackid</li>",
            "<li>artist</li>",
            "<li>release</li>",
            "<li>track</li>",
            "<li>tracknum</li>",
            "<li>duration (milliseconds)</li>",
            "<li>filename</li>",
            "</ul>",
            "<p>Brief example:</p>",
            "<p><a href=\"http://mb.samfundet.no/?artist=europe&amp;track=countdown\">http://mb.samfundet.no/?artist=europe&amp;track=countdown</a></p>",
            "</body>",
            "</html>",
        )
        req.content_type = "text/html"
        req.write("".join(page))
    else:
        # Run the search before anything is written, so a failure while
        # parsing or searching does not leave a truncated XML document
        # behind an already-started response.
        limit = _parselimit(fs)
        tracks = metasearch.asXML(metasearch.search(search), limit)

        req.content_type = "text/xml"
        req.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
        req.write("<metadata xmlns=\"http://musicbrainz.org/ns/mmd-1.0#\" xmlns:ext=\"http://musicbrainz.org/ns/ext-1.0#\">")
        req.write(tracks)
        req.write("</metadata>")

    return apache.OK

def createquery(fs):
    """Build the Lucene query string for the submitted form fields.

    ``fs`` is the request's field storage; only its ``getfirst()`` method is
    used.  Recognised fields:

    * artistid, releaseid, trackid -- fielded terms boosted with ``^4``
    * artist, release, track, filename -- added as free text
    * tracknum (digits only) -- fielded term boosted with ``^2`` plus the
      bare number as free text
    * duration (digits only, milliseconds) -- a required clause listing
      every whole second within ``durationdiff`` of the given duration
    * durationdiff (digits only, milliseconds) -- tolerance for the duration
      clause, default 10000, clamped to 1000..maxdurationdiff

    The "limit" field does not influence the query string and is therefore
    not read here.

    Returns the query string, or "" when no search field was supplied.
    """
    # getfirst() already returns the first submitted value as a string, so
    # it must not be indexed: doing so would keep only its first character.
    rawdiff = fs.getfirst("durationdiff", "")
    if rawdiff.isdigit():
        durationdiff = int(rawdiff)
    else:
        durationdiff = 10000
    durationdiff = min(max(durationdiff, 1000), maxdurationdiff)

    parts = []

    for field in ("artistid", "releaseid", "trackid"):
        value = fs.getfirst(field)
        if value:
            parts.append(field + ":" + fixstring(value) + "^4 ")

    for field in ("artist", "release", "track", "filename"):
        value = fs.getfirst(field)
        if value:
            parts.append(fixstring(value) + " ")

    tracknum = fs.getfirst("tracknum")
    if tracknum and tracknum.isdigit():
        # int() normalises the number (drops leading zeros).
        number = str(int(tracknum))
        parts.append("tracknum:" + number + "^2 " + number + " ")

    duration = fs.getfirst("duration")
    if duration and duration.isdigit():
        millis = int(duration)
        minvalue = max((millis - durationdiff) // 1000, 0)
        maxvalue = (millis + durationdiff) // 1000
        # why can't we use range search [x TO y]?
        # because lucene thinks 4000-4200, 40000-42000 and so on is between 400-420
        # most likely because lucene handles text, not numeric values
        seconds = [str(sec) for sec in range(minvalue, maxvalue + 1)]
        # A length of 0 is always accepted as well
        # (presumably it marks tracks of unknown duration -- TODO confirm).
        seconds.append("0")
        parts.append("+tracklengthsec:(" + " ".join(seconds) + ")")

    return "".join(parts).strip()
    
def fixstring(string):
    """Sanitise a user-supplied value before it goes into the query string.

    Three passes are applied in order, driven by the module-level tables
    ``replace``, ``escape`` and ``lower``; the resulting string is returned.
    """
    # Pass 1: separator characters become plain spaces.
    for separator in replace:
        string = string.replace(separator, " ")

    # Pass 2: put a backslash in front of every entry of ``escape``.  The
    # table lists the backslash first, so prefixes added for later entries
    # are not escaped a second time.
    for special in escape:
        string = string.replace(special, "\\" + special)

    # Pass 3: lower-case the listed keywords.  This is a plain substring
    # replacement, so occurrences inside longer words change too.
    for keyword in lower:
        string = string.replace(keyword, keyword.lower())

    return string

class MetaSearch(object):
    '''
    Search the Lucene index and render the hits as XML
    '''

    def __init__(self):
        '''
        Open the on-disk index and prepare a query parser whose default
        field is "content"
        '''
        # NOTE(review): the False argument presumably means "do not create
        # the index" and the empty list "no stop words" -- confirm against
        # the PyLucene version in use.
        self.searcher = IndexSearcher(FSDirectory.getDirectory("/export/mb/index", False))
        self.querypar = QueryParser("content", StandardAnalyzer([]))

    def search(self, search):
        '''
        Parse the query string and return the hits found by the searcher
        '''
        query = self.querypar.parse(search)
        return self.searcher.search(query)

    def asXML(self, hits, maxhits):
        '''
        Output result from metasearch as XML

        Renders at most maxhits of the given hits as a <track-list>
        fragment and returns it as one string. The score is written as a
        whole number (score * 100) and the track-list offset is the
        stored track number minus one.
        '''
        esc = self.escape
        # Collect the pieces and join once instead of growing a string.
        out = ["\t<track-list>\n"]
        for i in xrange(min(hits.length(), maxhits)):
            doc = hits.doc(i)
            out.append("\t\t<track id=\"%s\" ext:score=\"%d\">\n" % (esc(doc.get("trackid")), (hits.score(i) * 100)))
            out.append("\t\t\t<title>%s</title>\n" % esc(doc.get("track")))
            out.append("\t\t\t<duration>%s</duration>\n" % esc(doc.get("tracklength")))
            out.append("\t\t\t<artist id=\"%s\">\n" % esc(doc.get("artistid")))
            out.append("\t\t\t\t<name>%s</name>\n" % esc(doc.get("artist")))
            out.append("\t\t\t\t<sort-name>%s</sort-name>\n" % esc(doc.get("artistsortname")))
            out.append("\t\t\t</artist>\n")
            out.append("\t\t\t<release-list>\n")
            out.append("\t\t\t\t<release id=\"%s\">\n" % esc(doc.get("albumid")))
            out.append("\t\t\t\t\t<title>%s</title>\n" % esc(doc.get("album")))
            out.append("\t\t\t\t\t<track-list offset=\"%d\"/>\n" % (int(esc(doc.get("tracknum"))) - 1))
            out.append("\t\t\t\t\t<artist id=\"%s\">\n" % esc(doc.get("albumartistid")))
            out.append("\t\t\t\t\t\t<name>%s</name>\n" % esc(doc.get("albumartist")))
            out.append("\t\t\t\t\t\t<sort-name>%s</sort-name>\n" % esc(doc.get("albumartistsortname")))
            out.append("\t\t\t\t\t</artist>\n")
            out.append("\t\t\t\t</release>\n")
            out.append("\t\t\t</release-list>\n")
            out.append("\t\t</track>\n")
        out.append("\t</track-list>")
        return "".join(out)

    def escape(self, text):
        '''
        Escape XML/HTML entities and convert output to utf-8

        The double quote is escaped too, because asXML() places escaped
        values inside double-quoted attributes.
        '''
        # '&' must be handled first so the entities added afterwards are
        # not escaped again.
        escaped = text.replace(u'&', u'&amp;')
        escaped = escaped.replace(u'<', u'&lt;').replace(u'>', u'&gt;')
        escaped = escaped.replace(u'"', u'&quot;')
        return escaped.encode('utf-8', 'replace')