import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;

/**
 * Command-line benchmark that runs one Lucene query per line of {@code songs.txt}
 * against the index stored in the {@code index} directory, prints up to 50 hits
 * per query and reports per-query and total timings.
 */
public class SearchIndex {

	/** Path handed to {@code IndexReader.open} to locate the index. */
	private static final String INDEX_PATH = "index";

	/** Text file that supplies one raw query per line. */
	private static final String QUERIES_FILE = "songs.txt";

	/**
	 * Charset used to decode {@link #QUERIES_FILE}. The previous
	 * {@code DataInputStream.readLine()} call widened every byte to one char,
	 * which is exactly what ISO-8859-1 decoding does, so existing input is read
	 * the same way as before.
	 * NOTE(review): switch to UTF-8 if songs.txt is actually UTF-8 encoded.
	 */
	private static final String QUERIES_CHARSET = "ISO-8859-1";

	/** Default search field of the query parser; also printed for every hit. */
	private static final String CONTENT_FIELD = "content";

	/** Second stored field printed for every hit. */
	private static final String TRACK_LENGTH_FIELD = "tracklengthsec";

	/** Maximum number of hits printed per query. */
	private static final int MAX_PRINTED_HITS = 50;

	/** Substrings that are replaced by a single space before parsing. */
	private static final String[] REMOVE = {"+", "-", "_"};

	/**
	 * Query-syntax tokens that get a backslash prepended. The backslash itself
	 * MUST stay first: if it were escaped after the other tokens, the backslashes
	 * just inserted for them would be doubled and the tokens would end up
	 * unescaped again (e.g. ":" -> "\:" -> "\\:").
	 */
	private static final String[] ESCAPE = {"\\", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":"};

	/** Upper-case operator keywords that are lower-cased so they are treated as plain words. */
	private static final String[] LOWER = {"AND", "OR", "NOT"};

	// Alternative experimental queries that were tried for QUERY_OVERRIDE:
	//   "Chante Moore Hed Kandi\\: Back To Love 03.05 Love's Taken Over CD2/01 \\- Chante Moore \\- Loves Taken Over.mp3 tracknum:1^2 1"
	//   "(Chante Moore Hed Kandi\\: Back To Love 03.05 Love's Taken Over CD2/01 \\- Chante Moore \\- Loves Taken Over.mp3 tracknum:1^2 1) AND +tracklengthsec:(471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 0)"
	// Appending "~" to every word (fuzzy matching) was also tried and found far too slow.

	/**
	 * Hard-coded query that is searched instead of every line read from
	 * {@link #QUERIES_FILE}; the file then only determines how many times the
	 * query is executed. Set to {@code null} to search for the sanitized file
	 * lines themselves.
	 */
	private static final String QUERY_OVERRIDE = "Chante Moore Hed Kandi\\: Back To Love 03.05 Love's Taken Over CD2/01 \\- Chante Moore \\- Loves Taken Over.mp3 tracknum:1^2 1 +tracklengthsec:([471 TO 491] 0)";

	/**
	 * Runs the benchmark.
	 *
	 * @param args ignored
	 * @throws Exception if the index or the queries file cannot be read, or a query cannot be parsed
	 */
	public static void main(String[] args) throws Exception {
		int queries = 0;
		long totquerytime = 0;

		IndexReader ir = IndexReader.open(INDEX_PATH);
		try {
			Searcher searcher = new IndexSearcher(ir);
			Analyzer analyzer = new StandardAnalyzer();
			QueryParser qp = new QueryParser(CONTENT_FIELD, analyzer);

			FileInputStream in = new FileInputStream(QUERIES_FILE);
			try {
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, QUERIES_CHARSET));
				String line;
				// readLine() returns null at end of input; available() is not a reliable EOF test.
				while ((line = reader.readLine()) != null) {
					String text = (QUERY_OVERRIDE != null) ? QUERY_OVERRIDE : sanitize(line);
					if (text.trim().length() == 0) {
						// Nothing to search for on a blank line.
						continue;
					}
					++queries;
					totquerytime += runQuery(searcher, qp, text);
				}
			} finally {
				in.close();
			}
		} finally {
			// Release the index even when reading, parsing or searching fails.
			ir.close();
		}

		System.out.println(queries + " queries performed eating " + totquerytime + "ms of your life.");
		if (totquerytime > 0) {
			System.out.println("That's " + (double)queries / ((double)totquerytime / 1000.0) + " queries per second.");
		} else {
			// Avoid printing NaN/Infinity when nothing ran or everything finished within the timer resolution.
			System.out.println("That's too fast to measure a queries-per-second rate.");
		}
	}

	/**
	 * Turns a raw input line into text that can be handed to the query parser:
	 * drops the {@link #REMOVE} substrings, backslash-escapes the {@link #ESCAPE}
	 * tokens and lower-cases the {@link #LOWER} keywords.
	 *
	 * @param raw line as read from the queries file
	 * @return the filtered line
	 */
	private static String sanitize(String raw) {
		String text = raw;
		for (String token : REMOVE) {
			text = text.replace(token, " ");
		}
		for (String token : ESCAPE) {
			text = text.replace(token, "\\" + token);
		}
		for (String keyword : LOWER) {
			text = text.replace(keyword, keyword.toLowerCase());
		}
		return text;
	}

	/**
	 * Parses and executes one query, printing the parsed form, the elapsed search
	 * time and up to {@link #MAX_PRINTED_HITS} hits.
	 *
	 * @param searcher searcher to run the query against
	 * @param qp parser used to turn {@code text} into a query
	 * @param text query text in query-parser syntax
	 * @return time spent inside {@code searcher.search}, in milliseconds
	 * @throws Exception if the text cannot be parsed or the index cannot be read
	 */
	private static long runQuery(Searcher searcher, QueryParser qp, String text) throws Exception {
		System.out.println();
		System.out.println("Text : " + text);
		Query query = qp.parse(text);
		// Only multi-clause input parses to a BooleanQuery, so the cast must be guarded.
		if (query instanceof BooleanQuery) {
			BooleanClause[] bc = ((BooleanQuery)query).getClauses();
			System.out.println("bc   : " + bc.length);
		} else {
			System.out.println("bc   : n/a (" + query.getClass().getName() + ")");
		}
		System.out.println("Query: " + query);

		long start = System.currentTimeMillis();
		Hits hits = searcher.search(query);
		long querytime = System.currentTimeMillis() - start;
		System.out.println("Query took " + querytime + "ms.");

		System.out.println("Found " + hits.length() + " matching tracks.");
		int shown = Math.min(hits.length(), MAX_PRINTED_HITS);
		for (int a = 0; a < shown; ++a) {
			Document doc = hits.doc(a);
			// Document.get yields null for a missing field instead of failing on a null Field.
			System.out.println(a + " (Score " + hits.score(a) + "): " + doc.get(CONTENT_FIELD) + " | " + doc.get(TRACK_LENGTH_FIELD));
		}
		return querytime;
	}
}
