"""
WebSearcher defines classes for programmatically searching the web via a search engine.
For example, to find the first 20 results of some search terms from Google:

	for result in GoogleWebSearcher().fetchResults(20, 'Cobra programming language')
		print result

The results are instances of SearchResult with properties such as .url, .title and .snippet.

Tip: To get the contents of a given URL, use System.Net.WebClient.

You can also instantiate a searcher and reuse it, primarily if there is some state within it that you want to maintain between uses:

	searcher = GoogleWebSearcher()
	searcher.willHideIdentity = false
	searcher.maxResults = 20
	for result in searcher.fetchResults('Cobra programming language')
		print result
	for result in searcher.fetchResults('programming')
		print result


TODO

	[ ] Get first version working.
	[ ] Support async fetching? The WebClient class already has some async support.
"""

use System.Net


class WebSearcher<of TSearchResult> is abstract
	where TSearchResult must be SearchResult
	# Abstract base for search-engine clients.
	# The base class stores the search settings, downloads the page whose address the
	# subclass supplies via _buildUrl, and passes the page text to _parseResults.

	var _maxResults = 10
	var _searchText = ''
	var _willHideIdentity = true  # documented default is true

	cue init
		base.init

	pro maxResults from var
		"""
		The maximum number of search results that will be fetched from the search engine.
		The base class only stores this value; it is up to each subclass to honor it when
		building the URL or parsing the results.
		"""

	pro searchText from var
		"""
		The text that will be searched for. Ex: 'Cobra programming language'
		"""

	pro willHideIdentity from var
		"""
		When true, the web request to the search engine will include headers to make it appear to
		come from a desktop web browser. This is to avoid the potential problem of a search engine
		denying service to a programmatic client. Defaults to true.
		"""
	
	def fetchResults(maxResults as int, searchText as String) as IEnumerable<of TSearchResult>
		"""
		Stores both arguments in the searcher and then runs the search.
		The search text must be non-empty, just as for the other overloads.
		"""
		require maxResults > 0 and searchText.length > 0
		_maxResults = maxResults
		_searchText = searchText
		return .fetchResults
		
	def fetchResults(searchText as String) as IEnumerable<of TSearchResult>
		"""
		Stores the search text in the searcher and then runs the search using the current .maxResults.
		"""
		require searchText.length
		_searchText = searchText
		return .fetchResults
		
	def fetchResults as IEnumerable<of TSearchResult>
		"""
		Downloads the page at _buildUrl into memory and returns whatever _parseResults makes of it.
		If the download or parsing throws, the exception is printed and an empty sequence is returned.
		"""
		require .searchText.length
		# TODO: retry on network errors
		try
			# `using` disposes the WebClient even when the download throws
			using wc = WebClient()
				_setHeaders(wc)
				return _parseResults(wc.downloadString(_buildUrl))
		catch e as Exception
			print e.toString
		return List<of TSearchResult>()

	def _buildUrl as String is abstract
		"""
		Build the specific URL to be retrieved.
		"""

	def _parseResults(content as String) as IEnumerable<of TSearchResult>
		"""
		Converts the text of the downloaded results page into search results.
		The default implementation recognizes nothing and returns an empty sequence.
		Subclasses override this with engine-specific parsing.
		"""
		return List<of TSearchResult>()

	def _setHeaders(wc as WebClient)
		"""
		Sets the HTTP request headers.
		The default implementation sets the headers to those of a web browser if .willHideIdentity
		(which defaults to true); otherwise it leaves the client's headers untouched.
		"""
		if not .willHideIdentity
			return
		# TODO: compare these headers to my Python code
		wc.headers.add('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)')
		wc.headers.add('Accept', 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*')
		wc.headers.add('Accept-Language', 'en-us')
		# No 'Accept-Encoding: gzip, deflate' header here: WebClient does not decompress
		# responses by itself, so asking for compression would yield unreadable content.


class SearchResult
	"""
	A single hit reported by a search engine.
	Holds the three strings passed to the initializer and exposes them as read-only properties.
	"""

	cue init(url as String, title as String, snippet as String)
		base.init
		_url = url
		_title = title
		_snippet = snippet

	get url from var as String
		"""
		The url string given to the initializer.
		"""

	get title from var as String
		"""
		The title string given to the initializer.
		"""
	
	get snippet from var as String
		"""
		The snippet string given to the initializer.
		"""


class GoogleWebSearcher inherits WebSearcher<of SearchResult>
	"""
	A searcher that builds Google search URLs.
	See the module-level docs for explanation and example use.
	"""

	var _hl = 'en'
	var _btnG = 'Google+Search'

	cue init
		base.init

	pro hl from var
		"""
		The language query variable. Defaults to 'en'.
		Inserted into the URL as-is; set it to an empty string to leave it out.
		"""
	
	pro btnG from var
		"""
		The button query variable. Defaults to 'Google+Search'.
		Inserted into the URL as-is (so it must already be URL-encoded); set it to an empty
		string to leave it out.
		"""

	def _buildUrl as String is override
		"""
		Returns the Google search URL for the current .searchText.
		The query variables are joined with '&' and the search text is percent-encoded so that
		spaces and reserved characters such as '&' or '#' cannot corrupt the query string.
		"""
		# http://www.google.com/search?hl=en&q=google+search+api&btnG=Google+Search
		# TODO: .maxResults is not yet reflected in the URL
		query = Uri.escapeDataString(.searchText)
		url = 'http://www.google.com/search?'
		if .hl.length, url += 'hl=[.hl]&'
		url += 'q=[query]'  # q is always present, so the separators hang off its neighbors
		if .btnG.length, url += '&btnG=[.btnG]'
		return url


class Program

	def main
		"""
		Command-line entry point.
		Expects a URL argument and an optional local file name; when the file name is missing it
		is taken from the last path segment of the URL. The statements shown here only announce
		the download; they do not perform it.
		"""
		print 'WebSearcher'
		args = CobraCore.commandLineArgs
		# the URL is read from args[1], so at least two entries are needed
		if args.count < 2
			print ns'usage: download URL [LOCALFILENAME]'
			return
		url = args[1]

		# localFileName is derived from the url if missing:
		if args.count > 2
			localFileName = args[2]
		else
			s = url
			# drop one trailing slash so the last path segment is not empty
			if s.endsWith('/')
				s = s[:-1]
			i = s.lastIndexOf('/')
			if i <> -1
				localFileName = s[i+1:]
			else
				print 'Invalid URL.'
				# stop here: there is no file name to continue with
				return

		print 'Downloading [url] to [localFileName]'
		Console.out.flush