diff --git a/Makefile b/Makefile index 14d04f350..6ab983323 100644 --- a/Makefile +++ b/Makefile @@ -66,8 +66,7 @@ edit_translations_to_download_list: ## this target is to build all what have to be built: # 1. Update Quranic resources needed for indexing phase, see update_pre_build # 2. Generate all Indexes, see index_all -# 4. Update all resources that must be updated after indexing phase or independently, see update_post_build -build: update_pre_build index_all update_post_build +build: update_pre_build index_all @@ -102,11 +101,6 @@ download_tanzil: -## update resources that must be updated after (or independent to) indexing phase, which are: -# 1. list of indexed translations, see update_translations_indexed_list -# 3. list of online recitations, see update_recitations_online_list -update_post_build: update_dynamic_resources_postbuild #update_recitations_online_list - ## update resources that must be updated before indexing phase, which are: # 1. Quranic Arabic Corpus, see update_quranic_corpus # 2. Linguistic resources on the form of python dictionarries to accelerate the loading , see update_dynamic_resources diff --git a/README.rst b/README.rst index d1379b830..e4d7449e1 100644 --- a/README.rst +++ b/README.rst @@ -17,23 +17,17 @@ Install it from Pypi: .. code-block:: sh - $ sudo pip install alfanous + $ pip install alfanous -You can use it from console: -.. code-block:: sh - - $ alfanous-console -a search -q الله - $ alfanous-console -a search -q Allh - -or from Python: +You can use it: .. code-block:: python - >>> import alfanous - >>> alfanous.search(u"الله") - >>> alfanous.do({"action":"search","query":u"الله"}) - >>> alfanous.do({"action":"search","query":u"Allh"}) # Buckwalter transliteration + >>> from alfanous import api + >>> api.search(u"الله") + >>> api.do({"action":"search","query":u"الله"}) + >>> api.do({"action":"search","query":u"Allh"}) # Buckwalter transliteration You can use it also from the web service: diff --git a/src/alfanous/__init__.py b/src/alfanous/__init__.py index de34b1ca2..fd40910d9 100755 --- a/src/alfanous/__init__.py +++ b/src/alfanous/__init__.py @@ -1,49 +1,4 @@ -""" hint: - Use `alfanous.search` for searching in Quran verses and translations. - Use `alfanous.get_info` for getting meta info. - Use `alfanous.do` method for search, suggestion and get most useful info. - """ -# import Output object -from alfanous.outputs import Raw as _search_engine -# import default Paths -from alfanous.data import paths as PATHS -DEFAULTS, DOMAINS, HELPMESSAGES = _search_engine.DEFAULTS, _search_engine.DOMAINS, _search_engine.HELPMESSAGES -FLAGS = DEFAULTS["flags"].keys() -from alfanous.outputs import arabic_to_english_fields as _fields - -FIELDS_ARABIC = _fields.keys() -FIELDS_ENGLISH = _fields.values() - -_R = _search_engine() - - -# Pivot function for search, suggestion, show info -def do(flags): - return _R.do(flags) - - -def search(query, unit="aya", page=1, sortedby="relevance", fuzzy=False, view="normal", highlight="bold", flags={}): - all_flags = flags - all_flags.update({"action": "search", - "unit": unit, - "query": query, - "page": page, - "sortedby": sortedby, - "fuzzy": fuzzy, - "view": view, - "highlight": highlight - }) - return do(all_flags) - - -def get_info(query="all"): - """ - Show useful meta info. - - @param query: info to be retrieved, possible_values = ['chapters', 'defaults', 'domains', 'errors', 'arabic_to_english_fields', 'fields_reverse', 'flags', 'help_messages', 'hints', 'information', 'recitations', 'roots', 'surates', 'translations'] - """ - return do({"action": "show", "query": query}) diff --git a/src/alfanous/api.py b/src/alfanous/api.py new file mode 100755 index 000000000..de34b1ca2 --- /dev/null +++ b/src/alfanous/api.py @@ -0,0 +1,49 @@ +""" hint: + + Use `alfanous.search` for searching in Quran verses and translations. + Use `alfanous.get_info` for getting meta info. + Use `alfanous.do` method for search, suggestion and get most useful info. + """ + +# import Output object +from alfanous.outputs import Raw as _search_engine +# import default Paths +from alfanous.data import paths as PATHS + +DEFAULTS, DOMAINS, HELPMESSAGES = _search_engine.DEFAULTS, _search_engine.DOMAINS, _search_engine.HELPMESSAGES +FLAGS = DEFAULTS["flags"].keys() + +from alfanous.outputs import arabic_to_english_fields as _fields + +FIELDS_ARABIC = _fields.keys() +FIELDS_ENGLISH = _fields.values() + +_R = _search_engine() + + +# Pivot function for search, suggestion, show info +def do(flags): + return _R.do(flags) + + +def search(query, unit="aya", page=1, sortedby="relevance", fuzzy=False, view="normal", highlight="bold", flags={}): + all_flags = flags + all_flags.update({"action": "search", + "unit": unit, + "query": query, + "page": page, + "sortedby": sortedby, + "fuzzy": fuzzy, + "view": view, + "highlight": highlight + }) + return do(all_flags) + + +def get_info(query="all"): + """ + Show useful meta info. + + @param query: info to be retrieved, possible_values = ['chapters', 'defaults', 'domains', 'errors', 'arabic_to_english_fields', 'fields_reverse', 'flags', 'help_messages', 'hints', 'information', 'recitations', 'roots', 'surates', 'translations'] + """ + return do({"action": "show", "query": query}) diff --git a/src/alfanous/data.py b/src/alfanous/data.py index fdc6ec08a..718ca4e75 100644 --- a/src/alfanous/data.py +++ b/src/alfanous/data.py @@ -2,8 +2,7 @@ import json from alfanous import paths -from alfanous.engines import QuranicSearchEngine -from alfanous.engines import TraductionSearchEngine, WordSearchEngine + def recitations(path=paths.RECITATIONS_LIST_FILE): @@ -48,14 +47,42 @@ def information(path=paths.INFORMATION_FILE): def QSE(path=paths.QSE_INDEX): + from alfanous.engines import QuranicSearchEngine return QuranicSearchEngine(path) + def TSE(path=paths.TSE_INDEX): + from alfanous.engines import TraductionSearchEngine return TraductionSearchEngine(path) + def WSE(path=paths.WSE_INDEX): + from alfanous.engines import WordSearchEngine return WordSearchEngine(path) - - +try: + arabic_to_english_fields = json.load(open(paths.ARABIC_NAMES_FILE)) +except: + arabic_to_english_fields = {} +try: + std2uth_words = json.load(open(paths.STANDARD_TO_UTHMANI_FILE)) +except: + std2uth_words = {} +try: + vocalization_dict = json.load(open(paths.VOCALIZATIONS_FILE)) +except: + vocalization_dict = {} +try: + syndict = json.load(open(paths.SYNONYMS_FILE)) +except: + syndict = {} +try: + derivedict = json.load(open(paths.DERIVATIONS_FILE)) +except: + derivedict = {"root": []} + +try: + worddict = json.load(open(paths.WORD_PROPS_FILE)) +except: + worddict = {} diff --git a/src/alfanous/misc.py b/src/alfanous/misc.py index 5a73c6d20..74edc169f 100755 --- a/src/alfanous/misc.py +++ b/src/alfanous/misc.py @@ -1,6 +1,5 @@ - -FILTER_DOUBLES = filter_doubles = lambda lst:list( set( lst ) ) -LOCATE = lambda source, dist, itm: dist[source.index( itm )] \ +filter_doubles = lambda lst :list(set(lst)) +locate = lambda source, dist, itm: dist[source.index(itm)] \ if itm in source else None -FIND = lambda source, dist, itm: [dist[i] for i in [i for i in range( len( source ) ) if source[i] == itm]] +find = lambda source, dist, itm: [dist[i] for i in [i for i in range(len(source)) if source[i] == itm]] diff --git a/src/alfanous/outputs.py b/src/alfanous/outputs.py index 98566b4b7..ce1f785b3 100755 --- a/src/alfanous/outputs.py +++ b/src/alfanous/outputs.py @@ -3,10 +3,9 @@ from alfanous.text_processing import QArabicSymbolsFilter from alfanous.data import * -from alfanous.resources import * from alfanous.romanization import transliterate -from alfanous.misc import LOCATE, FIND, FILTER_DOUBLES +from alfanous.misc import locate, find, filter_doubles STANDARD2UTHMANI = lambda x: std2uth_words.get(x) or x @@ -529,16 +528,16 @@ def _search_aya(self, flags): synonyms = syndict.get(term[1]) or [] derivations_extra = [] if word_derivations: - lemma = LOCATE(derivedict["word_"], derivedict["lemma"], term[1]) + lemma = locate(derivedict["word_"], derivedict["lemma"], term[1]) if lemma: # if different of none - derivations = FILTER_DOUBLES(FIND(derivedict["lemma"], derivedict["word_"], lemma)) + derivations = filter_doubles(find(derivedict["lemma"], derivedict["word_"], lemma)) else: derivations = [] # go deeper with derivations - root = LOCATE(derivedict["word_"], derivedict["root"], term[1]) + root = locate(derivedict["word_"], derivedict["root"], term[1]) if root: # if different of none derivations_extra = list( - set(FILTER_DOUBLES(FIND(derivedict["root"], derivedict["word_"], lemma))) - set( + set(filter_doubles(find(derivedict["root"], derivedict["word_"], lemma))) - set( derivations)) words_output["individual"][cpt] = { diff --git a/src/alfanous/query_processing.py b/src/alfanous/query_processing.py index ba54d7210..241aa9688 100755 --- a/src/alfanous/query_processing.py +++ b/src/alfanous/query_processing.py @@ -21,12 +21,12 @@ from whoosh.query import Term, MultiTerm from whoosh.query import Wildcard as whoosh_Wildcard from whoosh.query import Prefix as whoosh_Prefix -from whoosh.query import Or, NullQuery, Every, And +from whoosh.query import Or, NullQuery, Every -from alfanous.resources import syndict, derivedict, worddict, arabic_to_english_fields +from alfanous.data import syndict, derivedict, worddict, arabic_to_english_fields from alfanous.text_processing import QArabicSymbolsFilter -from alfanous.misc import LOCATE, FIND, FILTER_DOUBLES +from alfanous.misc import locate, find, filter_doubles def _make_arabic_parser(): @@ -590,9 +590,9 @@ def derivation(word, leveldist): lst = [] if indexsrc: # if index source level is defined - itm = LOCATE(derivedict[indexsrc], derivedict[indexdist], word) + itm = locate(derivedict[indexsrc], derivedict[indexdist], word) if itm: # if different of none - lst = FILTER_DOUBLES(FIND(derivedict[indexdist], derivedict["word_"], itm)) + lst = filter_doubles(find(derivedict[indexdist], derivedict["word_"], itm)) else: lst = [word] @@ -623,7 +623,7 @@ def tuple(props): wset = None for propkey in props.keys(): if worddict.get(propkey): - partial_wset = set(FIND(worddict[propkey], worddict["word_"], props[propkey])) + partial_wset = set(find(worddict[propkey], worddict["word_"], props[propkey])) if wset is None: wset = partial_wset else: diff --git a/src/alfanous/results_processing.py b/src/alfanous/results_processing.py index 980970d61..12e37a189 100755 --- a/src/alfanous/results_processing.py +++ b/src/alfanous/results_processing.py @@ -1,4 +1,3 @@ - from whoosh.scoring import BM25F from whoosh.highlight import highlight, Fragment, \ HtmlFormatter, ContextFragmenter, BasicFragmentScorer, WholeFragmenter @@ -25,25 +24,7 @@ def QSort(sortedby): return sortedby - -def QFilter(results, new_results): - """ Filter give results with new results""" - results.filter(new_results) - return results - - - - -def QPaginate(results, pagelen=10): - """generator of pages""" - l = len(results) - minimal = lambda x, y: y if x > y else x - for i in range(0, l, 10): - yield i / pagelen, results[i:minimal(i + pagelen, l)] - - -def Qhighlight(text, terms, type="css", strip_vocalization=True): - +def Qhighlight(text, terms, type="css", strip_vocalization=True): if type == "bold": formatter = QBoldFormatter() else: # css @@ -60,13 +41,9 @@ def Qhighlight(text, terms, type="css", strip_vocalization=True): minscore=1 ) - return highlighted or text - - - class QBoldFormatter(object): """ add the style tags to the text """ diff --git a/src/alfanous/romanization.py b/src/alfanous/romanization.py index a838cc7be..6bbceb545 100644 --- a/src/alfanous/romanization.py +++ b/src/alfanous/romanization.py @@ -1,4 +1,3 @@ - # Buckwalter Romanization letters mapping BUCKWALTER2UNICODE = { u"'": u"\u0621", # hamza-on-the-line @@ -71,9 +70,9 @@ ISO2UNICODE = {u"ˌ": u"\u0621", # hamza-on-the-line # u"|": u"\u0622", # madda u"ˈ": u"\u0623", # hamza-on-'alif - #u"ˈ": u"\u0624", # hamza-on-waaw + # u"ˈ": u"\u0624", # hamza-on-waaw # u"<": u"\u0625", # hamza-under-'alif - #u"ˈ": u"\u0626", # hamza-on-yaa' + # u"ˈ": u"\u0626", # hamza-on-yaa' u"ʾ": u"\u0627", # bare 'alif u"b": u"\u0628", # baa' u"ẗ": u"\u0629", # taa' marbuuTa @@ -141,9 +140,6 @@ } - - - def transliterate(mode, string, ignore=u"", reverse=False): """ encode & decode different romanization systems """ @@ -159,7 +155,7 @@ def transliterate(mode, string, ignore=u"", reverse=False): result = "" for char in string: - if char not in ignore and mapping.get(char) : + if char not in ignore and mapping.get(char): result += mapping[char] else: result += char diff --git a/src/alfanous/searching.py b/src/alfanous/searching.py index 8bed777cd..a2c8b7cc4 100755 --- a/src/alfanous/searching.py +++ b/src/alfanous/searching.py @@ -1,5 +1,3 @@ -import logging - from alfanous.results_processing import QSort, QScore diff --git a/src/alfanous/setup.cfg b/src/alfanous/setup.cfg index bd295ced1..299a07701 100644 --- a/src/alfanous/setup.cfg +++ b/src/alfanous/setup.cfg @@ -3,4 +3,4 @@ name = alfanous description = Quranic search engine API description-file = file: README.rst platforms=ALL -license=AGPL \ No newline at end of file +license=GPL \ No newline at end of file diff --git a/src/alfanous/suggestions.py b/src/alfanous/suggestions.py deleted file mode 100755 index 8b1378917..000000000 --- a/src/alfanous/suggestions.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/alfanous/text_processing.py b/src/alfanous/text_processing.py index 9df8855ea..aff6ff515 100755 --- a/src/alfanous/text_processing.py +++ b/src/alfanous/text_processing.py @@ -2,7 +2,7 @@ import re -from whoosh.analysis import StopFilter, RegexTokenizer, Filter # LowercaseFilter, StandardAnalyzer, +from whoosh.analysis import RegexTokenizer, Filter # LowercaseFilter, StandardAnalyzer, from alfanous.Support.pyarabic.main import strip_tashkeel, strip_tatweel, strip_shadda, normalize_spellerrors, \ normalize_hamza, normalize_lamalef, normalize_uthmani_symbols # , HARAKAT_pat, from alfanous.Support.pyarabic.main import FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN, SHADDA # * @@ -15,10 +15,6 @@ def __init__(self, expression=r"[^ \t\r\n]+"): super(QSpaceTokenizer, self).__init__(expression=expression) -class QAffixesTokenizer(QSpaceTokenizer): - def __init__(self, expression=r"[^ \t\r\n]+"): - super(QAffixesTokenizer, self).__init__(expression=expression) - raise NotImplemented() class QArabicSymbolsFilter(Filter): diff --git a/src/alfanous_import/setup.py b/src/alfanous_import/setup.py deleted file mode 100755 index a25ec657b..000000000 --- a/src/alfanous_import/setup.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python - -from setuptools import setup - -setup( - name = "alfanous_import", - description = "Importing system for quranic indexes", - version = 0.1, - platforms = "ALL", - license = "AGPL", - packages = ["Qimport"], - install_requires = ["whoosh", "quran_corpus_reader"], - - author = "Assem Chelli", - author_email = "assem.ch@gmail.com", - maintainer = "Assem Chelli", - maintainer_email = "assem.ch@gmail.com", - - package_dir = {'Qimport':'.'}, - long_description = """ Importing system for Alfanous search engine""", - keywords = "search quran islam alfanous arabic", - url = "http://www.alfanous.org/", - download_url = "https://sourceforge.net/projects/alfanous/files/", - - include_package_data = True, - - #data_files = [ ( '.', './urls.txt' ), ( '.', './GPL.txt' ), ( '.', './config.xml' )], - - zip_safe = True, - - classifiers = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: GNU General Public License (GPL)", - "Natural Language :: Arabic", - "Natural Language :: English", - "Operating System :: OS Independent", - "Programming Language :: Python :: 2.6", - "Topic :: Software Development :: Libraries :: Python Modules", - ] - ) diff --git a/src/tests/test_interface.py b/src/tests/test_interface.py index 21c378a02..632d96a50 100644 --- a/src/tests/test_interface.py +++ b/src/tests/test_interface.py @@ -2,13 +2,13 @@ -import alfanous +import alfanous.api def test_search(): - alfanous.search(u"الله", sortedby='mushaf') + alfanous.api.search(u"الله", sortedby='mushaf') - results = alfanous.do({"action": "search", "query": u"الله"}) + results = alfanous.api.do({"action": "search", "query": u"الله"}) del results['search']['runtime'] assert results['search']['interval'] == {'end': 10, 'nb_pages': 157.5, 'page': 1.0, 'start': 1, 'total': 1566} diff --git a/src/tests/test_results_processing.py b/src/tests/test_results_processing.py index 8590ecf56..60a779e9f 100644 --- a/src/tests/test_results_processing.py +++ b/src/tests/test_results_processing.py @@ -1,4 +1,3 @@ - """ This is a test module for alfanous.ResultsProcessing. @@ -9,9 +8,8 @@ def test_highlight(): assert Qhighlight( - u"الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ", - [u"الحمد", u"لله"], - "html" - ) == ('الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ') - + u"الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ", + [u"الحمد", u"لله"], + "html" + ) == ('الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ')