Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V1 track #544

Open
wants to merge 1 commit into
base: whooshxpy3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ edit_translations_to_download_list:
## this target builds everything that has to be built:
# 1. Update Quranic resources needed for indexing phase, see update_pre_build
# 2. Generate all Indexes, see index_all
# 4. Update all resources that must be updated after indexing phase or independently, see update_post_build
build: update_pre_build index_all update_post_build
build: update_pre_build index_all



Expand Down Expand Up @@ -102,11 +101,6 @@ download_tanzil:



## update resources that must be updated after (or independent to) indexing phase, which are:
# 1. list of indexed translations, see update_translations_indexed_list
# 3. list of online recitations, see update_recitations_online_list
update_post_build: update_dynamic_resources_postbuild #update_recitations_online_list

## update resources that must be updated before indexing phase, which are:
# 1. Quranic Arabic Corpus, see update_quranic_corpus
# 2. Linguistic resources in the form of Python dictionaries to accelerate loading, see update_dynamic_resources
Expand Down
18 changes: 6 additions & 12 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,17 @@ Install it from Pypi:

.. code-block:: sh

$ sudo pip install alfanous
$ pip install alfanous

You can use it from console:

.. code-block:: sh

$ alfanous-console -a search -q الله
$ alfanous-console -a search -q Allh

or from Python:
You can use it:

.. code-block:: python

>>> import alfanous
>>> alfanous.search(u"الله")
>>> alfanous.do({"action":"search","query":u"الله"})
>>> alfanous.do({"action":"search","query":u"Allh"}) # Buckwalter transliteration
>>> from alfanous import api
>>> api.search(u"الله")
>>> api.do({"action":"search","query":u"الله"})
>>> api.do({"action":"search","query":u"Allh"}) # Buckwalter transliteration

You can also use it from the web service:

Expand Down
45 changes: 0 additions & 45 deletions src/alfanous/__init__.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,4 @@
""" hint:

Use `alfanous.search` for searching in Quran verses and translations.
Use `alfanous.get_info` for getting meta info.
Use `alfanous.do` for search, suggestions and retrieving the most useful info.
"""

# Output class used as the search front end (kept under a private alias)
from alfanous.outputs import Raw as _search_engine
# Default paths object exposed by alfanous.data, re-exported here as PATHS
from alfanous.data import paths as PATHS

# Class-level tables of the output class, re-exported at module level
DEFAULTS, DOMAINS, HELPMESSAGES = _search_engine.DEFAULTS, _search_engine.DOMAINS, _search_engine.HELPMESSAGES
# Names of the entries stored under DEFAULTS["flags"]
FLAGS = DEFAULTS["flags"].keys()

# Mapping exposed below as two separate collections: its keys and its values
# (presumably Arabic field name -> English field name, going by its name)
from alfanous.outputs import arabic_to_english_fields as _fields

FIELDS_ARABIC = _fields.keys()
FIELDS_ENGLISH = _fields.values()

# Single module-level engine instance, created at import time
_R = _search_engine()


# Pivot function for search, suggestion, show info
def do(flags):
    """Hand the ``flags`` request over to the module-level engine and return its answer."""
    response = _R.do(flags)
    return response


def search(query, unit="aya", page=1, sortedby="relevance", fuzzy=False,
           view="normal", highlight="bold", flags=None):
    """
    Run a search request and return whatever `do` returns for it.

    @param query: the search query
    @param unit: value sent as the "unit" flag (default "aya")
    @param page: value sent as the "page" flag (default 1)
    @param sortedby: value sent as the "sortedby" flag (default "relevance")
    @param fuzzy: value sent as the "fuzzy" flag (default False)
    @param view: value sent as the "view" flag (default "normal")
    @param highlight: value sent as the "highlight" flag (default "bold")
    @param flags: optional dict of extra flags; the keyword arguments above
                  override any entry with the same key. The dict is copied,
                  never modified.
    """
    # Work on a copy: the former `flags={}` default was shared between calls
    # and updated in place, so extra flags leaked from one call to the next
    # and the caller's own dict was silently modified.
    all_flags = dict(flags) if flags else {}
    all_flags.update({
        "action": "search",
        "unit": unit,
        "query": query,
        "page": page,
        "sortedby": sortedby,
        "fuzzy": fuzzy,
        "view": view,
        "highlight": highlight,
    })
    return do(all_flags)


def get_info(query="all"):
    """
    Fetch meta information by issuing a "show" request through `do`.

    @param query: info to be retrieved, possible_values = ['chapters', 'defaults', 'domains', 'errors', 'arabic_to_english_fields', 'fields_reverse', 'flags', 'help_messages', 'hints', 'information', 'recitations', 'roots', 'surates', 'translations']
    """
    request = {"action": "show", "query": query}
    return do(request)
49 changes: 49 additions & 0 deletions src/alfanous/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
""" hint:

Use `alfanous.api.search` for searching in Quran verses and translations.
Use `alfanous.api.get_info` for getting meta info.
Use `alfanous.api.do` for search, suggestions and retrieving the most useful info.
"""

# Output class used as the search front end (kept under a private alias)
from alfanous.outputs import Raw as _search_engine
# Default paths object exposed by alfanous.data, re-exported here as PATHS
from alfanous.data import paths as PATHS

# Class-level tables of the output class, re-exported at module level
DEFAULTS, DOMAINS, HELPMESSAGES = _search_engine.DEFAULTS, _search_engine.DOMAINS, _search_engine.HELPMESSAGES
# Names of the entries stored under DEFAULTS["flags"]
FLAGS = DEFAULTS["flags"].keys()

# Mapping exposed below as two separate collections: its keys and its values
# (presumably Arabic field name -> English field name, going by its name)
from alfanous.outputs import arabic_to_english_fields as _fields

FIELDS_ARABIC = _fields.keys()
FIELDS_ENGLISH = _fields.values()

# Single module-level engine instance, created at import time
_R = _search_engine()


# Pivot function for search, suggestion, show info
def do(flags):
    """Hand the ``flags`` request over to the module-level engine and return its answer."""
    response = _R.do(flags)
    return response


def search(query, unit="aya", page=1, sortedby="relevance", fuzzy=False,
           view="normal", highlight="bold", flags=None):
    """
    Run a search request and return whatever `do` returns for it.

    @param query: the search query
    @param unit: value sent as the "unit" flag (default "aya")
    @param page: value sent as the "page" flag (default 1)
    @param sortedby: value sent as the "sortedby" flag (default "relevance")
    @param fuzzy: value sent as the "fuzzy" flag (default False)
    @param view: value sent as the "view" flag (default "normal")
    @param highlight: value sent as the "highlight" flag (default "bold")
    @param flags: optional dict of extra flags; the keyword arguments above
                  override any entry with the same key. The dict is copied,
                  never modified.
    """
    # Work on a copy: the former `flags={}` default was shared between calls
    # and updated in place, so extra flags leaked from one call to the next
    # and the caller's own dict was silently modified.
    all_flags = dict(flags) if flags else {}
    all_flags.update({
        "action": "search",
        "unit": unit,
        "query": query,
        "page": page,
        "sortedby": sortedby,
        "fuzzy": fuzzy,
        "view": view,
        "highlight": highlight,
    })
    return do(all_flags)


def get_info(query="all"):
    """
    Fetch meta information by issuing a "show" request through `do`.

    @param query: info to be retrieved, possible_values = ['chapters', 'defaults', 'domains', 'errors', 'arabic_to_english_fields', 'fields_reverse', 'flags', 'help_messages', 'hints', 'information', 'recitations', 'roots', 'surates', 'translations']
    """
    request = {"action": "show", "query": query}
    return do(request)
35 changes: 31 additions & 4 deletions src/alfanous/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import json

from alfanous import paths
from alfanous.engines import QuranicSearchEngine
from alfanous.engines import TraductionSearchEngine, WordSearchEngine



def recitations(path=paths.RECITATIONS_LIST_FILE):
Expand Down Expand Up @@ -48,14 +47,42 @@ def information(path=paths.INFORMATION_FILE):


def QSE(path=paths.QSE_INDEX):
    """Instantiate ``QuranicSearchEngine`` with ``path`` (``paths.QSE_INDEX`` by default)."""
    # The engine class is imported at call time, not when this module is loaded.
    from alfanous.engines import QuranicSearchEngine as engine_class
    engine = engine_class(path)
    return engine


def TSE(path=paths.TSE_INDEX):
    """Instantiate ``TraductionSearchEngine`` with ``path`` (``paths.TSE_INDEX`` by default)."""
    # The engine class is imported at call time, not when this module is loaded.
    from alfanous.engines import TraductionSearchEngine as engine_class
    engine = engine_class(path)
    return engine


def WSE(path=paths.WSE_INDEX):
    """Instantiate ``WordSearchEngine`` with ``path`` (``paths.WSE_INDEX`` by default)."""
    # The engine class is imported at call time, not when this module is loaded.
    from alfanous.engines import WordSearchEngine as engine_class
    engine = engine_class(path)
    return engine




def _load_json(path, default):
    """Return the parsed JSON content of ``path``, or ``default`` if it cannot be loaded.

    Loading is best-effort: a missing or unreadable file, or a file that does
    not hold valid JSON, yields ``default`` instead of raising.

    @param path: location of the JSON resource file
    @param default: value returned when the resource cannot be loaded
    """
    try:
        # `with` closes the handle even when parsing fails; the previous
        # json.load(open(...)) form never closed it explicitly.
        with open(path) as resource_file:
            return json.load(resource_file)
    except (IOError, OSError, ValueError):
        # IOError/OSError: file missing or unreadable.
        # ValueError: malformed JSON or undecodable bytes.
        # Unlike a bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return default


# Linguistic resources loaded once at import time; each falls back to an
# empty structure when its file is unavailable.
arabic_to_english_fields = _load_json(paths.ARABIC_NAMES_FILE, {})
std2uth_words = _load_json(paths.STANDARD_TO_UTHMANI_FILE, {})
vocalization_dict = _load_json(paths.VOCALIZATIONS_FILE, {})
syndict = _load_json(paths.SYNONYMS_FILE, {})
derivedict = _load_json(paths.DERIVATIONS_FILE, {"root": []})
worddict = _load_json(paths.WORD_PROPS_FILE, {})
7 changes: 3 additions & 4 deletions src/alfanous/misc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@


FILTER_DOUBLES = filter_doubles = lambda lst:list( set( lst ) )
LOCATE = lambda source, dist, itm: dist[source.index( itm )] \
filter_doubles = lambda lst :list(set(lst))
locate = lambda source, dist, itm: dist[source.index(itm)] \
if itm in source else None
FIND = lambda source, dist, itm: [dist[i] for i in [i for i in range( len( source ) ) if source[i] == itm]]
find = lambda source, dist, itm: [dist[i] for i in [i for i in range(len(source)) if source[i] == itm]]
11 changes: 5 additions & 6 deletions src/alfanous/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@

from alfanous.text_processing import QArabicSymbolsFilter
from alfanous.data import *
from alfanous.resources import *

from alfanous.romanization import transliterate
from alfanous.misc import LOCATE, FIND, FILTER_DOUBLES
from alfanous.misc import locate, find, filter_doubles

STANDARD2UTHMANI = lambda x: std2uth_words.get(x) or x

Expand Down Expand Up @@ -529,16 +528,16 @@ def _search_aya(self, flags):
synonyms = syndict.get(term[1]) or []
derivations_extra = []
if word_derivations:
lemma = LOCATE(derivedict["word_"], derivedict["lemma"], term[1])
lemma = locate(derivedict["word_"], derivedict["lemma"], term[1])
if lemma: # if different of none
derivations = FILTER_DOUBLES(FIND(derivedict["lemma"], derivedict["word_"], lemma))
derivations = filter_doubles(find(derivedict["lemma"], derivedict["word_"], lemma))
else:
derivations = []
# go deeper with derivations
root = LOCATE(derivedict["word_"], derivedict["root"], term[1])
root = locate(derivedict["word_"], derivedict["root"], term[1])
if root: # if different of none
derivations_extra = list(
set(FILTER_DOUBLES(FIND(derivedict["root"], derivedict["word_"], lemma))) - set(
set(filter_doubles(find(derivedict["root"], derivedict["word_"], lemma))) - set(
derivations))

words_output["individual"][cpt] = {
Expand Down
12 changes: 6 additions & 6 deletions src/alfanous/query_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
from whoosh.query import Term, MultiTerm
from whoosh.query import Wildcard as whoosh_Wildcard
from whoosh.query import Prefix as whoosh_Prefix
from whoosh.query import Or, NullQuery, Every, And
from whoosh.query import Or, NullQuery, Every

from alfanous.resources import syndict, derivedict, worddict, arabic_to_english_fields
from alfanous.data import syndict, derivedict, worddict, arabic_to_english_fields
from alfanous.text_processing import QArabicSymbolsFilter

from alfanous.misc import LOCATE, FIND, FILTER_DOUBLES
from alfanous.misc import locate, find, filter_doubles


def _make_arabic_parser():
Expand Down Expand Up @@ -590,9 +590,9 @@ def derivation(word, leveldist):

lst = []
if indexsrc: # if index source level is defined
itm = LOCATE(derivedict[indexsrc], derivedict[indexdist], word)
itm = locate(derivedict[indexsrc], derivedict[indexdist], word)
if itm: # if different of none
lst = FILTER_DOUBLES(FIND(derivedict[indexdist], derivedict["word_"], itm))
lst = filter_doubles(find(derivedict[indexdist], derivedict["word_"], itm))
else:
lst = [word]

Expand Down Expand Up @@ -623,7 +623,7 @@ def tuple(props):
wset = None
for propkey in props.keys():
if worddict.get(propkey):
partial_wset = set(FIND(worddict[propkey], worddict["word_"], props[propkey]))
partial_wset = set(find(worddict[propkey], worddict["word_"], props[propkey]))
if wset is None:
wset = partial_wset
else:
Expand Down
25 changes: 1 addition & 24 deletions src/alfanous/results_processing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from whoosh.scoring import BM25F
from whoosh.highlight import highlight, Fragment, \
HtmlFormatter, ContextFragmenter, BasicFragmentScorer, WholeFragmenter
Expand All @@ -25,25 +24,7 @@ def QSort(sortedby):
return sortedby



def QFilter(results, new_results):
    """Apply ``results.filter(new_results)`` and hand back the same ``results`` object."""
    filtered = results
    # filter() is called for its effect on `results`; its return value is not used
    filtered.filter(new_results)
    return filtered




def QPaginate(results, pagelen=10):
    """Generate the pages of ``results`` as ``(page_index, page_items)`` pairs.

    @param results: a sized, sliceable collection of results
    @param pagelen: maximum number of items per page (default 10)
    @return: generator yielding ``(page_index, page_items)``, where
             ``page_index`` is a 0-based integer and ``page_items`` is the
             matching slice of ``results``; the last page may be shorter
    @raise ValueError: when iteration starts and ``pagelen`` is lower than 1
    """
    if pagelen < 1:
        raise ValueError("pagelen must be a positive integer")
    total = len(results)
    # Step by `pagelen`: the former hard-coded step of 10 skipped items when
    # pagelen < 10 and produced overlapping pages when pagelen > 10.
    for start in range(0, total, pagelen):
        # Floor division keeps the page index an int on Python 3.
        yield start // pagelen, results[start:min(start + pagelen, total)]


def Qhighlight(text, terms, type="css", strip_vocalization=True):

def Qhighlight(text, terms, type="css", strip_vocalization=True):
if type == "bold":
formatter = QBoldFormatter()
else: # css
Expand All @@ -60,13 +41,9 @@ def Qhighlight(text, terms, type="css", strip_vocalization=True):
minscore=1
)


return highlighted or text





class QBoldFormatter(object):
""" add the style tags to the text """

Expand Down
10 changes: 3 additions & 7 deletions src/alfanous/romanization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Buckwalter Romanization letters mapping
BUCKWALTER2UNICODE = {
u"'": u"\u0621", # hamza-on-the-line
Expand Down Expand Up @@ -71,9 +70,9 @@
ISO2UNICODE = {u"ˌ": u"\u0621", # hamza-on-the-line
# u"|": u"\u0622", # madda
u"ˈ": u"\u0623", # hamza-on-'alif
#u"ˈ": u"\u0624", # hamza-on-waaw
# u"ˈ": u"\u0624", # hamza-on-waaw
# u"<": u"\u0625", # hamza-under-'alif
#u"ˈ": u"\u0626", # hamza-on-yaa'
# u"ˈ": u"\u0626", # hamza-on-yaa'
u"ʾ": u"\u0627", # bare 'alif
u"b": u"\u0628", # baa'
u"ẗ": u"\u0629", # taa' marbuuTa
Expand Down Expand Up @@ -141,9 +140,6 @@
}





def transliterate(mode, string, ignore=u"", reverse=False):
""" encode & decode different romanization systems """

Expand All @@ -159,7 +155,7 @@ def transliterate(mode, string, ignore=u"", reverse=False):

result = ""
for char in string:
if char not in ignore and mapping.get(char) :
if char not in ignore and mapping.get(char):
result += mapping[char]
else:
result += char
Expand Down
2 changes: 0 additions & 2 deletions src/alfanous/searching.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import logging

from alfanous.results_processing import QSort, QScore


Expand Down
2 changes: 1 addition & 1 deletion src/alfanous/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ name = alfanous
description = Quranic search engine API
description-file = file: README.rst
platforms=ALL
license=AGPL
license=GPL
1 change: 0 additions & 1 deletion src/alfanous/suggestions.py

This file was deleted.

Loading