Here’s a brief overview of the Python webservice’s code:
Main
def post_url():
    """Look up and save every citation found on the requested page.

    Each item returned by ``URL.citations()`` is wrapped in a ``Citation``
    and saved to the database; its escaped ``citing_quote`` is collected
    for the response.

    Returns:
        The result of ``jsonify`` applied to a list of
        ``{'citing_quote': ...}`` dictionaries, one per citation.
    """
    # NOTE(review): url_string is not defined in this excerpt; presumably it
    # is read from the incoming request -- confirm against the full source.
    url = URL(url_string)
    saved_citations = []

    # Get all citations on this page
    # This can be optimized with a map function
    citations = url.citations()
    for n, citation in enumerate(citations):
        print(n, ": saving citation.")
        c = Citation(citation)  # lookup citation
        c.db_save()  # save citation to database

        # Collect the escaped quote that is sent back to the caller.
        quote_json = {'citing_quote': escape_json(c.data['citing_quote'])}
        saved_citations.append(quote_json)

    return jsonify(saved_citations)
Citation
citeit-webservice/app/citation.py
class Citation:
    """ Filter list of citation data
        Save result locally in JSON format
        Upload JSON to Remote Storage (S3)
        Save Results to Database

        Only ``db_save`` is shown in this overview; the other steps listed
        above are not part of the excerpt.
    """

    def db_save(self, debug=False):
        """ Save Quote data to database, using SQLAlchemy

            The implementation is elided in this overview, so the method
            is a placeholder here and ``debug`` is not used.
        """
        pass
URL
citeit-webservice/app/lib/citeit_quote_context/url.py
class URL:
    """ Page whose citations are looked up (excerpt: only ``citations``
        is shown).
    """

    def citations(self):
        """ Return the quote data for every citation that could be loaded

            Calls ``load_quote_data()`` for each entry and keeps only the
            truthy results.

            Returns: list of the truthy ``load_quote_data()`` results
        """
        # NOTE(review): citations_list_dict is not defined in this excerpt;
        # presumably it is built from the page earlier in the full method.
        result_list = []
        for c in citations_list_dict:
            result = load_quote_data(c)
            if result:  # skip lookups that produced nothing
                result_list.append(result)

        # Callers iterate over the return value, so the list must be
        # handed back explicitly.
        return result_list
def load_quote_data(quote_keys):
    """ lookup quote data, from keys

        quote_keys: mapping read with the keys 'citing_quote',
            'citing_url', 'cited_url', 'citing_text' and 'citing_raw'

        Returns: whatever ``Quote(...).data()`` returns
    """
    print("Downloading citation for: " + quote_keys['citing_quote'])
    print("Downloading citation url: " + quote_keys['cited_url'])

    # NOTE(review): the last two values are described as optional, yet they
    # are read with [] and so must be present in quote_keys -- confirm
    # whether callers always supply them.
    quote = Quote(
        quote_keys['citing_quote'],
        quote_keys['citing_url'],
        quote_keys['cited_url'],
        quote_keys['citing_text'],  # optional: caching
        quote_keys['citing_raw'],  # optional: caching
    )
    return quote.data()
Quote
citeit-webservice/app/lib/citeit_quote_context/quote.py
class Quote:
    """ Quote lookup (excerpt: only the start of ``data`` is shown). """

    def data(self, text_output=True, all_fields=True):
        """ Locate the citing quote within both the citing and cited texts

            ``text_output`` and ``all_fields`` are not used in the part of
            the method shown here.
        """
        # Find context of quote from within text
        citing_context = QuoteContext(self.citing_quote(), self.citing_text())
        cited_context = QuoteContext(self.citing_quote(), self.cited_text())
        # NOTE(review): the excerpt stops here; the code that uses these two
        # contexts and returns the result is not shown.
QuoteContext
citeit-webservice/app/lib/citeit_quote_context/quote_context.py
class QuoteContext:
    """ Locates a quote from within a text, returns context
        Calculates quote location using google_diff_match_patch (levenshtein)
        Returns: dictionary of quote-context data()
    """

    def quote_start_position(self):
        """ Lookup quote starting position using
            google diff_match_patch (Levenshtein) algorithm:
            https://en.wikipedia.org/wiki/Levenshtein_distance

            The implementation is elided in this overview; only the
            docstring is shown.
        """
link: google_diff_match_patch (GitHub)
Document
citeit-webservice/app/lib/citeit_quote_context/document.py
class Document:
    """ Downloadable resource (excerpt: only ``download`` is shown). """

    # NOTE(review): lru_cache on an instance method makes ``self`` part of
    # the cache key and keeps cached instances alive until they are evicted
    # (ruff B019). Left as-is because callers may rely on the shared cache.
    @lru_cache(maxsize=20)
    def download(self, convert_to_unicode=False):
        """
            Download the data and update tracking metrics

            Returns the 'text' entry of ``self.download_resource()``.
            Results are memoized per (instance, arguments) by ``lru_cache``.
            ``convert_to_unicode`` is not used in the part shown here.
        """
        # return utf-8 content
        # NOTE(review): any blank-string default has to come from
        # download_resource(); a missing 'text' entry is not handled here.
        return self.download_resource()['text']