"""Small Flask API that extracts example sentences from Gigafida XML files."""

import os
import pathlib
import xml.etree.ElementTree as etree

from flask import Response, Flask, request, jsonify


def check_sentence_id(sentence_id):
    """Validate the shape of a sentence id.

    A valid id starts with "GF" and consists of exactly three dot-separated
    parts.

    Raises:
        AssertionError: if the id does not have that shape.
    """
    # Raised explicitly rather than through `assert` so the validation still
    # runs under `python -O`. AssertionError is kept as the exception type
    # because obtain_example_from_gf2 maps it to "Bad word id".
    if not sentence_id.startswith("GF"):
        raise AssertionError("sentence id must start with 'GF'")
    if len(sentence_id.split(".")) != 3:
        raise AssertionError("sentence id must have three dot-separated parts")


def get_file_contents(sentence_id):
    """Return the full text of the XML file that holds *sentence_id*.

    The file is looked up as
    ``$GF_FOLDER/<first 4 chars of base id>/<base id>-dedup.xml`` where the
    base id is the part of *sentence_id* before the first dot.

    Raises:
        KeyError: if the ``GF_FOLDER`` environment variable is not set.
        AssertionError: if the expected file does not exist.
    """
    word_base_id = sentence_id.split(".")[0]
    gf_folder = pathlib.Path(os.environ["GF_FOLDER"])
    gf_file = gf_folder / word_base_id[:4] / (word_base_id + "-dedup.xml")

    # Explicit raise (not `assert`) so the check survives `python -O`.
    if not gf_file.exists():
        raise AssertionError("no corpus file for {}".format(word_base_id))

    # NOTE(review): assumes the corpus files are UTF-8 encoded - confirm.
    with gf_file.open("r", encoding="utf-8") as fp:
        return fp.read()


def obtain_sentence(sentence_id, content):
    """Cut the ``<s>`` element with the given id out of *content*.

    Returns the element's markup with every `` xml:`` attribute prefix
    replaced by a single space, so ``xml:id`` becomes ``id``.

    Raises:
        ValueError: if the opening or closing tag cannot be found.
    """
    # NOTE(review): the markup literals in this function were lost from the
    # source (angle-bracket spans were stripped). They are reconstructed from
    # what survived: the "+ 4" offset (len("</s>")), the " xml:" replacement
    # below and the ValueError handling in the caller. Confirm against the
    # real corpus format.
    # The tag is matched without its closing ">" so that an <s> element
    # carrying further attributes after xml:id is still found.
    opening_tag = '<s xml:id="{}"'.format(sentence_id)
    idx_start = content.index(opening_tag)
    idx_end = content.index("</s>", idx_start) + 4
    sentence = content[idx_start:idx_end]
    sentence = sentence.replace(" xml:", " ")
    return sentence


def as_example(sentence_id, headword, sentence):
    """Split a sentence element into left / mid / right around *headword*.

    Only the ``text`` of the direct children of the parsed element is used.
    The split happens at the last child whose text equals *headword*.

    Args:
        sentence_id: unused; kept for interface compatibility.
        headword: exact text of the child element to split on.
        sentence: XML markup of a single sentence element.

    Returns:
        A dict with keys "left", "mid" and "right", or the string
        "headword not found" when no child matches.
    """
    root = etree.XML(sentence)

    # Children without text (element.text is None) contribute nothing instead
    # of raising TypeError during concatenation.
    texts = []
    split_at = None
    for index, element in enumerate(root):
        if element.text == headword:
            split_at = index
        texts.append(element.text or "")

    if split_at is None:
        return "headword not found"

    # Join everything before the last match, so text preceding an earlier
    # occurrence of the headword is kept rather than silently dropped.
    return {
        "left": "".join(texts[:split_at]),
        "mid": texts[split_at],
        "right": "".join(texts[split_at + 1:]),
    }


def obtain_example_from_gf2(sentence_id, headword):
    """Look up *sentence_id* and split its sentence around *headword*.

    Returns:
        The dict produced by as_example on success, otherwise a string
        describing what went wrong.
    """
    try:
        check_sentence_id(sentence_id)
        contents = get_file_contents(sentence_id)
        sentence = obtain_sentence(sentence_id, contents)
        return as_example(sentence_id, headword, sentence)
    except etree.ParseError:
        # ParseError derives from SyntaxError, so none of the handlers below
        # would catch it and it would escape as a server error.
        return "Could not parse sentence"
    except ValueError:
        return "Could not find word id"
    except IndexError:
        return "Word id index after last dot should be an integer"
    except AssertionError:
        return "Bad word id"
    except KeyError:
        return "GF2 location not set on the server"


app = Flask(__name__)


@app.route('/')
def home():
    """Return a short usage hint for the API."""
    return "An api to get examples from gigafida, just use /get_example/$WORD_ID to get them"


# NOTE(review): the URL converters were lost from the source (the rule read
# '/get_example//'); they are restored from the view function's parameters.
@app.route('/get_example/<sentence_id>/<headword>')
def test(sentence_id, headword):
    """Return one example as JSON, or a plain-text error message."""
    result = obtain_example_from_gf2(sentence_id, headword)
    if isinstance(result, dict):
        # Passing a dict straight to Response would iterate over its keys;
        # jsonify serialises it and sets the application/json mimetype.
        return jsonify(result)
    return Response(result, mimetype="text/plain")


@app.route('/get_examples', methods=["POST"])
def get_examples():
    """Resolve a JSON object of ``{sentence_id: headword}`` pairs in bulk.

    Returns:
        JSON of the form ``{"good": {...}, "bad": {...}}`` where "good" maps
        sentence ids to example dicts and "bad" maps them to error messages.
        An empty body with status 500 is returned when the request payload
        is not a JSON object.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return "", 500

    bad_ones = {}
    good_ones = {}
    for sentence_id, headword in data.items():
        # JSON values may be numbers, lists, null, ...; report those instead
        # of failing on .strip().
        if not isinstance(headword, str):
            bad_ones[sentence_id] = "Headword should be a string"
            continue

        example = obtain_example_from_gf2(sentence_id, headword.strip())
        if isinstance(example, dict):
            good_ones[sentence_id] = example
        else:
            bad_ones[sentence_id] = example

    return jsonify({"good": good_ones, "bad": bad_ones})