Gf2ExamplesApi/app.py

import pathlib
import os
import xml.etree.ElementTree as etree

def check_sentence_id(sentence_id):
    """Validate the shape of a sentence id.

    An id is accepted when it starts with ``"GF"`` and consists of exactly
    three dot-separated parts.

    Args:
        sentence_id: the id string to validate.

    Raises:
        AssertionError: if either condition does not hold.
    """
    # Raise explicitly rather than via ``assert`` statements: those are
    # removed when Python runs with -O, which would silently disable the
    # validation. AssertionError is kept because callers catch that type.
    if not sentence_id.startswith("GF"):
        raise AssertionError("sentence id must start with 'GF'")
    if len(sentence_id.split(".")) != 3:
        raise AssertionError("sentence id must have three dot-separated parts")

def get_file_contents(sentence_id):
    """Read the XML file that holds the given sentence.

    The file is looked up as
    ``$GF_FOLDER/<first 4 chars of base id>/<base id>-dedup.xml``, where the
    base id is the part of ``sentence_id`` before the first dot.

    Args:
        sentence_id: dot-separated sentence id.

    Returns:
        The whole file content as a string.

    Raises:
        KeyError: if the ``GF_FOLDER`` environment variable is not set.
        AssertionError: if the expected file does not exist.
    """
    word_base_id = sentence_id.split(".")[0]
    gf_folder = pathlib.Path(os.environ["GF_FOLDER"])
    gf_file = gf_folder / word_base_id[:4] / (word_base_id + "-dedup.xml")

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    # ``is_file`` also rejects a directory at that path, which would
    # otherwise fail later with an unhandled OSError on open.
    if not gf_file.is_file():
        raise AssertionError("no corpus file for sentence id")

    # Do not depend on the server locale for decoding.
    # NOTE(review): assumes the corpus XML files are UTF-8 (the XML default
    # encoding) -- confirm against the actual data.
    with gf_file.open("r", encoding="utf-8") as fp:
        return fp.read()

def obtain_sentence(sentence_id, content):
    """Cut the ``<s>`` element with the given id out of raw file text.

    Args:
        sentence_id: value of the ``xml:id`` attribute of the wanted ``<s>``.
        content: text to search in.

    Returns:
        The substring from the opening ``<s xml:id="...">`` tag up to and
        including the next ``</s>``, with every ``" xml:"`` occurrence
        replaced by a single space (so ``xml:id`` becomes ``id``).

    Raises:
        ValueError: if the opening tag or a following ``</s>`` is not found.
    """
    # Include the closing quote so that an id such as "X.1.1" cannot match
    # the longer id "X.1.10" as a prefix and return the wrong sentence.
    opening = '<s xml:id="' + sentence_id + '"'
    closing = "</s>"

    start = content.index(opening)
    end = content.index(closing, start) + len(closing)

    return content[start:end].replace(" xml:", " ")

def as_example(sentence_id, headword, sentence):
    """Split a sentence into the text left of, equal to, and right of a headword.

    Only the ``text`` of the direct children of the root element is used.

    Args:
        sentence_id: not used by this function; kept so the signature stays
            compatible with existing callers.
        headword: string compared for exact equality with each child's text.
        sentence: XML source of a single element.

    Returns:
        A dict with keys ``"left"``, ``"mid"`` and ``"right"``, or the string
        ``"headword not found"`` when no child's text equals ``headword``.
    """
    children = list(etree.XML(sentence))

    # Split at the FIRST matching child only. Re-splitting on every match
    # would overwrite "left" and drop the text before a later occurrence.
    split_at = next(
        (i for i, child in enumerate(children) if child.text == headword),
        None,
    )
    if split_at is None:
        return "headword not found"

    def joined_text(elements):
        # Empty elements (e.g. self-closing tags) have ``text`` of None;
        # treat them as contributing no text instead of raising TypeError.
        return "".join(element.text or "" for element in elements)

    return {
        "left": joined_text(children[:split_at]),
        "mid": children[split_at].text,
        "right": joined_text(children[split_at + 1:]),
    }

def obtain_example_from_gf2(sentence_id, headword):
    """Look up a sentence by id and split it around the headword.

    Runs the full pipeline: validate the id, read the file, cut out the
    sentence and split it with ``as_example``.

    Args:
        sentence_id: id of the sentence to fetch.
        headword: word to split the sentence around.

    Returns:
        The dict produced by ``as_example`` on success. On failure a plain
        string describing the problem is returned instead; callers tell the
        two apart by checking whether the result is a dict.
    """
    try:
        check_sentence_id(sentence_id)
        contents = get_file_contents(sentence_id)
        sentence = obtain_sentence(sentence_id, contents)
        return as_example(sentence_id, headword, sentence)
    except ValueError:
        # str.index in obtain_sentence found no matching <s> element.
        return "Could not find word id"
    except IndexError:
        # Kept for compatibility with the existing error contract.
        return "Word id index after last dot should be an integer"
    except AssertionError:
        # Id failed validation, or no file exists for it.
        return "Bad word id"
    except KeyError:
        # GF_FOLDER environment variable is missing.
        return "GF2 location not set on the server"
    except etree.ParseError:
        # The extracted sentence is not well-formed XML. Report it like the
        # other failures instead of letting the exception abort the request
        # (and, for batch lookups, every other sentence in the batch).
        return "Could not parse sentence"


from flask import Response, Flask, request, jsonify
# Flask application object; the @app.route decorators below register
# their handlers on it.
app = Flask(__name__)

@app.route('/')
def home():
    """Return a short plain-text usage hint for the API."""
    # The lookup route takes two path segments (sentence id and headword);
    # the hint names both so that following it does not lead to a 404.
    return "An api to get examples from gigafida, just use /get_example/$SENTENCE_ID/$HEADWORD to get them"

@app.route('/get_example/<string:sentence_id>/<string:headword>')
def test(sentence_id, headword):
    """Return the example for one sentence id and headword.

    Responds with JSON (``left`` / ``mid`` / ``right``) when the lookup
    succeeds, and with the plain-text error message otherwise.
    """
    result = obtain_example_from_gf2(sentence_id, headword)
    if isinstance(result, dict):
        # Serialize explicitly: a dict handed directly to Response is
        # iterated, so the body would be the concatenated keys, not JSON.
        return jsonify(result)
    return Response(result, mimetype="text/plain")

@app.route('/get_examples', methods=["POST"])
def get_examples():
    """Look up many examples in one request.

    Expects a JSON object mapping sentence ids to headwords.

    Returns:
        A JSON object ``{"good": {...}, "bad": {...}}``: ``good`` maps
        sentence ids to their example dicts, ``bad`` maps sentence ids to an
        error message. An empty body with status 500 is returned when the
        request body is not a JSON object.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        # NOTE(review): 400 would describe a malformed body more accurately;
        # 500 is kept so existing clients see the same status as before.
        return "", 500

    bad_ones = {}
    good_ones = {}
    for sentence_id, headword in data.items():
        if not isinstance(headword, str):
            # JSON values may be numbers, lists, null, ...; report the entry
            # as bad instead of failing the whole batch on ``.strip()``.
            bad_ones[sentence_id] = "Headword should be a string"
            continue

        example = obtain_example_from_gf2(sentence_id, headword.strip())
        if isinstance(example, dict):
            good_ones[sentence_id] = example
        else:
            bad_ones[sentence_id] = example

    return jsonify({"good": good_ones, "bad": bad_ones})
First commit 2020-05-19 20:12:41 +00:00			`import pathlib`
			`import os`
			`import xml.etree.ElementTree as etree`

			`def check_sentence_id(sentence_id):`
			`assert(sentence_id.startswith("GF"))`
			`assert(len(sentence_id.split(".")) == 3)`

			`def get_file_contents(sentence_id):`
			`word_base_id = sentence_id.split(".")[0]`
			`gf_folder = os.environ["GF_FOLDER"]`

			`gf_folder = pathlib.Path(gf_folder)`
			`gf_inner_folder = gf_folder.joinpath(word_base_id[:4])`
			`gf_file = gf_inner_folder.joinpath(word_base_id + "-dedup.xml")`

			`assert(gf_file.exists())`

			`with gf_file.open('r') as fp:`
			`return fp.read()`

			`def obtain_sentence(sentence_id, content):`
			`substring = '<s xml:id=\"' + sentence_id`

			`idx_start = content.index(substring)`
			`idx_end = content.index("</s>", idx_start) + 4`

			`sentence = content[idx_start:idx_end]`
			`sentence = sentence.replace(" xml:", " ")`

			`return sentence`

			`def as_example(sentence_id, headword, sentence):`
			`root = etree.XML(sentence)`
			`result = {}`
			`current_text = ""`

			`for element in root:`
			`if element.text == headword:`
			`# first, lets pack left part`
			`result["left"] = current_text`
			`# then, lets pack the mid - headword part`
			`result["mid"] = element.text`
			`# and cleanup the current_text`
			`current_text = ""`

			`else:`
			`# else, lets just add`
			`current_text += element.text`

			`# lastly add the right part`
			`if "mid" not in result:`
			`return "headword not found"`
			`elif "left" not in result:`
			`result["left"] = ""`

			`result["right"] = current_text`
			`return result`

			`def obtain_example_from_gf2(sentence_id, headword):`
			`try:`
			`check_sentence_id(sentence_id)`
			`contents = get_file_contents(sentence_id)`
			`sentence = obtain_sentence(sentence_id, contents)`
			`return as_example(sentence_id, headword, sentence)`
			`except ValueError:`
			`return "Could not find word id"`
			`except IndexError:`
			`return "Word id index after last dot should be an integer"`
			`except AssertionError:`
			`return "Bad word id"`
			`except KeyError:`
			`return "GF2 location not set on the server"`


			`from flask import Response, Flask, request, jsonify`
			`app = Flask(__name__)`

			`@app.route('/')`
			`def home():`
			`return "An api to get examples from gigafida, just use /get_example/$WORD_ID to get them"`

			`@app.route('/get_example/<string:sentence_id>/<string:headword>')`
			`def test(sentence_id, headword):`
			`result = obtain_example_from_gf2(sentence_id, headword)`
			`mimetype = "application/json" if type(result) is dict else "text/plain"`
			`return Response(result, mimetype=mimetype)`

			`@app.route('/get_examples', methods=["POST"])`
			`def get_examples():`
			`data = request.get_json()`
			`if data is None or type(data) is not dict:`
			`return "", 500`

			`bad_ones = {}`
			`good_ones = {}`
			`for sentence_id, headword in data.items():`
			`headword = headword.strip()`
			`example = obtain_example_from_gf2(sentence_id, headword)`
			`if type(example) is dict:`
			`good_ones[sentence_id] = example`
			`else:`
			`bad_ones[sentence_id] = example`

			`return jsonify({"good": good_ones, "bad": bad_ones})`