# Gf2ExamplesApi/app.py

import pathlib
import os
import xml.etree.ElementTree as etree

def check_sentence_id(sentence_id):
    """Validate the shape of a sentence id.

    An id is accepted when it starts with ``"GF"`` and consists of exactly
    three dot-separated parts.

    Raises:
        AssertionError: if either condition does not hold.
    """
    # Raised explicitly rather than through ``assert`` so the validation is
    # not stripped when Python runs with -O.  The exception type is kept
    # because obtain_example_from_gf2 catches AssertionError.
    if not sentence_id.startswith("GF"):
        raise AssertionError("sentence id must start with 'GF'")
    if len(sentence_id.split(".")) != 3:
        raise AssertionError(
            "sentence id must have exactly three dot-separated parts"
        )

def get_file_contents(sentence_id):
    """Return the text of the GF2 XML file that holds *sentence_id*.

    The file is looked up as
    ``$GF_FOLDER/<first 4 chars of base id>/<base id>-dedup.xml``, where the
    base id is the part of *sentence_id* before the first dot.

    Raises:
        KeyError: if the ``GF_FOLDER`` environment variable is not set.
        AssertionError: if the base id contains a path separator or the
            expected file does not exist.
    """
    word_base_id = sentence_id.split(".")[0]
    gf_folder = pathlib.Path(os.environ["GF_FOLDER"])

    # The id originates from the request; refuse anything that would be
    # interpreted as more than a single path component.
    if "/" in word_base_id or "\\" in word_base_id:
        raise AssertionError("sentence id must not contain path separators")

    gf_file = gf_folder / word_base_id[:4] / (word_base_id + "-dedup.xml")

    # Explicit raise instead of ``assert`` so the check survives python -O.
    if not gf_file.is_file():
        raise AssertionError("no GF2 file for " + word_base_id)

    # NOTE(review): assumes the corpus files are UTF-8 (the XML default) --
    # confirm.  Without an explicit encoding the decoded text depends on the
    # locale of the server process.
    with gf_file.open("r", encoding="utf-8") as fp:
        return fp.read()

def obtain_sentence(sentence_id, content):
    """Cut the ``<s>`` element with the given id out of *content*.

    Returns the text from the opening ``<s xml:id="<sentence_id>"`` up to and
    including the first ``</s>`` that follows it, with every ``" xml:"``
    replaced by ``" "`` (so ``xml:id`` becomes a plain ``id`` attribute).

    Raises:
        ValueError: if the opening tag, or a ``</s>`` after it, is not
            present in *content*.
    """
    # The closing quote is part of the needle so that an id such as
    # "GF1.1.1" cannot match the sentence "GF1.1.10".
    opening = '<s xml:id="' + sentence_id + '"'
    closing = "</s>"

    idx_start = content.index(opening)
    idx_end = content.index(closing, idx_start) + len(closing)

    return content[idx_start:idx_end].replace(" xml:", " ")

def as_example(sentence_id, headword, sentence):
    """Split a sentence around its headword.

    *sentence* is parsed as XML and the ``text`` of the root's direct
    children is concatenated (tail text between elements is not used).  The
    first child whose text equals *headword* becomes ``"mid"``; the text
    before it is ``"left"`` and the text after it is ``"right"``.

    Args:
        sentence_id: not used by this function; kept so existing callers
            keep working.
        headword: exact element text to look for.
        sentence: XML source of a single element whose children carry the
            tokens.

    Returns:
        ``{"left": ..., "mid": ..., "right": ...}`` when the headword is
        found, otherwise the string ``"headword not found"``.
    """
    root = etree.XML(sentence)
    left_parts = []
    right_parts = []
    mid = None
    found = False

    for element in root:
        if not found and element.text == headword:
            # Split only at the first occurrence: a later repetition of the
            # headword belongs to the right context and must not discard
            # the text collected so far.
            mid = element.text
            found = True
            continue
        # Elements without text (e.g. self-closing ones) have text None.
        piece = element.text or ""
        if found:
            right_parts.append(piece)
        else:
            left_parts.append(piece)

    if not found:
        return "headword not found"

    return {
        "left": "".join(left_parts),
        "mid": mid,
        "right": "".join(right_parts),
    }

def obtain_example_from_gf2(sentence_id, headword):
    """Look up a sentence and split it around *headword*.

    Runs check_sentence_id, get_file_contents, obtain_sentence and
    as_example in that order.

    Returns:
        Whatever as_example returns on success (a dict, or its own
        "headword not found" string).  Known failures are reported as a
        string instead of being raised:

        * ``etree.ParseError``  -> "Could not parse sentence"
        * ``ValueError``        -> "Could not find word id"
        * ``IndexError``        -> "Word id index after last dot should be
          an integer"
        * ``AssertionError``    -> "Bad word id"
        * ``KeyError``          -> "GF2 location not set on the server"

        Any other exception propagates to the caller.
    """
    try:
        check_sentence_id(sentence_id)
        contents = get_file_contents(sentence_id)
        sentence = obtain_sentence(sentence_id, contents)
        return as_example(sentence_id, headword, sentence)
    except etree.ParseError:
        # The sentence is cut out of the file by substring search, so the
        # fragment may not be well-formed XML.  ParseError derives from
        # SyntaxError and would otherwise escape the handlers below.
        return "Could not parse sentence"
    except ValueError:
        return "Could not find word id"
    except IndexError:
        # NOTE(review): no helper visible in this file obviously raises
        # IndexError; handler kept as-is.
        return "Word id index after last dot should be an integer"
    except AssertionError:
        return "Bad word id"
    except KeyError:
        return "GF2 location not set on the server"


# Flask application object; the route handlers defined below are registered
# on it through the @app.route decorators.
from flask import Response, Flask, request, jsonify
app = Flask(__name__)

@app.route('/')
def home():
    """Return a short plain-text usage hint for the API."""
    # The hint names both path segments because the /get_example route
    # requires a sentence id and a headword.
    return (
        "An api to get examples from gigafida, "
        "just use /get_example/$SENTENCE_ID/$HEADWORD to get them"
    )

@app.route('/get_example/<string:sentence_id>/<string:headword>')
def test(sentence_id, headword):
    """Return the example for one sentence id / headword pair.

    A successful lookup is sent as a JSON object; a failure is sent as the
    plain-text message returned by obtain_example_from_gf2.
    """
    result = obtain_example_from_gf2(sentence_id, headword)
    if isinstance(result, dict):
        # A dict handed straight to Response is treated as an iterable of
        # body chunks, so only its keys would be sent.  Serialise it to JSON
        # instead; jsonify also sets the application/json mimetype.
        return jsonify(result)
    return Response(result, mimetype="text/plain")

@app.route('/get_examples', methods=["POST"])
def get_examples():
    """Return examples for a batch of sentence ids.

    The request body is read with ``request.get_json()`` and must be a JSON
    object mapping sentence id to headword.  The response is a JSON object
    with two maps: ``"good"`` (sentence id -> example dict) and ``"bad"``
    (sentence id -> error message).
    """
    data = request.get_json()
    if not isinstance(data, dict):
        # NOTE(review): this is a client error, so 400 would describe it
        # better; 500 is kept so existing clients see the same status.
        return "", 500

    bad_ones = {}
    good_ones = {}
    for sentence_id, headword in data.items():
        # JSON values may be numbers, null, lists, ...; report those per
        # entry instead of failing the whole batch on .strip().
        if not isinstance(headword, str):
            bad_ones[sentence_id] = "Headword should be a string"
            continue

        example = obtain_example_from_gf2(sentence_id, headword.strip())
        if isinstance(example, dict):
            good_ones[sentence_id] = example
        else:
            bad_ones[sentence_id] = example

    return jsonify({"good": good_ones, "bad": bad_ones})