From 47bb4ce0adf6712e54f3bf4b829279e5f9dce2e7 Mon Sep 17 00:00:00 2001 From: voje Date: Tue, 12 Feb 2019 08:24:46 +0100 Subject: [PATCH] java_python env dockerfile, progress on parser --- dockerfiles/all/Dockerfile | 12 +++++++ dockerfiles/all/README.md | 12 +++++++ tools/parser/README.md | 3 ++ tools/parser/__init__.pyc | Bin 147 -> 0 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 129 -> 0 bytes .../parser/__pycache__/parser.cpython-37.pyc | Bin 2289 -> 0 bytes tools/parser/msdmap.py | 6 +++- tools/parser/parser.py | 30 ++++++++++++++---- tools/parser/parser.pyc | Bin 325 -> 0 bytes 9 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 dockerfiles/all/Dockerfile create mode 100644 dockerfiles/all/README.md delete mode 100644 tools/parser/__init__.pyc delete mode 100644 tools/parser/__pycache__/__init__.cpython-37.pyc delete mode 100644 tools/parser/__pycache__/parser.cpython-37.pyc delete mode 100644 tools/parser/parser.pyc diff --git a/dockerfiles/all/Dockerfile b/dockerfiles/all/Dockerfile new file mode 100644 index 0000000..e6159e9 --- /dev/null +++ b/dockerfiles/all/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:16.04 + +RUN apt-get update +RUN apt-get install -y \ +vim \ +default-jdk \ +python3 \ +python3-pip + +RUN pip3 install lxml + +ENV PYTHONIOENCODING UTF-8 diff --git a/dockerfiles/all/README.md b/dockerfiles/all/README.md new file mode 100644 index 0000000..166f5b1 --- /dev/null +++ b/dockerfiles/all/README.md @@ -0,0 +1,12 @@ +You might want to mount this whole repo into the docker container. +Also mount data locations. + +Example container: +```bash +$ docker build . -t python_java +$ docker run \ + -it \ + -v $(echo $(cd ../..; pwd)):/cjvt-srl-tagging \ + python_java \ + /bin/bash +``` diff --git a/tools/parser/README.md b/tools/parser/README.md index ef6ae42..a52097a 100644 --- a/tools/parser/README.md +++ b/tools/parser/README.md @@ -1,3 +1,6 @@ ## msdmap.py Help conversion between english and slovenian MSD. 
Hardcoded values from online documentation (html tables). + +## Sources +[1] (conll09 data format) https://nlpado.de/~sebastian/pub/papers/conll09_hajic.pdf \ No newline at end of file diff --git a/tools/parser/__init__.pyc b/tools/parser/__init__.pyc deleted file mode 100644 index d333b89c362415aca1b483e4a42d569584230a4a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 147 zcmZSn%**xXKv+yN0~9a+XNg?O} diff --git a/tools/parser/__pycache__/__init__.cpython-37.pyc b/tools/parser/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index ecc16a37ac1eb85d907302918a0abcc82dd970ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 129 zcmZ?b<>g`kf_s+nF(CReh=2h`Aj1KOi&=m~3PUi1CZpd) zqg#@go}QVPu3wU$pHr+~kXTflTBIKzpP83g5+AQuP12o|P0xCd2Qz0mnk01q2TcYiHc9~={v+;OK zy7fqiz!B()147j8%WBPFBgoS`2X+BI}FWP@&(+DSaKXbrT}X!#1P5Tu|n zDVQP}R_wBC?8}D>M>}IWAq7`#!x6AWa{((p=TGSH@LQh1`eyI!3-| zOlUd#MN!q&>FhWV%?TAV^P~u%Cn5JmYhUkDQPZ_M^yY6S6`s{$9-J znT@(?Y{Clpv7Wi(ua<6!_B;`@n+zIHV#EdcjA7rFZs6bMGo)zfrZ}W#bZeJdNx910 zglZ~+@-IJ>^H`a)_XFRKY9q2mZ)z)pzuosh_IwbDJOnX9i=_eF& zoM+#5CXcH*UE8H8LpC1$hHIwV&y($6*2wmCw2#*a)+O(dcgg!i+uS>bY?Gsz3;E7g zlIAa*IRC+ki;tf$WY4CZP04OHOx4I;!2GG3bYbKURqy1phQ%V}XWu=u8ak`1uaCT? z)Q5jvf;bCaG5d77n>(&k7%-_S(A0dm9(2WwSTDv_Li|>eh3}?Cd=yG@CvO1Z|?SRo_46p;-P=$isrGjk|Bt3x67K4Su zowrd8eBQ#vO5rFAhyXy8FrXMET$v}kwBV%_(5Lyn1|aO9M$bX%aFosyYtupr&W(H| zcNtyj!;(?^bSHg8E!a$2D8)5PDKo(saXdA?&F9&&8wdqnWZ@BH`NvmmNIVF!1#(#~l>VY|oCUt-!-EJZhRMaf5 zDWI!QxGL(FpBu_rKfo1X~5Q0*%CBYCL8zLWNVN1il zWdVRT%hv`@K5e(JhCBgClu#xQLn4n^a-Ss&me_(R<6G(Wd3=Ay{3{gM^BSWwT6Ho+2oij)sCrBM5gS!V% zF+`0YSZ_vZPENj#LP26tacYqUP!R(fQEUYi)6dAyP1VmX$}BF)O3c$w&n(eT&MGU> zEiTH@ElEsI&&*5LFUil(Db|M=1f}!}Doc2P+QF8@L!>}1F9x}hfzeL`#0dhiscb0+ S&`6uy{FKt1R6DSRAUgo?&`8Gs