diff --git a/Dockerfile b/Dockerfile index 39f1d6f..e9d6ce4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,18 @@ FROM python:3.9 +COPY entrypoint.sh /usr/src/portal-webapp/ COPY app.py /usr/src/portal-webapp/ COPY config.ini /usr/src/portal-webapp/ COPY templates /usr/src/portal-webapp/templates COPY static /usr/src/portal-webapp/static COPY contract/ /usr/src/portal-webapp/contract COPY portal/ /usr/src/portal-webapp/portal +COPY migrations// /usr/src/portal-webapp/migrations WORKDIR /usr/src/portal-webapp RUN apt-get update && apt-get -y install wkhtmltopdf && \ rm -rf /var/lib/apt/lists/* -RUN pip3 install --no-cache-dir pdfkit flask flask-dropzone gunicorn pdfkit +RUN pip3 install --no-cache-dir pdfkit flask flask-dropzone flask-log-request-id Flask-SQLAlchemy alembic flask-migrate Flask-script psycopg2 gunicorn pdfkit -CMD ["gunicorn", "--bind", "0.0.0.0:80", "-w", "1", "--access-logfile", "-", "app:app"] +ENTRYPOINT ["./entrypoint.sh"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..e6f65c8 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +## A web application for contribution of textual data files diff --git a/app.py b/app.py index 1dd90a8..efc2ac9 100644 --- a/app.py +++ b/app.py @@ -1,15 +1,25 @@ +import logging import os import configparser +import re from pathlib import Path from flask import Flask, render_template, request from flask_dropzone import Dropzone +from flask_migrate import Migrate, MigrateCommand +from flask_script import Manager +from portal.model import db import portal.base -# TODO: Put all the stuff in base.py into a class, so it can have a state of it's own, to avoid passing a bunch of arguments at each function call. 
+ENABLED_FILETYPES = ['txt', 'csv', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'xml', 'mxliff', 'tmx'] +REGEX_EMAIL = re.compile('^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$') + +# TODO: make logging level configurable +logging.basicConfig(level=logging.DEBUG, format='[APP LOGGER] %(asctime)s %(levelname)s: %(message)s') + ###################### # Load configuration # ###################### @@ -23,9 +33,13 @@ MAIL_PASS = config['MAIL_PASS'] SMTP_PORT = int(config['SMTP_PORT']) IMAP_PORT = int(config['IMAP_PORT']) MAX_UPLOAD_SIZE = int(config['MAX_UPLOAD_SIZE']) # Bytes +MAX_FILES_PER_UPLOAD = int(config['MAX_FILES_PER_UPLOAD']) CONTRACT_CLIENT_CONTACT = config['CONTRACT_CLIENT_CONTACT'] MAIL_SUBJECT = config['MAIL_SUBJECT'] MAIL_BODY = config['MAIL_BODY'] +SQL_CONN_STR = config['SQL_CONN_STR'] +DESC_PREVODI = config['DESC_PREVODI'] +DESC_GIGAFIDA = config['DESC_GIGAFIDA'] if 'UPLOADS_DIR' in config: UPLOADS_DIR = Path(config['UPLOADS_DIR']) @@ -34,13 +48,6 @@ else: if not UPLOADS_DIR.exists: UPLOADS_DIR.mkdir(parents=True) -if 'DATA_DIR' in config: - DATA_DIR = Path(config['DATA_DIR']) -else: - DATA_DIR = Path(__file__).resolve().parent / 'data' -if not DATA_DIR.exists: - DATA_DIR.mkdir(parents=True) - # Override configs with environment variables, if set if 'PORTALDS4DS1_MAIL_HOST' in os.environ: MAIL_HOST = os.environ['PORTALDS4DS1_MAIL_HOST'] @@ -54,16 +61,22 @@ if 'PORTALDS4DS1_IMAP_PORT' in os.environ: IMAP_PORT = int(os.environ['PORTALDS4DS1_IMAP_PORT']) if 'PORTALDS4DS1_MAX_UPLOAD_SIZE' in os.environ: MAX_UPLOAD_SIZE = int(os.environ['PORTALDS4DS1_MAX_UPLOAD_SIZE']) +if 'PORTALDS4DS1_MAX_FILES_PER_UPLOAD' in os.environ: + MAX_FILES_PER_UPLOAD = int(os.environ['PORTALDS4DS1_MAX_FILES_PER_UPLOAD']) if 'PORTALDS4DS1_CONTRACT_CLIENT_CONTACT' in os.environ: CONTRACT_CLIENT_CONTACT = os.environ['PORTALDS4DS1_CONTRACT_CLIENT_CONTACT'] if 'PORTALDS4DS1_UPLOADS_DIR' in os.environ: UPLOADS_DIR = os.environ['PORTALDS4DS1_UPLOADS_DIR'] -if 
'PORTALDS4DS1_DATA_DIR' in os.environ: - DATA_DIR = os.environ['PORTALDS4DS1_DATA_DIR'] if 'PORTALDS4DS1_MAIL_SUBJECT' in os.environ: MAIL_SUBJECT = os.environ['PORTALDS4DS1_MAIL_SUBJECT'] if 'PORTALDS4DS1_MAIL_BODY' in os.environ: MAIL_BODY = os.environ['PORTALDS4DS1_MAIL_BODY'] +if 'PORTALDS4DS1_SQL_CONN_STR' in os.environ: + SQL_CONN_STR = os.environ['PORTALDS4DS1_SQL_CONN_STR'] +if 'PORTALDS4DS1_DESC_PREVODI' in os.environ: + DESC_PREVODI = os.environ['PORTALDS4DS1_DESC_PREVODI'] +if 'PORTALDS4DS1_DESC_GIGAFIDA' in os.environ: + DESC_GIGAFIDA = os.environ['PORTALDS4DS1_DESC_GIGAFIDA'] VALID_CORPUS_NAMES = ['prevodi', 'gigafida', 'solar'] @@ -75,11 +88,32 @@ app = Flask(__name__) app.config.update( UPLOADED_PATH = UPLOADS_DIR, MAX_CONTENT_LENGTH = MAX_UPLOAD_SIZE, - TEMPLATES_AUTO_RELOAD = True + TEMPLATES_AUTO_RELOAD = True, + SQLALCHEMY_DATABASE_URI = SQL_CONN_STR, + SQLALCHEMY_ECHO = True ) +# Run "python app.py db -?" to see more info about DB migrations. +manager = Manager(app) +db.init_app(app) +migrate = Migrate(app, db) +manager.add_command('db', MigrateCommand) + +# Set up dropzone.js to serve all the stuff for "file dropping" on the web interface. 
dropzone = Dropzone(app) +upload_handler = portal.base.UploadHandler( + UPLOADS_DIR=UPLOADS_DIR, + MAIL_HOST=MAIL_HOST, + MAIL_LOGIN=MAIL_LOGIN, + MAIL_PASS=MAIL_PASS, + SMTP_PORT=SMTP_PORT, + IMAP_PORT=IMAP_PORT, + MAIL_SUBJECT=MAIL_SUBJECT, + MAIL_BODY=MAIL_BODY, + CONTRACT_CLIENT_CONTACT=CONTRACT_CLIENT_CONTACT + ) + @app.route('/') def index(): @@ -90,52 +124,111 @@ def index(): def index_corpus(corpus_name): if corpus_name not in VALID_CORPUS_NAMES: return 'Korpus "{}" ne obstaja.'.format(corpus_name), 404 + if corpus_name == 'prevodi': - subtitle = 'KORPUS PARALELNIH BESEDIL ANG-SLO' + description = DESC_PREVODI elif corpus_name == 'gigafida': - subtitle = 'KORPUS GIGAFIDA' - return render_template('basic.html', subtitle=subtitle, corpus_name=corpus_name) + description = DESC_GIGAFIDA + elif corpus_name == 'solar': + return handle_solar(request) + + return render_template('basic.html', + corpus_name=corpus_name, description=description, max_files=MAX_FILES_PER_UPLOAD) + + +def handle_solar(request): + return 404 @app.route('//upload', methods=['POST']) def handle_upload(corpus_name): if corpus_name not in VALID_CORPUS_NAMES: return 404 + + if corpus_name == 'solar': + return handle_upload_solar(request) + else: + return handle_upload_unauthenticated(request, corpus_name) + + +def handle_upload_solar(request): + return 404 + +def handle_upload_unauthenticated(request, corpus_name): files = request.files - if len(files) > 20: - return 'Naložite lahko do 20 datotek hkrati.', 400 + if len(files) > MAX_FILES_PER_UPLOAD: + return 'Naložite lahko do {} datotek hkrati.'.format(MAX_FILES_PER_UPLOAD), 400 elif len(files) < 1: return 'Priložena ni bila nobena datoteka.', 400 - print('one') - err = portal.base.check_suffixes(files) + err = check_suffixes(files) if err: return err, 400 - print('two') - err = portal.base.check_form(request.form) + err = check_form(request.form) if err: return err, 400 - print('three') - upload_metadata = 
portal.base.get_upload_metadata(corpus_name, request) - contract_file_name = portal.base.generate_contract_pdf(UPLOADS_DIR, upload_metadata, CONTRACT_CLIENT_CONTACT) - - # Add contract_file_name to metadata TODO: move somewhere else - upload_metadata['contract'] = contract_file_name - portal.base.store_datafiles(UPLOADS_DIR, files, upload_metadata) - portal.base.store_metadata(UPLOADS_DIR, upload_metadata) - portal.base.send_confirm_mail( - subject=MAIL_SUBJECT, - body=MAIL_BODY, - uploads_path=UPLOADS_DIR, - upload_metadata=upload_metadata, - mail_host=MAIL_HOST, mail_login=MAIL_LOGIN, mail_pass=MAIL_PASS, - imap_port=IMAP_PORT, smtp_port=SMTP_PORT) + # Parse request. + upload_metadata = upload_handler.extract_upload_metadata(corpus_name, request) + + logging.info('Upload with id "{}" supplied form data: {}'.format(upload_metadata['upload_id'], + str(upload_metadata['form_data']))) + + # Generate contract PDF file based on the uploads metadata. + upload_handler.generate_upload_contract_pdf(upload_metadata) + + # Store uploaded files to disk. + upload_handler.store_datafiles(files, upload_metadata) + + # Store metadata to database. + upload_handler.store_metadata_unauthenticated(upload_metadata) + + # Send confirmation mail along with the contract to the submitted email address. + upload_handler.send_confirm_mail(upload_metadata) return 'Uspešno ste oddali datotek(e). Št. datotek: {}'.format(len(files)) +def check_suffixes(files): + for key, f in files.items(): + if key.startswith('file'): + suffix = f.filename.split('.')[-1] + if suffix not in ENABLED_FILETYPES: + return 'Datoteka "{}" ni pravilnega formata.'.format(f.filename) + return None + + +def check_form(form): + ime = form.get('ime') + podjetje = form.get('podjetje') + naslov = form.get('naslov') + posta = form.get('posta') + email = form.get('email') + telefon = form.get('telefon') + + if len(ime) > 100: + return 'Predolgo ime.' + + if len(podjetje) > 100: + return 'Predolgo ime institucije.' 
+ + if len(email) > 100: + return 'Predolgi email naslov' + elif not re.search(REGEX_EMAIL, email): + return 'Email napačnega formata.' + + if len(telefon) > 100: + return 'Predolga telefonska št.' + + if len(naslov) > 100: + return 'Predolg naslov.' + + if len(posta) > 100: + return 'Predolga pošta' + + return None + if __name__ == '__main__': app.run(debug=True) diff --git a/config.ini b/config.ini index a8ef2e4..915d8c9 100644 --- a/config.ini +++ b/config.ini @@ -1,13 +1,16 @@ [DEFAULT] +SQL_CONN_STR=postgresql://portal:randompass123@localhost/portal MAIL_HOST=posta.cjvt.si MAIL_LOGIN=oddaja-besedil@cjvt.si MAIL_PASS=secretmailpass123 SMTP_PORT=465 IMAP_PORT=993 MAX_UPLOAD_SIZE=1000000000 +MAX_FILES_PER_UPLOAD=30 UPLOADS_DIR=./uploads -DATA_DIR=./data CONTRACT_CLIENT_CONTACT=Testko Tester +DESC_PREVODI=

Prevodi

Strojno prevajanje je ena od uporabnih jezikovnih tehnologij, saj omogoča hitro sporazumevanje med ljudmi iz različnih kultur in jezikovnih okolij. Več o razvoju slovenskega strojnega prevajalnika lahko preberete na tej povezavi. Za kakovosten strojni prevajalnik so ključnega pomena prevodi, iz katerih se algoritmi umetne inteligence naučijo prevajati. S prispevanjem besedil v korpus prevodov boste pomembno prispevali k razvoju slovenskega strojnega prevajalnika med angleščino in slovenščino. Več informacij o prispevanju besedil najdete tukaj.

+DESC_GIGAFIDA=

Gigafida

Gigafida je referenčni korpus pisne slovenščine. Besedila so izbrana in strojno obdelana z namenom, da bi korpus kot vzorec sodobne standardne slovenščine lahko služil za jezikoslovne in druge humanistične raziskave, izdelavo sodobnih slovarjev, slovnic, učnih gradiv in razvoj jezikovnih tehnologij za slovenščino. S prispevanjem besedil v korpus Gigafida pomembno prispevate k razvoju sodobnih jezikovnih tehnologij za slovenski jezik.

MAIL_SUBJECT=RSDO: pogodba za oddana besedila ({upload_id}) MAIL_BODY=Hvala, ker ste prispevali besedila in na ta način pomagali pri razvoju slovenskega jezika v digitalnem okolju. V prilogi vam pošiljamo pogodbo s seznamom naloženih datotek. diff --git a/docker-compose.yml b/docker-compose.yml index 5a5f4bb..7061141 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,15 +4,26 @@ services: build: . restart: always environment: - - PORTALDS4DS1_MAIL_HOST=posta.cjvt.si - - PORTALDS4DS1_MAIL_LOGIN=oddaja-besedil@cjvt.si - - PORTALDS4DS1_MAIL_PASS=randompass123 - - PORTALDS4DS1_SMTP_PORT=465 - - PORTALDS4DS1_IMAP_PORT=993 - - PORTALDS4DS1_MAX_UPLOAD_SIZE=1000000000 - - PORTALDS4DS1_BASE_DIR=./ - - PORTALDS4DS1_CONTRACT_CLIENT_CONTACT=Testko Tester + - PORTALDS4DS1_SQL_CONN_STR=postgresql://portal:randompass123@db/portal + - PORTALDS4DS1_MAIL_HOST=posta.cjvt.si + - PORTALDS4DS1_MAIL_LOGIN=oddaja-besedil@cjvt.si + - PORTALDS4DS1_MAIL_PASS=randompass123 + - PORTALDS4DS1_SMTP_PORT=465 + - PORTALDS4DS1_IMAP_PORT=993 + - PORTALDS4DS1_MAX_UPLOAD_SIZE=1000000000 + - PORTALDS4DS1_BASE_DIR=./ + - PORTALDS4DS1_CONTRACT_CLIENT_CONTACT=Testko Tester ports: - 127.0.0.1:5000:80 volumes: - - /tmp/uploads/:/usr/src/portal-webapp/uploads + - /tmp/portal-ds4-ds1/uploads:/usr/src/portal-webapp/uploads + command: gunicorn --bind 0.0.0.0:80 -w 1 --access-logfile - app:app + db: + image: postgres:9.6.21-alpine + restart: always + environment: + - POSTGRES_PASSWORD=randompass123 + - POSTGRES_USER=portal + - POSTGRES_DB=portal + volumes: + - /tmp/portal-ds4-ds1/db:/var/lib/postgresql/data diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100755 index 0000000..e8952ea --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Upgrade DB schema to version used by application. This also initializes table, if they aren't already created. 
+flask db upgrade + + +exec "$@" diff --git a/migrations/README b/migrations/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/migrations/alembic.ini b/migrations/alembic.ini new file mode 100644 index 0000000..ec9d45c --- /dev/null +++ b/migrations/alembic.ini @@ -0,0 +1,50 @@ +# A generic, single database configuration. + +[alembic] +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic,flask_migrate + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[logger_flask_migrate] +level = INFO +handlers = +qualname = flask_migrate + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..42438a5 --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,90 @@ +from __future__ import with_statement + +import logging +from logging.config import fileConfig + +from flask import current_app + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+fileConfig(config.config_file_name) +logger = logging.getLogger('alembic.env') + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +config.set_main_option( + 'sqlalchemy.url', + str(current_app.extensions['migrate'].db.engine.url).replace('%', '%%')) +target_metadata = current_app.extensions['migrate'].db.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + + # this callback is used to prevent an auto-migration from being generated + # when there are no changes to the schema + # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html + def process_revision_directives(context, revision, directives): + if getattr(config.cmd_opts, 'autogenerate', False): + script = directives[0] + if script.upgrade_ops.is_empty(): + directives[:] = [] + logger.info('No changes in schema detected.') + + connectable = current_app.extensions['migrate'].db.engine + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + process_revision_directives=process_revision_directives, + **current_app.extensions['migrate'].configure_args + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/migrations/script.py.mako b/migrations/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/migrations/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/migrations/versions/a846faa2b908_initial_migration.py b/migrations/versions/a846faa2b908_initial_migration.py new file mode 100644 index 0000000..bfedc10 --- /dev/null +++ b/migrations/versions/a846faa2b908_initial_migration.py @@ -0,0 +1,40 @@ +"""Initial migration. 
+ +Revision ID: a846faa2b908 +Revises: +Create Date: 2021-03-24 08:53:24.792682 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'a846faa2b908' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('upload_unauthenticated', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('upload_hash', sa.String(), nullable=True), + sa.Column('timestamp', sa.DateTime(), nullable=True), + sa.Column('form_name', sa.String(), nullable=True), + sa.Column('form_org', sa.String(), nullable=True), + sa.Column('form_address', sa.String(), nullable=True), + sa.Column('form_zipcode', sa.String(), nullable=True), + sa.Column('form_email', sa.String(), nullable=True), + sa.Column('file_contract', sa.String(), nullable=True), + sa.Column('upload_file_hashes', sa.ARRAY(sa.String()), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('upload_unauthenticated') + # ### end Alembic commands ### diff --git a/portal/base.py b/portal/base.py index 3af7875..96c9527 100644 --- a/portal/base.py +++ b/portal/base.py @@ -1,8 +1,9 @@ -import re import hashlib import time import ssl +import traceback from pathlib import Path +from datetime import datetime import imaplib from smtplib import SMTP_SSL @@ -17,9 +18,7 @@ from email.mime.application import MIMEApplication import pdfkit from jinja2 import Environment, FileSystemLoader - -ENABLED_FILETYPES = ['txt', 'csv', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'xml', 'mxliff', 'tmx'] -REGEX_EMAIL = re.compile('^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$') +from . 
model import db, UploadUnauthenticated class ContractCreator: @@ -49,202 +48,161 @@ class ContractCreator: pdfkit.from_string(html_str, out_f, options=self.pdfkit_options) -contract_creator = ContractCreator() - - -def get_upload_metadata(corpus_name, request): - upload_metadata = dict() - - file_hashes = create_file_hashes(request.files) - file_names = file_hashes.keys() - form_data = request.form.copy() - upload_timestamp = int(time.time()) - upload_id = create_upload_id(corpus_name, form_data, upload_timestamp, file_hashes) - - upload_metadata['corpus_name'] = corpus_name - upload_metadata['form_data'] = form_data - upload_metadata['upload_id'] = upload_id - upload_metadata['timestamp'] = upload_timestamp - upload_metadata['file_hashes'] = file_hashes - upload_metadata['file_names'] = file_names - - return upload_metadata - - -def check_suffixes(files): - for key, f in files.items(): - if key.startswith('file'): - suffix = f.filename.split('.')[-1] - if suffix not in ENABLED_FILETYPES: - return 'Datoteka "{}" ni pravilnega formata.'.format(f.filename) - return None - - -def get_subdir(uploads_path, dir_name): - subdir = uploads_path / dir_name - if not subdir.exists(): - subdir.mkdir(parents=True) - return subdir - - -def create_upload_id(corpus_name, form_data, upload_timestamp, file_hashes): - ime = form_data.get('ime') - podjetje = form_data.get('podjetje') - naslov = form_data.get('naslov') - posta = form_data.get('posta') - email = form_data.get('email') - telefon = form_data.get('telefon') - - # This hash serves as an unique identifier for the whole upload. - metahash = hashlib.md5((corpus_name+ime+podjetje+naslov+posta+email+telefon).encode()) - # Include file hashes to avoid metafile name collisions if they have the same form values, - # but different data files. Sort hashes first so upload order doesn't matter. 
- sorted_f_hashes = list(file_hashes.values()) - sorted_f_hashes.sort() - metahash.update(''.join(sorted_f_hashes).encode()) - metahash = metahash.hexdigest() - - return metahash - - -def check_form(form): - ime = form.get('ime') - podjetje = form.get('podjetje') - naslov = form.get('naslov') - posta = form.get('posta') - email = form.get('email') - telefon = form.get('telefon') - - if len(ime) > 100: - return 'Predolgo ime.' - - if len(podjetje) > 100: - return 'Predolgo ime institucije.' - - if len(email) > 100: - return 'Predolgi email naslov' - elif not re.search(REGEX_EMAIL, email): - return 'Email napačnega formata.' - - if len(telefon) > 100: - return 'Predolga telefonska št.' - - if len(naslov) > 100: - return 'Predolg naslov.' - - if len(posta) > 100: - return 'Predolga pošta' - - return None - - -def create_file_hashes(files): - res = dict() - for key, f in files.items(): - if key.startswith('file'): - h = hashlib.md5(f.filename.encode()) - h.update(f.stream.read()) - res[f.filename] = h.hexdigest() - f.seek(0) - return res - - -def store_metadata(uploads_path, upload_metadata): - base = get_subdir(uploads_path, 'meta') - - timestamp = upload_metadata['timestamp'] - upload_id = upload_metadata['upload_id'] - form_data = upload_metadata['form_data'] - email = form_data['email'] - file_hashes = upload_metadata['file_hashes'] - contract = upload_metadata['contract'] - filename = str(timestamp) + '-' + email + '-' + upload_id + '.meta' - - sorted_f_hashes = list(file_hashes.values()) - sorted_f_hashes.sort() - - path = base / filename - with path.open('w') as f: - f.write('korpus=' + upload_metadata['corpus_name']) - f.write('\nime=' + form_data['ime']) - f.write('\npodjetje=' + form_data['podjetje']) - f.write('\nnaslov=' + form_data['naslov']) - f.write('\nposta=' + form_data['posta']) - f.write('\nemail=' + form_data['email']) - f.write('\ndatoteke=' + str(sorted_f_hashes)) - f.write('\npogodba=' + contract) - - -def store_datafiles(uploads_path, files, 
upload_metadata): - base = get_subdir(uploads_path, 'files') - file_hashes = upload_metadata['file_hashes'] - - for key, f in files.items(): - if key.startswith('file'): - path = base / file_hashes[f.filename] - if not path.exists(): - path.mkdir() - f.save(path / f.filename) - - -def generate_contract_pdf(uploads_path, upload_metadata, contract_client_contact): - base = get_subdir(uploads_path, 'contracts') - contract_file_name = upload_metadata['upload_id'] + '.pdf' - form_data = upload_metadata['form_data'] - - files_table_str = [] - for file_name in upload_metadata['file_names']: - files_table_str.append('') - files_table_str.append(file_name) - files_table_str.append('') - files_table_str = ''.join(files_table_str) - - data = { - 'ime_priimek': form_data['ime'], - 'naslov': form_data['naslov'], - 'posta': form_data['posta'], - 'kontakt_narocnik': contract_client_contact, - 'kontakt_imetnikpravic': form_data['ime'], - 'files_table_str': files_table_str - } - - contract_creator.create_pdf(base / contract_file_name, data) - return contract_file_name - - -def send_confirm_mail(subject, body, uploads_path, upload_metadata, mail_host, mail_login, mail_pass, imap_port=993, smtp_port=465): - upload_id = upload_metadata['upload_id'] - - message = MIMEMultipart() - message['From'] = mail_login - message['To'] = upload_metadata['form_data']['email'] - message['Subject'] = subject.format(upload_id=upload_id) - body = body.format(upload_id=upload_id) - message.attach(MIMEText(body, "plain")) - - contracts_dir = get_subdir(uploads_path, 'contracts') - base_name = upload_metadata['contract'] - contract_file = contracts_dir / base_name - with open(contract_file, "rb") as f: - part = MIMEApplication( - f.read(), - Name = base_name - ) - part['Content-Disposition'] = 'attachment; filename="%s"' % base_name - message.attach(part) - - text = message.as_string() - - # Create a secure SSL context - context = ssl.create_default_context() - - with SMTP_SSL(mail_host, smtp_port, 
context=context) as server: - server.login(mail_login, mail_pass) - server.sendmail(message['From'], message['To'], text) - - # Save copy of sent mail in Sent mailbox - imap = imaplib.IMAP4_SSL(mail_host, imap_port) - imap.login(mail_login, mail_pass) - imap.append('Sent', '\\Seen', imaplib.Time2Internaldate(time.time()), text.encode('utf8')) - imap.logout() +class UploadHandler: + + def __init__(self, **kwargs): + self.config = kwargs + + self.contract_creator = ContractCreator() + + def extract_upload_metadata(self, corpus_name, request): + upload_metadata = dict() + + file_hashes = self.create_file_hashes(request.files) + file_names = file_hashes.keys() + form_data = request.form.copy() + upload_timestamp = int(time.time()) + upload_id = self.create_upload_id(corpus_name, form_data, upload_timestamp, file_hashes) + + upload_metadata['corpus_name'] = corpus_name + upload_metadata['form_data'] = form_data + upload_metadata['upload_id'] = upload_id + upload_metadata['timestamp'] = upload_timestamp + upload_metadata['file_hashes_dict'] = file_hashes + upload_metadata['file_names'] = file_names + upload_metadata['contract_file'] = upload_id + '.pdf' + + return upload_metadata + + + def get_uploads_subdir(self, dir_name): + subdir = self.config['UPLOADS_DIR'] / dir_name + if not subdir.exists(): + subdir.mkdir(parents=True) + return subdir + + + def create_upload_id(self, corpus_name, form_data, upload_timestamp, file_hashes): + # Order is important while hashing, hence the sorting. + val_buff = [str(upload_timestamp)] + for key in sorted(form_data): + val_buff.append(form_data[key]) + + # This hash serves as an unique identifier for the whole upload. + metahash = hashlib.md5((''.join(val_buff)).encode()) + # Include file hashes to avoid metafile name collisions if they have the same form values, + # but different data files. Sort hashes first so upload order doesn't matter. 
+ sorted_f_hashes = list(file_hashes.values()) + sorted_f_hashes.sort() + metahash.update(''.join(sorted_f_hashes).encode()) + metahash = metahash.hexdigest() + + return metahash + + def create_file_hashes(self, files): + res = dict() + for key, f in files.items(): + if key.startswith('file'): + h = hashlib.md5(f.filename.encode()) + h.update(f.stream.read()) + res[f.filename] = h.hexdigest() + f.seek(0) + return res + + def store_metadata_unauthenticated(self, upload_metadata): + timestamp = datetime.fromtimestamp(upload_metadata['timestamp']) + form_data = upload_metadata['form_data'] + file_hashes = upload_metadata['file_hashes_dict'] + sorted_f_hashes = list(file_hashes.values()) + sorted_f_hashes.sort() + + try: + upload_unauthenticated = UploadUnauthenticated( + upload_hash=upload_metadata['upload_id'], + timestamp=timestamp, + form_name=form_data['ime'], + form_org=form_data['podjetje'], + form_address=form_data['naslov'], + form_zipcode=form_data['posta'], + form_email=form_data['email'], + file_contract=upload_metadata['contract_file'], + upload_file_hashes=sorted_f_hashes + ) + + db.session.add(upload_unauthenticated) + db.session.commit() + except Exception: + traceback.print_exc() + + def store_metadata_authenticated(self, upload_metadata): + pass + + def store_datafiles(self, files, upload_metadata): + base = self.get_uploads_subdir('files') + file_hashes = upload_metadata['file_hashes_dict'] + + for key, f in files.items(): + if key.startswith('file'): + path = base / file_hashes[f.filename] + if not path.exists(): + path.mkdir() + f.save(path / f.filename) + + def generate_upload_contract_pdf(self, upload_metadata): + base = self.get_uploads_subdir('contracts') + form_data = upload_metadata['form_data'] + + files_table_str = [] + for file_name in upload_metadata['file_names']: + files_table_str.append('') + files_table_str.append(file_name) + files_table_str.append('') + files_table_str = ''.join(files_table_str) + + data = { + 'ime_priimek': 
form_data['ime'], + 'naslov': form_data['naslov'], + 'posta': form_data['posta'], + 'kontakt_narocnik': self.config['CONTRACT_CLIENT_CONTACT'], + 'kontakt_imetnikpravic': form_data['ime'], + 'files_table_str': files_table_str + } + + self.contract_creator.create_pdf(base / upload_metadata['contract_file'], data) + + def send_confirm_mail(self, upload_metadata): + upload_id = upload_metadata['upload_id'] + + message = MIMEMultipart() + message['From'] = self.config['MAIL_LOGIN'] + message['To'] = upload_metadata['form_data']['email'] + message['Subject'] = self.config['MAIL_SUBJECT'].format(upload_id=upload_id) + body = self.config['MAIL_BODY'].format(upload_id=upload_id) + message.attach(MIMEText(body, "plain")) + + contracts_dir = self.get_uploads_subdir('contracts') + base_name = upload_metadata['contract_file'] + contract_file = contracts_dir / base_name + with open(contract_file, "rb") as f: + part = MIMEApplication( + f.read(), + Name = base_name + ) + part['Content-Disposition'] = 'attachment; filename="%s"' % base_name + message.attach(part) + + text = message.as_string() + + # Create a secure SSL context + context = ssl.create_default_context() + + with SMTP_SSL(self.config['MAIL_HOST'], self.config['SMTP_PORT'], context=context) as server: + server.login(self.config['MAIL_LOGIN'], self.config['MAIL_PASS']) + server.sendmail(message['From'], message['To'], text) + + # Save copy of sent mail in Sent mailbox + imap = imaplib.IMAP4_SSL(self.config['MAIL_HOST'], self.config['IMAP_PORT']) + imap.login(self.config['MAIL_LOGIN'], self.config['MAIL_PASS']) + imap.append('Sent', '\\Seen', imaplib.Time2Internaldate(time.time()), text.encode('utf8')) + imap.logout() diff --git a/portal/model.py b/portal/model.py new file mode 100644 index 0000000..3e7f6d7 --- /dev/null +++ b/portal/model.py @@ -0,0 +1,26 @@ +from datetime import datetime +import sqlalchemy +from flask_sqlalchemy import SQLAlchemy + +########################################### +# Model classes for 
describing SQL tables # +########################################### + +# The "db" object gets bound to the Flask app in app.py. +db = SQLAlchemy() + + +# Entries for uploads to corpora, that have no authentication. E.g. "prevodi" or "gigafida". +class UploadUnauthenticated(db.Model): + __tablename__ = 'upload_unauthenticated' + id = db.Column(db.Integer, primary_key=True) + upload_hash = db.Column(db.String) + timestamp = db.Column(db.DateTime, default=datetime.utcnow) + form_name = db.Column(db.String) + form_org = db.Column(db.String) + form_address = db.Column(db.String) + form_zipcode = db.Column(db.String) + form_email = db.Column(db.String) + file_contract = db.Column(db.String) + upload_file_hashes = db.Column(sqlalchemy.types.ARRAY(db.String)) + diff --git a/static/image/bg.jpeg b/static/image/bg.jpeg index 184c1f7..a485057 100644 Binary files a/static/image/bg.jpeg and b/static/image/bg.jpeg differ diff --git a/static/style.css b/static/style.css index dfa80f8..f2e8240 100644 --- a/static/style.css +++ b/static/style.css @@ -21,14 +21,16 @@ html { overflow-y: hidden; } -.bg { - background-image: url("image/bg.jpeg"); - height: 100%; - background-position: center; - background-repeat: no-repeat; +html { + background: url(image/bg.jpeg) no-repeat center center fixed; + -webkit-background-size: cover; + -moz-background-size: cover; + -o-background-size: cover; background-size: cover; + overflow-y: scroll; } + #main-window { position: absolute; top: 50%; @@ -60,7 +62,7 @@ html { font-family: Roboto; font-style: normal; font-weight: 200; - font-size: 36px; + font-size: 32px; line-height: 42px; margin-block-start: 0.4em; z-index: 5; @@ -89,6 +91,15 @@ label { color: #46535b; } +.form-text { + font-family: Roboto; + font-style: normal; + font-weight: normal; + font-size: 11px; + line-height: 12px; + color: #46535b; +} + #button-submit { display: flex; flex-direction: row; @@ -102,7 +113,7 @@ label { background: #006cb7; border-radius: 29px; border: 0px; - top: 
530px; + top: 630px; font-family: Roboto; font-style: normal; @@ -187,8 +198,8 @@ input { margin-top: 26px; margin-bottom: 26px; min-height: 100px; - max-height: 500px; - top: -530px; + max-height: 670px; + top: -600px; overflow-x: hidden; overflow-y: auto; } @@ -196,7 +207,7 @@ input { #rect1 { position: relative; width: 388px; - height: 631px; + height: 731px; background: #f5f5f5; box-shadow: 0px 4px 40px rgba(0, 0, 0, 0.25); @@ -238,7 +249,7 @@ input { width: 60%; height: 40%; margin: 0 auto; - padding-top: 90px; + padding-top: 120px; padding-left: 10px; padding-right: 10px; right: 75px; diff --git a/templates/basic.html b/templates/basic.html index ea60ca3..cce1db6 100644 --- a/templates/basic.html +++ b/templates/basic.html @@ -8,7 +8,7 @@ {{ dropzone.style('position: absolute; top: -0.5px; width: 388px; - height: 632px; + height: 732px; left: 385px; background: linear-gradient(198.62deg, rgba(255, 255, 255, 0.49) -1.62%, rgba(255, 255, 255, 0.73) -1.61%, rgba(255, 255, 255, 0.41) 79.34%); box-shadow: 20px 4px 40px rgba(0, 0, 0, 0.25); @@ -18,7 +18,6 @@ -
@@ -27,8 +26,8 @@
-

Portal za oddajanje besedil {{corpus_name}}

-

{{subtitle}}

+

Portal za oddajanje besedil

+
{{description|safe}}
@@ -48,8 +47,7 @@ - - +
*Po kliku na gumb “Oddaj” se bo prikazala vsebina pogodbe o odstopu avtorskih pravic. Če se z vsebino strinjate, kliknite gumb “Pošlji”, da podatke posredujete v korpus, po e-pošti pa boste prejeli svoj izvod pogodbe.
@@ -163,7 +161,7 @@ zagotovili vse potrebne informacije v skladu s predpisi o varstvu osebnih podatk

10.3. Pogodbeni stranki s podpisom potrjujeta veljavnost te pogodbe.

- +
@@ -190,17 +188,17 @@ zagotovili vse potrebne informacije v skladu s predpisi o varstvu osebnih podatk url: "/{{corpus_name}}/upload", autoProcessQueue: false, uploadMultiple: true, - parallelUploads: 20, + parallelUploads: {{max_files}}, paramName: "file", // The name that will be used to transfer the file maxFilesize: 1000, // MB acceptedFiles: ".txt, .csv, .pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx", - maxFiles: 20, + maxFiles: {{max_files}}, dictDefaultMessage: `Kliknite ali odložite datoteke sem.`, dictFallbackMessage: "Vaš brskalnik ne podpira izbiranje datotek z odlaganjem (\"drag & drop\").", dictInvalidFileType: "Datoteka je napačnega formata.", dictFileTooBig: "Datoteke je prevelika {{filesize}}. Največja dovoljena velikost: {{maxFilesize}}MiB.", dictResponseError: "Napaka strežnika: {{statusCode}}", - dictMaxFilesExceeded: "Ne morete naložiti več datotek.", + dictMaxFilesExceeded: "Največje število datotek že doseženo.", dictCancelUpload: "Prekini prenos", dictRemoveFile: "Odstrani datoteko", dictCancelUploadConfirmation: "Ali res želite odstraniti to datoteko?", @@ -221,8 +219,7 @@ zagotovili vse potrebne informacije v skladu s predpisi o varstvu osebnih podatk var email = form["email"].value; var podjetje = form["podjetje"].value; var telefon = form["telefon"].value; - var izjava = form["izjava"].checked; - if (isEmptyOrSpaces(ime) || isEmptyOrSpaces(email) || !izjava) { + if (isEmptyOrSpaces(ime) || isEmptyOrSpaces(email)) { alert("Izpolnite vsa obvezna polja!"); } else if (!reEmail.test(email.toLowerCase())) { alert("Email napačnega formata!"); diff --git a/templates/index-solar.html b/templates/index-solar.html new file mode 100644 index 0000000..ea60ca3 --- /dev/null +++ b/templates/index-solar.html @@ -0,0 +1,300 @@ + + + + + Portal za oddajanje besedil + + + {{ dropzone.style('position: absolute; + top: -0.5px; + width: 388px; + height: 632px; + left: 385px; + background: linear-gradient(198.62deg, rgba(255, 255, 255, 0.49) -1.62%, rgba(255, 
255, 255, 0.73) -1.61%, rgba(255, 255, 255, 0.41) 79.34%); + box-shadow: 20px 4px 40px rgba(0, 0, 0, 0.25); + backdrop-filter: blur(20px); + border: 0px; + border-radius: 0px 20px 20px 0px;') }} + + + +
+
+
+
+ logo +
+ + +
+

Portal za oddajanje besedil {{corpus_name}}

+

{{subtitle}}

+ + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+
+ + + + + + + +