"""Upload handling helpers: contract PDF creation, upload validation/storage,
confirmation e-mails and a few user/institution lookup helpers."""

import hashlib
import time
import ssl
import traceback
import re
from pathlib import Path
from datetime import datetime
import imaplib
from smtplib import SMTP_SSL
import email
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication

import pdfkit
from jinja2 import Environment, FileSystemLoader

from .model import db, UploadRegular, UploadSolar, RegisteredUser, CorpusAccess, Institution


# Raw string so that `\.` and `\w` reach the regex engine untouched instead of
# being treated as (invalid) Python string escapes. The pattern is unchanged.
REGEX_EMAIL = re.compile(r'^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$')

# Maximum accepted length (in characters) of an uploaded file's name.
MAX_FNAME_LEN = 100


class ContractCreator:
    """Renders a Jinja2 HTML template and writes it out as a PDF via pdfkit."""

    def __init__(self, base_path, template_path):
        """Load the contract template.

        Args:
            base_path: Directory under which contract PDFs are written.
            template_path: Template path, resolved relative to the current
                working directory (the loader's search path is "./").
        """
        self.base = base_path
        template_loader = FileSystemLoader(searchpath="./")
        template_env = Environment(loader=template_loader)
        self.template = template_env.get_template(template_path)
        self.pdfkit_options = {
            'page-size': 'A4',
            'margin-top': '0.75in',
            'margin-right': '0.75in',
            'margin-bottom': '0.75in',
            'margin-left': '0.75in',
            'encoding': "UTF-8",
            'custom-header': [
                ('Accept-Encoding', 'gzip')
            ]
        }

    def fill_template(self, **kwargs):
        """Return the template rendered with ``kwargs`` as template variables."""
        return self.template.render(**kwargs)

    def create_pdf(self, f_name, fields_dict):
        """Render the template with ``fields_dict`` and write it as a PDF.

        The file is stored at ``<base>/<f_name[:2]>/<f_name[2:]>``.

        Args:
            f_name: Name of the contract file; its first two characters select
                the sub-directory.
            fields_dict: Mapping of template variable names to values.
        """
        sub_dir = Path(self.base) / f_name[:2]
        # parents/exist_ok: tolerate a missing base directory and concurrent
        # creation of the same sub-directory.
        sub_dir.mkdir(parents=True, exist_ok=True)
        out_f = sub_dir / f_name[2:]
        html_str = self.fill_template(**fields_dict)
        # pdfkit hands the output path to an external command line; pass a str.
        pdfkit.from_string(html_str, str(out_f), options=self.pdfkit_options)


class UploadHandler:
    """Validates upload requests, stores uploaded files and sends confirmation mail.

    Configuration is supplied as keyword arguments and kept in ``self.config``.
    Keys read by this class: ``UPLOADS_DIR``, ``MAX_FILES_PER_UPLOAD``,
    ``MAIL_LOGIN``, ``MAIL_PASS``, ``MAIL_HOST``, ``SMTP_PORT``, ``IMAP_PORT``,
    ``MAIL_SUBJECT`` and ``MAIL_BODY``.
    """

    # File name suffixes accepted by check_suffixes (compared case-sensitively).
    ENABLED_FILETYPES = ['txt', 'csv', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'xml', 'mxliff', 'tmx']

    def __init__(self, **kwargs):
        self.config = kwargs

    def set_contract_creator(self, contract_creator):
        """Attach the ContractCreator used to locate contract PDFs."""
        assert isinstance(contract_creator, ContractCreator)
        self._contract_creator = contract_creator

    def get_uploads_subdir(self, dir_name):
        """Return ``UPLOADS_DIR/dir_name`` as a Path, creating it if missing."""
        subdir = Path(self.config['UPLOADS_DIR']) / dir_name
        subdir.mkdir(parents=True, exist_ok=True)
        return subdir

    @staticmethod
    def extract_upload_metadata(corpus_name, request):
        """Collect the metadata describing one upload request.

        Args:
            corpus_name: Name of the corpus the upload belongs to.
            request: Request object exposing ``files`` and ``form``.

        Returns:
            dict with keys ``corpus_name``, ``form_data``, ``upload_id``,
            ``timestamp``, ``file_hashes_dict``, ``file_names`` and
            ``contract_file``.
        """
        file_hashes = UploadHandler.create_file_hashes(request.files)
        form_data = request.form.copy()
        upload_timestamp = int(time.time())
        upload_id = UploadHandler.create_upload_id(corpus_name, form_data, upload_timestamp, file_hashes)

        return {
            'corpus_name': corpus_name,
            'form_data': form_data,
            'upload_id': upload_id,
            'timestamp': upload_timestamp,
            'file_hashes_dict': file_hashes,
            'file_names': file_hashes.keys(),
            'contract_file': upload_id + '.pdf',
        }

    @staticmethod
    def create_upload_id(corpus_name, form_data, upload_timestamp, file_hashes):
        """Return an MD5 hex digest identifying the whole upload.

        The digest covers the timestamp, the form values (in sorted key order)
        and the sorted file hashes. ``corpus_name`` is accepted but does not
        contribute to the digest.
        """
        # Order is important while hashing, hence the sorting.
        val_buff = [str(upload_timestamp)]
        val_buff.extend(form_data[key] for key in sorted(form_data))

        # This hash serves as an unique identifier for the whole upload.
        metahash = hashlib.md5(''.join(val_buff).encode())

        # Include file hashes to avoid metafile name collisions if they have the same form values,
        # but different data files. Sort hashes first so upload order doesn't matter.
        metahash.update(''.join(sorted(file_hashes.values())).encode())
        return metahash.hexdigest()

    @staticmethod
    def create_file_hashes(files):
        """Map each uploaded file's name to an MD5 digest of its name + content.

        Only entries whose form key starts with ``'file'`` are considered.
        Each stream is rewound afterwards so the file can still be saved.
        """
        res = dict()
        for key, f in files.items():
            if key.startswith('file'):
                h = hashlib.md5(f.filename.encode())
                h.update(f.stream.read())
                res[f.filename] = h.hexdigest()
                f.seek(0)
        return res

    # NOTE: this used to be decorated with @staticmethod while still declaring
    # `self`, so `handler.store_model(obj)` raised TypeError. It is now a
    # regular instance method with the same parameter list.
    def store_model(self, model_obj):
        """Add ``model_obj`` to the DB session and commit (best effort).

        Failures are printed, not raised; the session is rolled back so it
        stays usable for later operations.
        """
        try:
            db.session.add(model_obj)
            db.session.commit()
        except Exception:
            traceback.print_exc()
            db.session.rollback()

    def store_datafiles(self, files, upload_metadata):
        """Save uploaded files under ``<uploads>/files/<hash[:2]>/<hash[2:]>/``.

        Args:
            files: Mapping of form keys to uploaded file objects; only keys
                starting with ``'file'`` are stored.
            upload_metadata: Metadata dict containing ``file_hashes_dict``.
        """
        base = self.get_uploads_subdir('files')
        file_hashes = upload_metadata['file_hashes_dict']

        for key, f in files.items():
            if not key.startswith('file'):
                continue
            f_hash = file_hashes[f.filename]
            # First byte used for indexing, similarly like git does for example.
            target_dir = base / f_hash[:2] / f_hash[2:]
            target_dir.mkdir(parents=True, exist_ok=True)
            # The file name is client-supplied: drop any directory components
            # so the file cannot be written outside target_dir.
            safe_name = Path(f.filename).name
            f.save(target_dir / safe_name)

    def send_confirm_mail(self, upload_metadata, attach_contract_file=False):
        """Send the upload confirmation e-mail and file a copy under 'Sent'.

        Sending is best effort: any exception raised while talking to the
        SMTP/IMAP server is printed and swallowed.

        Args:
            upload_metadata: Metadata dict; ``upload_id``, ``form_data['email']``
                and (when attaching) ``contract_file`` are read.
            attach_contract_file: When true, attach the contract PDF found
                under the contract creator's base directory.
        """
        upload_id = upload_metadata['upload_id']

        message = MIMEMultipart()
        message['From'] = self.config['MAIL_LOGIN']
        message['To'] = upload_metadata['form_data']['email']
        message['Subject'] = self.config['MAIL_SUBJECT'].format(upload_id=upload_id)
        body = self.config['MAIL_BODY'].format(upload_id=upload_id)
        message.attach(MIMEText(body, "plain"))

        if attach_contract_file:
            # The setter stores the creator as `_contract_creator`.
            contracts_dir = Path(self._contract_creator.base)
            f_name = upload_metadata['contract_file']
            contract_file = contracts_dir / f_name[:2] / f_name[2:]
            with open(contract_file, "rb") as f:
                part = MIMEApplication(
                    f.read(),
                    Name=f_name
                )
            part['Content-Disposition'] = 'attachment; filename="%s"' % f_name
            message.attach(part)

        text = message.as_string()

        # Create a secure SSL context
        context = ssl.create_default_context()

        try:
            with SMTP_SSL(self.config['MAIL_HOST'], self.config['SMTP_PORT'], context=context) as server:
                server.login(self.config['MAIL_LOGIN'], self.config['MAIL_PASS'])
                server.sendmail(message['From'], message['To'], text)

            # Save copy of sent mail in Sent mailbox
            imap = imaplib.IMAP4_SSL(self.config['MAIL_HOST'], self.config['IMAP_PORT'])
            try:
                imap.login(self.config['MAIL_LOGIN'], self.config['MAIL_PASS'])
                imap.append('Sent', '\\Seen', imaplib.Time2Internaldate(time.time()), text.encode('utf8'))
            finally:
                # Always close the IMAP connection, even if login/append failed.
                imap.logout()
        except Exception:
            traceback.print_exc()

    @classmethod
    def check_suffixes(cls, files):
        """Return an error message for the first file with a disallowed suffix.

        The suffix is the text after the last ``'.'`` in the file name. Returns
        ``None`` when every file passes or when ``ENABLED_FILETYPES`` is empty.
        A classmethod so the class-level ``ENABLED_FILETYPES`` (including a
        subclass override) is actually reachable.
        """
        allowed = cls.ENABLED_FILETYPES
        for key, f in files.items():
            if key.startswith('file'):
                suffix = f.filename.split('.')[-1]
                if allowed and suffix not in allowed:
                    return 'Datoteka "{}" ni pravilnega formata.'.format(f.filename)
        return None

    @staticmethod
    def check_fname_lengths(files):
        """Return an error message if any file name exceeds MAX_FNAME_LEN, else None."""
        for key, f in files.items():
            if key.startswith('file') and len(f.filename) > MAX_FNAME_LEN:
                return 'Ime datoteke presega dolžino {} znakov.'.format(MAX_FNAME_LEN)
        return None

    def check_upload_request(self, request):
        """Validate an upload request.

        Returns:
            ``None`` when the request is acceptable, otherwise a tuple
            ``(error_message, 400)``.
        """
        files = request.files
        max_files = self.config['MAX_FILES_PER_UPLOAD']
        if len(files) > max_files:
            return 'Naložite lahko do {} datotek hkrati.'.format(max_files), 400
        elif len(files) < 1:
            return 'Priložena ni bila nobena datoteka.', 400

        err = self.check_suffixes(files)
        if err:
            return err, 400

        err = self.check_fname_lengths(files)
        if err:
            return err, 400

        return None

    @staticmethod
    def get_user_institution(user_id):
        """Return the ``institution`` attribute of the user with ``user_id``.

        Uses ``.one()``, so the query raises unless exactly one row matches.
        """
        match = db.session.query(RegisteredUser).filter(RegisteredUser.id == user_id).one()
        return match.institution


def has_user_corpus_access(user_id, corpus_name):
    """Return True if the user is an admin or has a CorpusAccess row for the corpus.

    Returns False when no user with ``user_id`` exists.
    """
    user = RegisteredUser.query.filter_by(id=user_id).first()
    if user is None:
        # Unknown user: no access (previously an AttributeError on `user.role`).
        return False
    if user.role == 'admin':
        return True
    return CorpusAccess.query.filter_by(user_id=user.id, corpus=corpus_name).first() is not None


def get_user_obj(user_id):
    """Return the RegisteredUser with ``user_id``, or None if there is none."""
    return RegisteredUser.query.filter_by(id=user_id).first()


def get_institution_obj(institution_id):
    """Return the Institution with ``institution_id``, or None if there is none."""
    return Institution.query.filter_by(id=institution_id).first()