file_crawler update

This commit is contained in:
crunk 2024-05-18 16:11:05 +02:00
parent 8cf699214c
commit 0fb8929a80
3 changed files with 62 additions and 11 deletions

View File

@ -2,22 +2,74 @@ import os
import magic import magic
from distribusi.mappings import CODE_TYPES, FILE_TYPES, SUB_TYPES from distribusi.mappings import CODE_TYPES, FILE_TYPES, SUB_TYPES
from models.distribusi_model import Distribusis
from models.distribusi_file_model import DistribusiFiles from models.distribusi_file_model import DistribusiFiles
from app import create_app, get_app, db
from sqlalchemy.exc import (
DatabaseError,
DataError,
IntegrityError,
InterfaceError,
InvalidRequestError,
)
MIME_TYPE = magic.Magic(mime=True) MIME_TYPE = magic.Magic(mime=True)
def _distribusi_file_with_type(distribusi, full_path):
    """Detect the MIME type of ``full_path`` and store the file if it is known.

    The top-level MIME type (the part before the first ``/``) is looked up in
    ``FILE_TYPES``; only matching files are handed to
    ``_add_distribusi_file_to_db``. Every other file is ignored.

    Args:
        distribusi: passed through unchanged to ``_add_distribusi_file_to_db``.
        full_path: path of the file to inspect.
    """
    mime = MIME_TYPE.from_file(full_path)
    # partition() tolerates a MIME string with zero or several "/" separators,
    # where unpacking split("/") would raise ValueError. The subtype is unused.
    type_, _, _ = mime.partition("/")
    if type_ in FILE_TYPES:
        _add_distribusi_file_to_db(distribusi, full_path, type_)
def _get_distribusi_from_path(path):
    """Look up the ``Distribusis`` row whose ``distribusiname`` equals ``path``.

    Returns whatever ``.first()`` yields for that filter (per SQLAlchemy,
    ``None`` when no row matches).
    """
    return Distribusis.query.filter_by(distribusiname=path).first()
def _add_distribusi_file_to_db(distribusi, full_path, type):
    """Insert one ``DistribusiFiles`` row for ``full_path`` and commit it.

    Database errors are not propagated: the session is rolled back and the
    problem is written to the application logger.

    Args:
        distribusi: object whose ``id`` is stored on the new row. ``None``
            (no matching distribusi) is logged and skipped.
        full_path: path stored in the row's ``path`` field.
        type: top-level MIME type stored in the row's ``type`` field. The
            name shadows the builtin but is kept so keyword callers still work.
    """
    app = get_app()
    if distribusi is None:
        # Without a parent row there is no id to reference; bail out rather
        # than crash with AttributeError on ``distribusi.id``.
        app.logger.error("no distribusi found for file %s, skipping", full_path)
        return
    print(f"adding file to database: {full_path} type: {type}")
    try:
        new_distribusi_file = DistribusiFiles(
            path=full_path,
            type=type,
            distribusi=distribusi.id,
        )
        db.session.add(new_distribusi_file)
        db.session.commit()
    except InvalidRequestError:
        db.session.rollback()
        app.logger.error("Something went wrong!")
    except IntegrityError:
        db.session.rollback()
        app.logger.error("file %s already exists!", full_path)
    except DataError:
        db.session.rollback()
        app.logger.error("%s Invalid Entry", full_path)
    except (InterfaceError, DatabaseError):
        # Must stay after IntegrityError/DataError, which are subclasses of
        # DatabaseError and have their own messages above.
        db.session.rollback()
        app.logger.error("Error connecting to the database")
def add_distribusi_files(path):
    """Register the files of the distribusi named ``path`` in the database.

    Creates an application, and inside its app context looks up the
    distribusi by name, then walks ``stash/<path>``. Files whose name starts
    with ``.`` or ends with ``.html`` are skipped; every other file is passed
    to ``_distribusi_file_with_type``.

    Args:
        path: distribusi name, also used as the directory name under
            ``stash``.
    """
    app = create_app()
    with app.app_context():
        distribusi = _get_distribusi_from_path(path)
        if distribusi is None:
            # Nothing to attach files to; stop before touching the filesystem
            # instead of failing on the first file.
            app.logger.error("no distribusi named %s in the database", path)
            return
        stash_path = os.path.join("stash", path)
        for root, _dirs, files in os.walk(stash_path, topdown=True):
            for file in files:
                if file.startswith(".") or file.endswith(".html"):
                    continue
                _distribusi_file_with_type(distribusi, os.path.join(root, file))
# Crawl one hard-coded distribusi directory.
# NOTE(review): this appears to be a module-level call, so it would run
# whenever the module is imported — confirm, and consider moving it behind
# an `if __name__ == "__main__":` guard or a CLI argument.
add_distribusi_files("2018-12-WttF-Mastodon-and-the-Fediverse")

View File

@ -12,7 +12,7 @@ class DistribusiFiles(db.Model):
path = db.Column(db.String(4096), nullable=True, unique=False) path = db.Column(db.String(4096), nullable=True, unique=False)
alttext = db.Column(db.String(255), nullable=True, unique=False) alttext = db.Column(db.String(255), nullable=True, unique=False)
tags = db.Column(db.String(500), nullable=True, unique=False) tags = db.Column(db.String(500), nullable=True, unique=False)
description = db.Column(db.String(9), nullable=True, unique=False) description = db.Column(db.String(4096), nullable=True, unique=False)
def __repr__(self):
    """Return a short debugging representation of this row."""
    # NOTE(review): ``distribusiname`` is not among the columns visible in
    # this hunk (path, alttext, tags, description) — confirm the attribute
    # exists on this model; otherwise repr() raises AttributeError.
    name = self.distribusiname
    return "<Distribusi_File %r>" % name

View File

@ -37,7 +37,6 @@ def LoginUser():
loginform.password.errors.append("Invalid email or password!") loginform.password.errors.append("Invalid email or password!")
return render_template("login.html", loginform=loginform) return render_template("login.html", loginform=loginform)
if check_password_hash(user.password, loginform.password.data): if check_password_hash(user.password, loginform.password.data):
print(type(user))
login_user(user) login_user(user)
flash("Logged in successfully.", "success") flash("Logged in successfully.", "success")
next = request.args.get("next") next = request.args.get("next")