Browse Source

file crawler for annotations,descriptions,tags

pull/12/head
crunk 4 months ago
parent
commit
bb1bd62250
  1. 21
      verse/file_crawler.py

21
verse/file_crawler.py

@ -1 +1,22 @@
import os
import magic
from distribusi.mappings import CODE_TYPES, FILE_TYPES, SUB_TYPES
from models.distribusi_file_model import DistribusiFiles
# Shared detector created once at import time; mime=True asks python-magic
# for MIME strings rather than free-text descriptions.
MIME_TYPE = magic.Magic(mime=True)
def distribusi_file_with_type(full_path):
    """Report *full_path* when its MIME top-level type is a known file type.

    The MIME string is obtained from the module-level ``MIME_TYPE`` detector
    and the part before the first "/" is looked up in ``FILE_TYPES``. A match
    is printed to stdout; nothing is returned.

    Args:
        full_path: Path of the file to inspect.
    """
    mime = MIME_TYPE.from_file(full_path)
    # partition() never raises, whereas unpacking split("/") fails with
    # ValueError whenever the detected string does not contain exactly one
    # "/", which would abort the whole crawl on a single odd file.
    type_ = mime.partition("/")[0]
    if type_ in FILE_TYPES:
        print(f"distribusi file:{full_path} type:{type_}")
# Walk the "stash" tree top-down and pass every non-hidden file to the
# type check. Only dot-files are skipped; dot-directories are still visited.
for dirpath, _subdirs, filenames in os.walk("stash", topdown=True):
    for filename in filenames:
        if filename.startswith("."):
            continue
        distribusi_file_with_type(os.path.join(dirpath, filename))

Loading…
Cancel
Save