start

2 years ago · 000bbcef8c
2 changed files with 145 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,33 @@
+# ATNOFS Documentation contraption
+
+## Current sketch
+
+Currently a Python script that downloads all the chapters listed here:
+<https://octomode.vvvvvvaria.org/atnofs-index/pad/>
+
+The chapters are listed as pads; the script downloads the HTML + CSS of each page and then creates a PDF from it with pagedjs-cli.
+
+It then compiles the full pdf with pypdftk.
+
+## Or...
+
+Maybe it should instead make a very long html from the previews?
+<https://octomode.vvvvvvaria.org/atnofs-index/preview.html>
+That would be useful in case we want page numbers, etc. from pagedjs.
+
+## Requirements
+
+* apt install python3
+* pip3 install pypdftk
+* npm install -g puppeteer
+* export NODE_CHROMIUM_REVISION=729994
+node .nvm/versions/node/v18.12.1/lib/node_modules/pagedjs-cli/node_modules/puppeteer/install.js
+* npm install -g chromium
+* npm install -g pagedjs-cli
+## Sandbox
+
+To be able to use pagedjs-cli, one needs to build a chrome-sandbox (...):
+chown root:root chrome_sandbox
+sudo chmod 4755 chrome_sandbox 
+sudo cp chrome_sandbox /usr/local/sbin/chrome-devel-sandbox
+export CHROME_DEVEL_SANDBOX=/usr/local/sbin/chrome-devel-sandbox
--- a/pdf-compile.py
+++ b/pdf-compile.py
@ -0,0 +1,112 @@
+#!/usr/bin/python3
+
+import requests
+import pypdftk
+import subprocess
+from requests.auth import HTTPBasicAuth
+import re
+import os
+import sys
+
+# --- Configuration ---------------------------------------------------------
+# Every setting keeps its original default. The ATNOFS_* environment
+# variables are optional overrides, so the script can run on another machine
+# (or with other credentials) without editing the source.
+
+# node binary and pagedjs-cli entry point; the defaults are machine-specific.
+NODE = os.environ.get('ATNOFS_NODE', '/home/dickreckard/.nvm/versions/node/v16.13.0/bin/node')
+PAGEDJSCLI = os.environ.get('ATNOFS_PAGEDJSCLI', '/home/dickreckard/.nvm/versions/node/v16.13.0/bin/pagedjs-cli')
+
+# Directory the chapter previews are written to in "local" mode, and the URL
+# prefix under which pagedjs-cli reads them back.
+localhost = '/var/www/html/atnofs/'
+localurl = 'http://localhost/atnofs/'
+
+# File name of the final compiled PDF.
+outputname='atnofs-compiled.pdf'
+
+# HTTP basic-auth credentials for chapters hosted on octomode.vvvvvvaria.org.
+# NOTE(review): the defaults are committed in source; prefer the environment
+# variables for anything that should stay private.
+authu = os.environ.get('ATNOFS_AUTH_USER', 'octomode')
+authp = os.environ.get('ATNOFS_AUTH_PASSWORD', 'spider')
+# Built from authu/authp so both access paths always use the same credentials.
+authvvvvvv = HTTPBasicAuth(authu, authp)
+
+# NOTE(review): not referenced anywhere else in the visible script.
+e2hprocessor='http://totalism.org:7777/glia2?sourceMethod=post&sinkFormat=text/graph/graphviz'
+
+# Chapter pad URLs collected from the index, in index order.
+chapters=[]
+# Per-chapter PDF file names, in the order they are concatenated.
+outputs=[]
+# Running chapter number, used as a zero-padded file name prefix.
+counter=0
+# Link target of the "ooooo" line in the index; stays empty if there is none.
+ooooo_insert=''
+
+# Download the index of all contributions.
+indexresponse = requests.get('https://pad.vvvvvvaria.org/atnofs-index.md/export/txt')
+# Stop here on an HTTP error instead of parsing an error page as the index.
+indexresponse.raise_for_status()
+indexmd = indexresponse.text
+
+# Keep a copy of the index in the current working directory.
+with open('index.md','w') as file:
+	file.write(indexmd)
+
+# Captures the target of the first markdown link "[label](target)" on a line.
+# The brackets and parentheses are escaped on purpose: unescaped, "[.*]" is a
+# character class and "(...)" a group, which matched almost any line
+# containing "/pad/".
+mdlink = re.compile(r'\]\(([^()\s]+)\)')
+
+for line in indexmd.splitlines():
+	found = mdlink.search(line)
+	if not found:
+		# No link on this line, so there is nothing to extract.
+		continue
+	target = found.group(1)
+
+	if target.endswith('/pad/'):
+		# A chapter link: the target is an octomode pad URL.
+		chapters.append(target)
+	elif 'ooooo' in line:
+		# The ooooo insert. It is tested against the index line itself, not
+		# against an extracted chapter URL.
+		ooooo_insert = target
+
+
+# Iterate over all chapters to get their PDF versions.
+#
+# With "local" as the first command-line argument, each chapter's preview and
+# stylesheet are saved under `localhost` and pagedjs-cli renders the copy
+# served at `localurl`. Otherwise pagedjs-cli renders the chapter's remote
+# pagedjs.html directly.
+
+def _sibling(padurl, page):
+	"""Return padurl with its last 'pad/' replaced by page.
+
+	Only the last occurrence is replaced, so a pad whose name itself ends in
+	"pad" (".../notepad/pad/") keeps its name intact.
+	"""
+	return page.join(padurl.rsplit('pad/', 1))
+
+
+def _render(source, pdffile):
+	"""Run pagedjs-cli on source, writing pdffile; warn if it exits non-zero."""
+	result = subprocess.run([NODE, PAGEDJSCLI, source, '-o', pdffile])
+	if result.returncode != 0:
+		# `source` is left out of the message: it may contain credentials.
+		print('warning: pagedjs-cli exited with status '+str(result.returncode)+' for '+pdffile, file=sys.stderr)
+
+
+uselocal = len(sys.argv)>1 and sys.argv[1]=='local'
+
+for counter, chapter in enumerate(chapters):
+
+	# ".../<padname>/pad/" -> "00-<padname>"
+	name=str(counter).zfill(2)+'-'+chapter.split('/')[-3]
+	os.makedirs(localhost+name, exist_ok=True)
+	pdffile=name+'.pdf'
+	needsauth = 'octomode.vvvvvvaria.org' in chapter
+
+	if uselocal:
+
+		# Only the vvvvvvaria octomode instance gets basic auth.
+		auth = authvvvvvv if needsauth else None
+		preview = requests.get(_sibling(chapter, 'preview.html'), auth=auth)
+		style = requests.get(_sibling(chapter, 'stylesheet.css'), auth=auth)
+
+		# Report failed downloads; the response body is still saved as before.
+		for response in (preview, style):
+			if not response.ok:
+				print('warning: '+response.url+' returned HTTP '+str(response.status_code), file=sys.stderr)
+
+		stylefile=localhost+name+'/stylesheet.css'
+		with open(stylefile, 'w') as file:
+			file.write(style.text)
+
+		previewfile=localhost+name+'/preview.html'
+		localpreview=localurl+name+'/preview.html'
+
+		with open(previewfile, 'w') as file:
+			file.write(preview.text)
+
+		# HACK: until octomode is fixed on vvvvvvaria, inject the stylesheet
+		# <link> tag at line index 5 of the saved preview.
+		with open(previewfile, "r") as f:
+			contents = f.readlines()
+
+		contents.insert(5, '<link href="stylesheet.css" rel="stylesheet" type="text/css" media="screen">')
+
+		with open(previewfile, "w") as f:
+			f.write("".join(contents))
+		# end hack
+
+		# pagedjs-cli makes the PDF out of the localhost version of the HTML.
+		_render(localpreview, pdffile)
+
+	else:
+		remote = chapter
+		if needsauth:
+			# NOTE(review): the credentials are embedded in the URL and are
+			# therefore visible in the process argument list.
+			remote = remote.replace('://','://'+authu+':'+authp+'@')
+		_render(_sibling(remote, 'pagedjs.html'), pdffile)
+
+	outputs.append(pdffile)
+
+# The ooooo insert is only fetched and counted for now: the text export is
+# split on the '[graph]"""' marker and the number of pieces is printed.
+# Nothing from it is added to the compiled PDF yet.
+if ooooo_insert:
+	all_graphs = requests.get(ooooo_insert+'/export/txt')
+	graphs=all_graphs.text.split('[graph]"""')
+	print(len(graphs))
+
+
+# compiling all the files in one big pdf!
+
+# Without any chapter PDF there is nothing to hand to pdftk; stop with a
+# clear message instead of an error from the pdftk call.
+if not outputs:
+	sys.exit('no chapter links found in the index, nothing to compile')
+
+output = pypdftk.concat(outputs,out_file=outputname)
+