|
|
|
import sys
|
|
|
|
import subprocess
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
|
|
|
|
# Command-line interface: <source_dir> is the documentation source tree (its
# css/print-theme.css is used below) and <build_dir> is the build output tree
# (its weasyprint/ sub-directory holds index.html and every generated file).
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('Usage: ' + sys.argv[0] + ' <source_dir> <build_dir>')
source_dir = sys.argv[1]
build_dir = sys.argv[2]

# The table of content is spliced into index.pdf right after this page
# (1-based page number).
insert_toc_after_page = 1
# Bookmarks whose level is greater than this value are dropped from the
# final PDF.
max_bookmark_level = 3

index_pdf_filename = build_dir + '/weasyprint/index.pdf'
css_filename = source_dir + '/css/print-theme.css'

# Directory containing this script. abspath() matters when __file__ is a bare
# relative name: dirname() would then be '' and script_dir + '/../tool' would
# silently point at the filesystem root.
script_dir = os.path.dirname(os.path.abspath(__file__))
# Compile PDF
# Render index.html to index.pdf with the print stylesheet. check=True raises
# CalledProcessError when weasyprint exits with a non-zero status. The call is
# not wrapped in `assert`, because assert statements (and the command inside
# them) are skipped entirely when Python runs with -O.
subprocess.run(
    ['weasyprint', build_dir + '/weasyprint/index.html', index_pdf_filename, '-s', css_filename],
    check=True,
)
# Generate table of content (TOC)
# Step 1: run the pdftoc-to-latex helper on index.pdf and capture its standard
# output in toc.tex. The paths are handed to the shell as positional
# parameters ("$1", "$2", "$3") rather than pasted into the command string, so
# quotes, '$' or backticks in a path cannot break or alter the command.
# check=True raises CalledProcessError if the helper fails.
subprocess.run(
    ['sh', '-c', '"$1" "$2" > "$3"', 'sh',
     script_dir + '/../pdftoc-to-latex',
     index_pdf_filename,
     build_dir + '/weasyprint/toc.tex'],
    check=True,
)

# Step 2: typeset toc.tex into toc.pdf inside the weasyprint build directory.
# NOTE(review): the exit status of pdflatex is intentionally left unchecked,
# as before; presumably pdflatex can exit non-zero in nonstopmode while still
# producing a usable toc.pdf. Confirm before making this strict.
subprocess.run(['pdflatex', '-interaction', 'nonstopmode', '-output-directory=' + build_dir + '/weasyprint', build_dir + '/weasyprint/toc.tex'])
# Count TOC pages
# Ask pdfinfo about toc.pdf and read the number from its "Pages:" line.
# check=True raises CalledProcessError if pdfinfo fails (for example because
# toc.pdf was never produced).
toc_pdfinfo = subprocess.run(
    ['pdfinfo', build_dir + '/weasyprint/toc.pdf'],
    stdout=subprocess.PIPE,
    check=True,
)
# Raw string: '\s' is not a valid escape in a normal string literal.
# errors='replace' keeps undecodable metadata bytes from aborting the script;
# only the ASCII "Pages:" line is of interest.
toc_pages_match = re.search(
    r'^Pages:\s+([0-9]+)',
    toc_pdfinfo.stdout.decode(errors='replace'),
    re.MULTILINE,
)
if toc_pages_match is None:
    raise RuntimeError('No "Pages:" line found in the pdfinfo output for toc.pdf')
toc_num_pages = int(toc_pages_match.group(1))
print('toc.pdf: ' + str(toc_num_pages) + ' page(s)')
# Count pages in index.pdf
# Same approach as for the TOC: run pdfinfo and parse its "Pages:" line.
# check=True raises CalledProcessError if pdfinfo fails.
pdfinfo = subprocess.run(
    ['pdfinfo', index_pdf_filename],
    stdout=subprocess.PIPE,
    check=True,
)
# Raw string: '\s' is not a valid escape in a normal string literal.
# errors='replace' keeps undecodable metadata bytes from aborting the script;
# only the ASCII "Pages:" line is of interest.
pages_match = re.search(
    r'^Pages:\s+([0-9]+)',
    pdfinfo.stdout.decode(errors='replace'),
    re.MULTILINE,
)
if pages_match is None:
    raise RuntimeError('No "Pages:" line found in the pdfinfo output for index.pdf')
num_pages = int(pages_match.group(1))
print('index.pdf: ' + str(num_pages) + ' pages')

num_pages = num_pages + toc_num_pages # account for table of content that will be added later
# If needed, update the CSS file with the correct number of pages
# The stylesheet hard-codes the total page count in a rule of the form
#   content: counter(page) "/<N>";
# Rewrite <N> to the real total (document pages plus TOC pages).
with open(css_filename) as css_file:
    original_css = css_file.read()

# Raw string: '\(' is not a valid escape in a normal string literal.
modified_css = re.sub(
    r'content: counter\(page\) "/[0-9]+";',
    'content: counter(page) "/' + str(num_pages) + '";',
    original_css,
)

# Only touch the file and rebuild when the stored total was actually wrong.
if modified_css != original_css:
    with open(css_filename, 'w') as css_file:
        css_file.write(modified_css)

    # We need to compile again with the modified CSS (this won't impact the TOC)
    print('Number of pages has changed, rebuilding PDF...')
    # check=True raises CalledProcessError on failure; unlike an assert-wrapped
    # call, the command still runs under python -O.
    subprocess.run(
        ['weasyprint', build_dir + '/weasyprint/index.html', index_pdf_filename, '-s', css_filename],
        check=True,
    )
# Insert TOC in the PDF
# pdftk concatenates: pages 1..insert_toc_after_page of index.pdf (handle A),
# then all of toc.pdf (handle B), then the remaining pages of index.pdf.
# The first range is written as 'A1-<n>' so that every leading page is kept
# for any value of insert_toc_after_page; a bare 'A<n>' would select page <n>
# only. check=True raises CalledProcessError if pdftk fails.
subprocess.run(
    ['pdftk',
     'A=' + index_pdf_filename,
     'B=' + build_dir + '/weasyprint/toc.pdf',
     'cat',
     'A1-' + str(insert_toc_after_page),
     'B',
     'A' + str(insert_toc_after_page + 1) + '-end',
     'output', build_dir + '/weasyprint/vheliotech-without-bookmarks.pdf'],
    check=True,
)
# Restore bookmarks
# Bookmarks are read from index.pdf, and later written into the final PDF
# built from the TOC-augmented file.
extract_bookmarks_from = index_pdf_filename
source_pdf_filename = build_dir + '/weasyprint/vheliotech-without-bookmarks.pdf'
output_filename = build_dir + '/weasyprint/vheliotech.pdf'

# Swap only the file extension for '.txt'. str.replace('.pdf', '.txt') would
# also rewrite a '.pdf' occurring elsewhere in the path.
bookmarks_filename = os.path.splitext(extract_bookmarks_from)[0] + '.txt'
if bookmarks_filename == extract_bookmarks_from:
    # Refuse to overwrite the PDF the metadata is extracted from.
    raise ValueError('Bookmarks file would overwrite its source: ' + extract_bookmarks_from)

# extract PDF metadata into a text file
# check=True raises CalledProcessError if pdftk fails.
subprocess.run(
    ['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename],
    check=True,
)

with open(bookmarks_filename) as bookmarks_file:
    metadata = bookmarks_file.read()

# Remove link icon character at the end of each bookmark name (these are added by sphinx but make no sense in a PDF bookmark)
# NOTE(review): the search string below appears to be empty, which makes this
# call a no-op. The icon character may have been lost from this file; confirm
# against the upstream script and restore it if so.
metadata = metadata.replace('', '')
# Remove bookmarks for small titles, adjust page number for remaining ones
def filterBookmark(match):
    """Return the replacement text for one matched pdftk bookmark record.

    `match` must expose three groups: (1) the bookmark title, (2) its level
    and (3) its page number, the last two as decimal digit strings.

    Returns '' when the level is greater than the module-level
    `max_bookmark_level`, which removes the record. Otherwise returns the
    record re-serialised, with the page number increased by `toc_num_pages`
    when it is greater than `insert_toc_after_page`.
    """
    title = match.group(1)
    level_text = match.group(2)

    # Guard clause: bookmarks that are too deep are dropped.
    if int(level_text) > max_bookmark_level:
        return ''

    page = int(match.group(3))
    if page > insert_toc_after_page:
        # Pages located after the insertion point move down by the TOC length.
        page = page + toc_num_pages

    pieces = (
        'BookmarkBegin\nBookmarkTitle: ',
        title.replace(' ', ' '),
        '\nBookmarkLevel: ',
        level_text,
        '\nBookmarkPageNumber: ',
        str(page),
        '\n',
    )
    return ''.join(pieces)
# Run every bookmark record of the dumped metadata through filterBookmark,
# which drops the record or rewrites its page number.
metadata = re.sub('BookmarkBegin\nBookmarkTitle: (.*)\nBookmarkLevel: ([0-9]+)\nBookmarkPageNumber: ([0-9]+)\n', filterBookmark, metadata)

# Overwrite the dump file with the filtered metadata so pdftk can read it back.
with open(bookmarks_filename, 'w') as bookmarks_file:
    bookmarks_file.write(metadata)

# generate the output PDF
# check=True raises CalledProcessError if pdftk fails; unlike an assert-wrapped
# call, the command still runs under python -O.
subprocess.run(
    ['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename],
    check=True,
)

print('Generated file: ' + output_filename)