Compilation du PDF via un script Python

Permet de gérer automatiquement le nombre de pages de la table des matières et le nombre total de pages du PDF, et de ne régénérer le PDF que si le nombre de pages a changé dans le CSS.
3 years ago · 2b5e828c59
4 changed files with 83 additions and 70 deletions
--- a/17
+++ b/17
@ -19,20 +19,5 @@ help:
 # Catch-all target: any target not defined explicitly is forwarded to $(SPHINXBUILD) through its -M option.
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 # The default "make weasyprint" target has a bug, so weasyprint is called manually on the generated index.html file.
 # Then count_pdf_pages.py counts the number of pages and updates print-theme.css accordingly.
 # index.pdf is then generated again (with the correct number of pages in the footer).
 # After that, the table of contents data is extracted from the PDF to generate a .tex file,
 # toc.pdf is generated from that .tex file,
 # and both PDFs are put together in a single .pdf file.
 # Finally, the bookmarks are added to produce the final PDF (for easier navigation in the PDF).
 # Intermediate files are currently kept: the cleanup command at the end of the recipe is commented out.
 # NOTE(review): the first recipe step now calls sphinx-tools/make_pdf.py; confirm that the description above still matches the recipe.
 pdf: weasyprint
-	weasyprint "$(BUILDDIR)/weasyprint/index.html" "$(BUILDDIR)/weasyprint/index.pdf" -s "$(SOURCEDIR)/css/print-theme.css"
+	python3 sphinx-tools/make_pdf.py "$(SOURCEDIR)" "$(BUILDDIR)"
 	python3 sphinx-tools/count_pdf_pages.py "$(BUILDDIR)/weasyprint/index.pdf" "$(SOURCEDIR)/css/print-theme.css"
 	weasyprint "$(BUILDDIR)/weasyprint/index.html" "$(BUILDDIR)/weasyprint/index.pdf" -s "$(SOURCEDIR)/css/print-theme.css"
 	./pdftoc-to-latex "$(BUILDDIR)/weasyprint/index.pdf" > "$(BUILDDIR)/weasyprint/toc.tex"
 	- pdflatex -interaction nonstopmode "-output-directory=$(BUILDDIR)/weasyprint" "$(BUILDDIR)/weasyprint/toc.tex"
 	pdftk "A=$(BUILDDIR)/weasyprint/index.pdf" "B=$(BUILDDIR)/weasyprint/toc.pdf" cat A1 B A2-end output "$(BUILDDIR)/weasyprint/vheliotech-without-bookmarks.pdf"
 	python3 sphinx-tools/update_pdf_bookmarks.py "$(BUILDDIR)/weasyprint/index.pdf" "$(BUILDDIR)/weasyprint/vheliotech-without-bookmarks.pdf" "$(BUILDDIR)/weasyprint/vheliotech.pdf"
 	#rm "$(BUILDDIR)/weasyprint/index.pdf" "$(BUILDDIR)/weasyprint/toc.tex" "$(BUILDDIR)/weasyprint/toc.pdf" "$(BUILDDIR)/weasyprint/GuidedemontageVheliotech.pdf"
--- a/sphinx-tools/count_pdf_pages.py
+++ b/sphinx-tools/count_pdf_pages.py
@ -1,25 +0,0 @@
 import subprocess
 import re
 import sys
 # Usage: count_pdf_pages.py <pdf_filename> <css_filename>
 # Counts the pages of the PDF with `pdfinfo` and writes the total (plus the pages
 # of the table of contents added later) into the page counter rule of the CSS file.
 pdf_filename = sys.argv[1]
 css_filename = sys.argv[2]
 additional_pages = 2
 # count pages in index.pdf; check=True aborts with a clear error if pdfinfo fails
 pdfinfo = subprocess.run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE, check=True)
 # raw string: '\s' in a regular string literal is an invalid escape sequence
 pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo.stdout.decode())
 if pages_match is None:
 	sys.exit('Could not find the page count in the pdfinfo output for ' + pdf_filename)
 num_pages = int(pages_match.group(1))
 print('index.pdf: ' + str(num_pages) + ' pages')
 num_pages = num_pages + additional_pages # account for table of content that will be added later
 # update the CSS file with the correct number of pages
 with open(css_filename) as css_file:
 	css = css_file.read()
 css = re.sub(r'content: counter\(page\) "/[0-9]+";', 'content: counter(page) "/' + str(num_pages) + '";', css)
 with open(css_filename, 'w') as css_file:
 	css_file.write(css)
--- a/sphinx-tools/make_pdf.py
+++ b/sphinx-tools/make_pdf.py
@ -0,0 +1,82 @@
 import sys
 import subprocess
 import re
 import os
 # Usage: make_pdf.py <source_dir> <build_dir>
 # Renders index.html to PDF with weasyprint, generates the table of contents (TOC),
 # keeps the "/<total>" page counter of the print CSS in sync with the real page count,
 # inserts the TOC into the PDF and restores the bookmarks with shifted page numbers.
 source_dir = sys.argv[1]
 build_dir = sys.argv[2]
 insert_toc_after_page = 1
 weasyprint_dir = build_dir + '/weasyprint'
 index_html_filename = weasyprint_dir + '/index.html'
 index_pdf_filename = weasyprint_dir + '/index.pdf'
 toc_tex_filename = weasyprint_dir + '/toc.tex'
 toc_pdf_filename = weasyprint_dir + '/toc.pdf'
 css_filename = source_dir + '/css/print-theme.css'
 script_dir = os.path.dirname(__file__)
 def run_command(command, accepted_returncodes=(0,), **kwargs):
 	# Run an external command and abort the build when its exit status is not accepted.
 	# An explicit check is used instead of `assert`: with `python -O` assert statements
 	# are stripped, which would silently skip the command itself.
 	result = subprocess.run(command, **kwargs)
 	if result.returncode not in accepted_returncodes:
 		sys.exit('Command failed with exit status ' + str(result.returncode) + ': ' + ' '.join(command))
 	return result
 def count_pdf_pages(pdf_filename):
 	# Return the number of pages reported by `pdfinfo` for the given PDF file.
 	pdfinfo = run_command(['pdfinfo', pdf_filename], stdout=subprocess.PIPE)
 	pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo.stdout.decode())
 	if pages_match is None:
 		sys.exit('Could not find the page count in the pdfinfo output for ' + pdf_filename)
 	return int(pages_match.group(1))
 def compile_pdf():
 	# Render index.html to index.pdf using the print stylesheet.
 	run_command(['weasyprint', index_html_filename, index_pdf_filename, '-s', css_filename])
 # Compile PDF
 compile_pdf()
 # Generate table of content (TOC)
 # The helper's output is redirected by Python itself, so no shell string has to be built
 # and paths containing quotes or spaces are passed through unchanged.
 with open(toc_tex_filename, 'wb') as toc_tex_file:
 	run_command([os.path.join(script_dir, '..', 'pdftoc-to-latex'), index_pdf_filename], stdout=toc_tex_file)
 # The previous version required pdflatex to exit with status 1 (errors reported in
 # nonstopmode); a clean run (status 0) is accepted as well so it no longer aborts the build.
 run_command(['pdflatex', '-interaction', 'nonstopmode', '-output-directory=' + weasyprint_dir, toc_tex_filename], accepted_returncodes=(0, 1))
 # Count TOC pages
 toc_num_pages = count_pdf_pages(toc_pdf_filename)
 print('toc.pdf: ' + str(toc_num_pages) + ' page(s)')
 # Count pages in index.pdf
 num_pages = count_pdf_pages(index_pdf_filename)
 print('index.pdf: ' + str(num_pages) + ' pages')
 num_pages = num_pages + toc_num_pages # account for table of content that will be added later
 # If needed, update the CSS file with the correct number of pages
 with open(css_filename) as css_file:
 	original_css = css_file.read()
 modified_css = re.sub(r'content: counter\(page\) "/[0-9]+";', 'content: counter(page) "/' + str(num_pages) + '";', original_css)
 if modified_css != original_css:
 	with open(css_filename, 'w') as css_file:
 		css_file.write(modified_css)
 	# We need to compile again with the modified CSS (this won't impact the TOC)
 	print('Number of pages has changed, rebuilding PDF...')
 	compile_pdf()
 # Insert TOC in the PDF
 source_pdf_filename = weasyprint_dir + '/vheliotech-without-bookmarks.pdf'
 run_command(['pdftk', 'A=' + index_pdf_filename, 'B=' + toc_pdf_filename, 'cat', 'A' + str(insert_toc_after_page), 'B', 'A' + str(insert_toc_after_page + 1) + '-end', 'output', source_pdf_filename])
 # Restore bookmarks
 extract_bookmarks_from = index_pdf_filename
 output_filename = weasyprint_dir + '/vheliotech.pdf'
 # Only the extension is swapped, so a '.pdf' inside a directory name is left alone
 bookmarks_filename = os.path.splitext(extract_bookmarks_from)[0] + '.txt'
 # extract PDF metadata into a text file
 run_command(['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename])
 with open(bookmarks_filename) as bookmarks_file:
 	metadata = bookmarks_file.read()
 # Offset page numbers of bookmarks
 def replaceBookmarkPageNumber(match):
 	# Bookmarks located after the insertion point are shifted by the number of TOC pages
 	initial_page = int(match.group(1))
 	final_page = initial_page + toc_num_pages if initial_page > insert_toc_after_page else initial_page
 	return 'BookmarkPageNumber: ' + str(final_page)
 metadata = re.sub(r'BookmarkPageNumber:\s+([0-9]+)', replaceBookmarkPageNumber, metadata)
 with open(bookmarks_filename, 'w') as bookmarks_file:
 	bookmarks_file.write(metadata)
 # generate the output PDF
 run_command(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename])
 print('Generated file: ' + output_filename)
--- a/sphinx-tools/update_pdf_bookmarks.py
+++ b/sphinx-tools/update_pdf_bookmarks.py
@ -1,29 +0,0 @@
 import sys
 import subprocess
 import re
 # Usage: update_pdf_bookmarks.py <extract_bookmarks_from> <source_pdf_filename> <output_filename>
 extract_bookmarks_from = sys.argv[1]
 source_pdf_filename = sys.argv[2]
 output_filename = sys.argv[3]
 # The metadata dump gets the name of the bookmark source PDF with '.pdf' replaced by '.txt'
 bookmarks_filename = extract_bookmarks_from.replace('.pdf', '.txt')
 # Explicit check instead of `assert` (asserts are stripped by `python -O`): without it,
 # pdftk would be asked to write the dump over the PDF it is reading from.
 if bookmarks_filename == extract_bookmarks_from:
 	sys.exit('Cannot derive a metadata filename from ' + extract_bookmarks_from + " (no '.pdf' in the name)")
 # extract PDF metadata into a text file; check=True stops the script if pdftk fails
 subprocess.run(['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename], check=True)
 with open(bookmarks_filename) as bookmarks_file:
 	metadata = bookmarks_file.read()
 # Shift the page number of a bookmark entry
 def replaceBookmarkPageNumber(match):
 	# `match` captures the page number in group 1; pages after page 1 are moved
 	# 2 pages further, page 1 itself keeps its number.
 	page = int(match.group(1))
 	if page > 1:
 		page += 2
 	return 'BookmarkPageNumber: {}'.format(page)
 # Rewrite every bookmark page number in the metadata dump
 # (raw string: '\s' in a regular string literal is an invalid escape sequence)
 metadata = re.sub(r'BookmarkPageNumber:\s+([0-9]+)', replaceBookmarkPageNumber, metadata)
 with open(bookmarks_filename, 'w') as bookmarks_file:
 	bookmarks_file.write(metadata)
 # generate the output PDF; check=True stops the script if pdftk fails instead of
 # silently leaving a missing or outdated output file
 subprocess.run(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename], check=True)