From 2b5e828c59716371fc49852bc79aa51bc8214d3a Mon Sep 17 00:00:00 2001
From: Youen
Date: Wed, 10 May 2023 23:39:35 +0200
Subject: [PATCH] Compilation du PDF via un script python
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Permet de gérer automatiquement le nombre de pages de la table des
matières, le nombre de pages total du PDF, et de ne le re-générer que si
le nombre de pages a changé dans le CSS.
---
Review notes (this section is not part of the commit message):

- External commands are run with subprocess.run(..., check=True) instead of
  being wrapped in assert(): asserts are stripped by "python -O", which
  would skip the commands altogether.
- The pdflatex step no longer requires exit status 1 (a clean run would have
  aborted the build). It is judged on whether toc.pdf was produced, and a
  stale toc.pdf is removed beforehand.
- Regular expressions are raw strings (same patterns, no invalid escapes).
- The TOC is generated without building an "sh -c" command line.
- The script directory is made absolute before locating pdftoc-to-latex.
- Page counting is a single helper that reports a missing page count.
- The indentation of the removed Python lines was reconstructed as four
  spaces; confirm against the repository before applying.
- The blob id on the index line of make_pdf.py predates these changes.

 Makefile                             |  17 +---
 sphinx-tools/count_pdf_pages.py      |  25 ------
 sphinx-tools/make_pdf.py             | 121 +++++++++++++++++++++++++++
 sphinx-tools/update_pdf_bookmarks.py |  29 -------
 4 files changed, 122 insertions(+), 70 deletions(-)
 delete mode 100644 sphinx-tools/count_pdf_pages.py
 create mode 100644 sphinx-tools/make_pdf.py
 delete mode 100644 sphinx-tools/update_pdf_bookmarks.py

diff --git a/Makefile b/Makefile
index cffb8b8..bcc15dc 100644
--- a/Makefile
+++ b/Makefile
@@ -19,20 +19,5 @@ help:
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-# Default make weasyprint target has a bug so we have to call it manually on the generated index.html file
-# Then we execute count_pdf_pages.py that will count the number of pages, and update print-theme.css
-# Then we generate index.pdf again (with the correct number of pages in the footer)
-# After that, extract table of content data from the pdf and generate a .tex file
-# Then generate a toc.pdf from the .tex file
-# And put them together in a .pdf file
-# Finally, we generate the final PDF by adding the bookmarks (for easier navigation in the PDF)
-# After all, clean up
 pdf: weasyprint
-	weasyprint "$(BUILDDIR)/weasyprint/index.html" "$(BUILDDIR)/weasyprint/index.pdf" -s "$(SOURCEDIR)/css/print-theme.css"
-	python3 sphinx-tools/count_pdf_pages.py "$(BUILDDIR)/weasyprint/index.pdf" "$(SOURCEDIR)/css/print-theme.css"
-	weasyprint "$(BUILDDIR)/weasyprint/index.html" "$(BUILDDIR)/weasyprint/index.pdf" -s "$(SOURCEDIR)/css/print-theme.css"
-	./pdftoc-to-latex "$(BUILDDIR)/weasyprint/index.pdf" > "$(BUILDDIR)/weasyprint/toc.tex"
-	- pdflatex -interaction nonstopmode "-output-directory=$(BUILDDIR)/weasyprint" "$(BUILDDIR)/weasyprint/toc.tex"
-	pdftk "A=$(BUILDDIR)/weasyprint/index.pdf" "B=$(BUILDDIR)/weasyprint/toc.pdf" cat A1 B A2-end output "$(BUILDDIR)/weasyprint/vheliotech-without-bookmarks.pdf"
-	python3 sphinx-tools/update_pdf_bookmarks.py "$(BUILDDIR)/weasyprint/index.pdf" "$(BUILDDIR)/weasyprint/vheliotech-without-bookmarks.pdf" "$(BUILDDIR)/weasyprint/vheliotech.pdf"
-	#rm "$(BUILDDIR)/weasyprint/index.pdf" "$(BUILDDIR)/weasyprint/toc.tex" "$(BUILDDIR)/weasyprint/toc.pdf" "$(BUILDDIR)/weasyprint/GuidedemontageVheliotech.pdf"
+	python3 sphinx-tools/make_pdf.py "$(SOURCEDIR)" "$(BUILDDIR)"
diff --git a/sphinx-tools/count_pdf_pages.py b/sphinx-tools/count_pdf_pages.py
deleted file mode 100644
index eaee5ea..0000000
--- a/sphinx-tools/count_pdf_pages.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import subprocess
-import re
-import sys
-
-pdf_filename = sys.argv[1]
-css_filename = sys.argv[2]
-additional_pages = 2
-
-# count pages in index.pdf
-pdfinfo = subprocess.run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE)
-pages_match = re.search('\\nPages:\s+([0-9]+)\\n', pdfinfo.stdout.decode())
-num_pages = int(pages_match.group(1))
-print('index.pdf: ' + str(num_pages) + ' pages')
-
-num_pages = num_pages + additional_pages # account for table of content that will be added later
-
-# update the CSS file with the correct number of pages
-
-with open(css_filename) as css_file:
-    css = css_file.read()
-
-css = re.sub('content: counter\(page\) "/[0-9]+";', 'content: counter(page) "/'+str(num_pages)+'";', css)
-
-with open(css_filename, 'w') as css_file:
-    css_file.write(css)
diff --git a/sphinx-tools/make_pdf.py b/sphinx-tools/make_pdf.py
new file mode 100644
index 0000000..1db7359
--- /dev/null
+++ b/sphinx-tools/make_pdf.py
@@ -0,0 +1,121 @@
+"""Build the final PDF (table of contents and bookmarks included).
+
+Usage: make_pdf.py SOURCE_DIR BUILD_DIR
+"""
+import sys
+import subprocess
+import re
+import os
+
+# The table of contents is inserted right after this page of index.pdf
+INSERT_TOC_AFTER_PAGE = 1
+
+
+def run(command, **kwargs):
+    """Run an external command, raising CalledProcessError if it fails.
+
+    check=True is used rather than an assert on the return code: asserts are
+    stripped by "python -O", which would skip the command altogether.
+    """
+    return subprocess.run(command, check=True, **kwargs)
+
+
+def count_pdf_pages(pdf_filename):
+    """Return the number of pages that pdfinfo reports for pdf_filename."""
+    pdfinfo = run(['pdfinfo', pdf_filename], stdout=subprocess.PIPE)
+    pages_match = re.search(r'\nPages:\s+([0-9]+)\n', pdfinfo.stdout.decode())
+    if pages_match is None:
+        raise RuntimeError('No page count in pdfinfo output for ' + pdf_filename)
+    return int(pages_match.group(1))
+
+
+def offset_bookmark_pages(metadata, toc_num_pages):
+    """Shift the bookmarks located after the TOC by the length of the TOC."""
+    def replace_bookmark_page_number(match):
+        initial_page = int(match.group(1))
+        if initial_page > INSERT_TOC_AFTER_PAGE:
+            initial_page += toc_num_pages
+        return 'BookmarkPageNumber: ' + str(initial_page)
+    return re.sub(r'BookmarkPageNumber:\s+([0-9]+)', replace_bookmark_page_number, metadata)
+
+
+def main(source_dir, build_dir):
+    """Compile index.html to PDF, then insert the TOC and restore the bookmarks."""
+    output_dir = os.path.join(build_dir, 'weasyprint')
+    index_html_filename = os.path.join(output_dir, 'index.html')
+    index_pdf_filename = os.path.join(output_dir, 'index.pdf')
+    toc_tex_filename = os.path.join(output_dir, 'toc.tex')
+    toc_pdf_filename = os.path.join(output_dir, 'toc.pdf')
+    merged_pdf_filename = os.path.join(output_dir, 'vheliotech-without-bookmarks.pdf')
+    output_filename = os.path.join(output_dir, 'vheliotech.pdf')
+    bookmarks_filename = os.path.splitext(index_pdf_filename)[0] + '.txt'
+    css_filename = os.path.join(source_dir, 'css', 'print-theme.css')
+    # abspath: dirname(__file__) may be empty when run from the script directory
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    pdftoc_to_latex = os.path.join(script_dir, '..', 'pdftoc-to-latex')
+    weasyprint_command = ['weasyprint', index_html_filename, index_pdf_filename, '-s', css_filename]
+
+    # Compile PDF
+    run(weasyprint_command)
+
+    # Generate table of content (TOC); stdout is redirected from Python so that
+    # no shell command line has to be assembled from the file names
+    with open(toc_tex_filename, 'wb') as toc_tex_file:
+        run([pdftoc_to_latex, index_pdf_filename], stdout=toc_tex_file)
+
+    # The exit status of pdflatex is deliberately not checked: the previous
+    # version of this step expected status 1 and the Makefile ignored it. The
+    # step is judged on its output instead, and a stale toc.pdf is removed
+    # first so that it cannot hide a failure.
+    if os.path.exists(toc_pdf_filename):
+        os.remove(toc_pdf_filename)
+    subprocess.run(['pdflatex', '-interaction', 'nonstopmode', '-output-directory=' + output_dir, toc_tex_filename])
+    if not os.path.exists(toc_pdf_filename):
+        raise RuntimeError('pdflatex did not produce ' + toc_pdf_filename)
+
+    # Count pages
+    toc_num_pages = count_pdf_pages(toc_pdf_filename)
+    print('toc.pdf: ' + str(toc_num_pages) + ' page(s)')
+    num_pages = count_pdf_pages(index_pdf_filename)
+    print('index.pdf: ' + str(num_pages) + ' pages')
+
+    num_pages = num_pages + toc_num_pages # account for table of content that will be added later
+
+    # If needed, update the CSS file with the correct number of pages
+    with open(css_filename) as css_file:
+        original_css = css_file.read()
+
+    modified_css = re.sub(r'content: counter\(page\) "/[0-9]+";', 'content: counter(page) "/' + str(num_pages) + '";', original_css)
+
+    if modified_css != original_css:
+        with open(css_filename, 'w') as css_file:
+            css_file.write(modified_css)
+
+        # We need to compile again with the modified CSS (this won't impact the TOC)
+        print('Number of pages has changed, rebuilding PDF...')
+        run(weasyprint_command)
+
+    # Insert TOC in the PDF
+    run(['pdftk', 'A=' + index_pdf_filename, 'B=' + toc_pdf_filename, 'cat',
+         'A' + str(INSERT_TOC_AFTER_PAGE), 'B', 'A' + str(INSERT_TOC_AFTER_PAGE + 1) + '-end',
+         'output', merged_pdf_filename])
+
+    # Restore bookmarks: extract the PDF metadata of index.pdf into a text file
+    run(['pdftk', index_pdf_filename, 'dump_data', 'output', bookmarks_filename])
+
+    with open(bookmarks_filename) as bookmarks_file:
+        metadata = bookmarks_file.read()
+
+    with open(bookmarks_filename, 'w') as bookmarks_file:
+        bookmarks_file.write(offset_bookmark_pages(metadata, toc_num_pages))
+
+    # Generate the output PDF
+    run(['pdftk', merged_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename])
+
+    print('Generated file: ' + output_filename)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        sys.exit('Usage: make_pdf.py SOURCE_DIR BUILD_DIR')
+    main(sys.argv[1], sys.argv[2])
diff --git a/sphinx-tools/update_pdf_bookmarks.py b/sphinx-tools/update_pdf_bookmarks.py
deleted file mode 100644
index e70125d..0000000
--- a/sphinx-tools/update_pdf_bookmarks.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import sys
-import subprocess
-import re
-
-extract_bookmarks_from = sys.argv[1]
-source_pdf_filename = sys.argv[2]
-output_filename = sys.argv[3]
-
-bookmarks_filename = extract_bookmarks_from.replace('.pdf', '.txt')
-assert(bookmarks_filename != extract_bookmarks_from)
-
-# extract PDF metadata into a text file
-subprocess.run(['pdftk', extract_bookmarks_from, 'dump_data', 'output', bookmarks_filename])
-
-with open(bookmarks_filename) as bookmarks_file:
-    metadata = bookmarks_file.read()
-
-# Offset page numbers
-def replaceBookmarkPageNumber(match):
-    initial_page = int(match.group(1))
-    final_page = initial_page + 2 if initial_page > 1 else initial_page
-    return 'BookmarkPageNumber: ' + str(final_page)
-metadata = re.sub('BookmarkPageNumber:\s+([0-9]+)', replaceBookmarkPageNumber, metadata)
-
-with open(bookmarks_filename, 'w') as bookmarks_file:
-    bookmarks_file.write(metadata)
-
-# generate the output PDF
-subprocess.run(['pdftk', source_pdf_filename, 'update_info', bookmarks_filename, 'output', output_filename])