From ca4d9a1518c4e6ad830ffc5144947ffab39c7188 Mon Sep 17 00:00:00 2001
From: Youen
Date: Wed, 10 May 2023 20:28:13 +0200
Subject: [PATCH] =?UTF-8?q?Am=C3=A9lioration=20de=20la=20g=C3=A9n=C3=A9rat?=
 =?UTF-8?q?ion=20de=20PDF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Mise à jour automatique du nombre de pages dans le CSS
- Intégration des bookmarks dans le PDF final
---
 Makefile                             | 10 ++++-
 sphinx-tools/count_pdf_pages.py      | 50 +++++++++++++++++++++
 sphinx-tools/update_pdf_bookmarks.py | 67 ++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 sphinx-tools/count_pdf_pages.py
 create mode 100644 sphinx-tools/update_pdf_bookmarks.py

diff --git a/Makefile b/Makefile
index 222f524..273a77e 100644
--- a/Makefile
+++ b/Makefile
@@ -20,13 +20,19 @@ help:
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 # Default make weasyprint target has a bug so we have to call it manually on the generated index.html file
+# Then we execute count_pdf_pages.py that will count the number of pages, and update print-theme.css
+# Then we generate index.pdf again (with the correct number of pages in the footer)
 # After that, extract table of content data from the pdf and generate a .tex file
 # Then generate a toc.pdf from the .tex file
 # And put them together in a .pdf file
+# Finally, we generate the final PDF by adding the bookmarks (for easier navigation in the PDF)
 # After all, clean up
 pdf: weasyprint
+	weasyprint build/weasyprint/index.html build/weasyprint/index.pdf -s source/css/print-theme.css
+	python3 sphinx-tools/count_pdf_pages.py
 	weasyprint build/weasyprint/index.html build/weasyprint/index.pdf -s source/css/print-theme.css
 	./pdftoc-to-latex build/weasyprint/index.pdf > build/weasyprint/toc.tex
 	pdflatex -interaction nonstopmode -output-directory=build/weasyprint build/weasyprint/toc.tex
-	pdftk A=build/weasyprint/index.pdf B=build/weasyprint/toc.pdf cat A1 B A2-end output build/weasyprint/vheliotech.pdf
-	rm build/weasyprint/index.pdf build/weasyprint/toc.tex build/weasyprint/toc.pdf build/weasyprint/GuidedemontageVheliotech.pdf
\ No newline at end of file
+	pdftk A=build/weasyprint/index.pdf B=build/weasyprint/toc.pdf cat A1 B A2-end output build/weasyprint/vheliotech-without-bookmarks.pdf
+	python3 sphinx-tools/update_pdf_bookmarks.py build/weasyprint/index.pdf build/weasyprint/vheliotech-without-bookmarks.pdf build/weasyprint/vheliotech.pdf
+	#rm build/weasyprint/index.pdf build/weasyprint/toc.tex build/weasyprint/toc.pdf build/weasyprint/GuidedemontageVheliotech.pdf
diff --git a/sphinx-tools/count_pdf_pages.py b/sphinx-tools/count_pdf_pages.py
new file mode 100644
--- /dev/null
+++ b/sphinx-tools/count_pdf_pages.py
@@ -0,0 +1,50 @@
+"""Write the final page count of the PDF into the print stylesheet.
+
+Reads the number of pages of the intermediate PDF with ``pdfinfo``, adds the
+pages of the table of contents that the Makefile inserts afterwards, and
+rewrites the ``content: counter(page) "/N";`` rule of the print CSS so that a
+second weasyprint run shows the correct total in the page footer.
+"""
+import re
+import subprocess
+import sys
+
+pdf_filename = 'build/weasyprint/index.pdf'
+css_filename = 'source/css/print-theme.css'
+additional_pages = 2  # pages of the table of contents, inserted later
+
+
+def main():
+    # count pages in index.pdf; check=True stops the build if pdfinfo fails
+    pdfinfo = subprocess.run(
+        ['pdfinfo', pdf_filename], stdout=subprocess.PIPE, check=True)
+    pages_match = re.search(
+        r'^Pages:\s+([0-9]+)$', pdfinfo.stdout.decode(), re.MULTILINE)
+    if pages_match is None:
+        sys.exit('count_pdf_pages: no "Pages:" line in pdfinfo output for '
+                 + pdf_filename)
+    num_pages = int(pages_match.group(1))
+    print('index.pdf: ' + str(num_pages) + ' pages')
+
+    # account for table of content that will be added later
+    num_pages = num_pages + additional_pages
+
+    # update the CSS file with the correct number of pages
+    with open(css_filename) as css_file:
+        css = css_file.read()
+
+    css, replaced = re.subn(
+        r'content: counter\(page\) "/[0-9]+";',
+        'content: counter(page) "/' + str(num_pages) + '";',
+        css)
+    if replaced == 0:
+        # Nothing to update: warn instead of silently keeping a stale total.
+        print('count_pdf_pages: warning: no page counter rule found in '
+              + css_filename, file=sys.stderr)
+
+    with open(css_filename, 'w') as css_file:
+        css_file.write(css)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sphinx-tools/update_pdf_bookmarks.py b/sphinx-tools/update_pdf_bookmarks.py
new file mode 100644
--- /dev/null
+++ b/sphinx-tools/update_pdf_bookmarks.py
@@ -0,0 +1,67 @@
+"""Copy the bookmarks of one PDF into another, shifting their page numbers.
+
+Usage: update_pdf_bookmarks.py BOOKMARKS_PDF SOURCE_PDF OUTPUT_PDF
+
+The metadata of BOOKMARKS_PDF is dumped with pdftk, every bookmark located
+after the first page is moved down by the number of inserted pages, and the
+result is applied to SOURCE_PDF to produce OUTPUT_PDF.
+"""
+import os
+import re
+import subprocess
+import sys
+
+# Number of pages inserted into the document (the table of contents).
+inserted_pages = 2
+# The Makefile inserts them right after this page ("cat A1 B A2-end"), so
+# bookmarks up to and including this page keep their page number.
+inserted_after_page = 1
+
+
+def replace_bookmark_page_number(match):
+    """Return the BookmarkPageNumber entry adjusted for the inserted pages."""
+    initial_page = int(match.group(1))
+    if initial_page > inserted_after_page:
+        initial_page += inserted_pages
+    return 'BookmarkPageNumber: ' + str(initial_page)
+
+
+def main():
+    if len(sys.argv) < 4:
+        sys.exit('usage: update_pdf_bookmarks.py '
+                 'BOOKMARKS_PDF SOURCE_PDF OUTPUT_PDF')
+    extract_bookmarks_from, source_pdf_filename, output_filename = sys.argv[1:4]
+
+    # Swap only the extension: str.replace() would also rewrite a ".pdf"
+    # found anywhere else in the path.
+    bookmarks_filename = os.path.splitext(extract_bookmarks_from)[0] + '.txt'
+    if bookmarks_filename == extract_bookmarks_from:
+        sys.exit('update_pdf_bookmarks: metadata file would overwrite '
+                 + extract_bookmarks_from)
+
+    # extract PDF metadata into a text file
+    subprocess.run(
+        ['pdftk', extract_bookmarks_from, 'dump_data',
+         'output', bookmarks_filename],
+        check=True)
+
+    with open(bookmarks_filename) as bookmarks_file:
+        metadata = bookmarks_file.read()
+
+    # Offset page numbers
+    metadata = re.sub(
+        r'BookmarkPageNumber:\s+([0-9]+)',
+        replace_bookmark_page_number, metadata)
+
+    with open(bookmarks_filename, 'w') as bookmarks_file:
+        bookmarks_file.write(metadata)
+
+    # generate the output PDF
+    subprocess.run(
+        ['pdftk', source_pdf_filename, 'update_info', bookmarks_filename,
+         'output', output_filename],
+        check=True)
+
+
+if __name__ == '__main__':
+    main()