优化文集PDF导出

2021-02-28 08:22:34 +08:00 · 2021-02-28 08:22:34 +08:00 · 50bf6de540
commit 50bf6de540
parent f509718d9d
5 changed files with 106 additions and 47 deletions
--- a/MrDoc/settings.py
+++ b/MrDoc/settings.py
@ -215,4 +215,14 @@ HAYSTACK_CONNECTIONS = {
 # 当添加、修改、删除数据时，自动生成索引
 HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
 # 自定义高亮
-HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter"
+HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter"
+
+# Selenium 调用的driver类型 默认为Chromium
+try:
+    CHROMIUM_DRIVER = CONFIG['selenium']['driver']
+except:
+    CHROMIUM_DRIVER = 'CHROMIUM'
+if 'driver_path' in CONFIG['selenium'].keys():
+    CHROMIUM_DRIVER_PATH = CONFIG['selenium']['driver_path']
+else:
+    CHROMIUM_DRIVER_PATH = None
--- a/app_doc/report_html2pdf.py
+++ b/app_doc/report_html2pdf.py
@ -4,42 +4,94 @@
 # #日期：2020/12/27
 # 博客地址：zmister.com

+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.common.exceptions import TimeoutException
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.expected_conditions import staleness_of
+from webdriver_manager.chrome import ChromeDriverManager
+from webdriver_manager.utils import ChromeType
+from django.conf import settings
 import sys
-from urllib.parse import quote
-from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets, QtGui
-# print(sys.path)
+import json
+import base64


-def html2pdf(html_path,pdf_path):
-    html_path = '/'.join(html_path.split('\\'))
-    html_path = quote(html_path, safe='/:?=')
-    # 实例化一个Qt应用
-    app = QtWidgets.QApplication(sys.argv)
-    # 实例化一个WebEngineView
-    loader = QtWebEngineWidgets.QWebEngineView()
-    # 设置视图缩放比例
-    loader.setZoomFactor(1)
-    # 设置页码打印完成后的槽
-    loader.page().pdfPrintingFinished.connect(loader.close)
-    # 请求HTML文件
-    loader.load(QtCore.QUrl("file:///{}".format(html_path)))
+def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
+    '''
+    Convert a given html file or website into PDF

-    def emit_pdf(finished):
-        layout = QtGui.QPageLayout()
-        layout.setPageSize(QtGui.QPageSize(QtGui.QPageSize.A4Extra))
-        layout.setLeftMargin(20)
-        layout.setRightMargin(20)
-        layout.setTopMargin(20)
-        layout.setBottomMargin(20)
-        layout.setOrientation(QtGui.QPageLayout.Portrait)
-        loader.page().printToPdf(pdf_path, pageLayout=layout)
+    :param str source: source html file or website link
+    :param str target: target location to save the PDF
+    :param int timeout: timeout in seconds. Default value is set to 2 seconds
+    :param bool compress: whether PDF is compressed or not. Default value is False. Currently unused: the compression call below is commented out
+    :param int power: power of the compression. Default value is 0. This can be 0: default, 1: prepress, 2: printer, 3: ebook, 4: screen. Currently unused, see ``compress``
+    :param bool install_driver: passed through to ``__get_pdf_from_html``. Default value is True
+    '''

-    # 加载完成后连接到PDF打印方法
-    loader.loadFinished.connect(emit_pdf)
-    app.exec_()
+    result = __get_pdf_from_html(source, timeout, install_driver)

+    # Compression is disabled for now; the rendered PDF bytes are written as-is.
+    # if compress:
+    #     __compress(result, target, power)
+    # else:
+    with open(target, 'wb') as file:
+        file.write(result)
+
+
+def __send_devtools(driver, cmd, params={}):
+    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
+    url = driver.command_executor._url + resource
+    body = json.dumps({'cmd': cmd, 'params': params})
+    response = driver.command_executor._request('POST', url, body)
+
+    if not response:
+        raise Exception(response.get('value'))
+
+    return response.get('value')
+
+
+def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options={}):
+    webdriver_options = Options()
+    webdriver_prefs = {}
+    driver = None
+
+    webdriver_options.add_argument('--headless')
+    webdriver_options.add_argument('--disable-gpu')
+    webdriver_options.add_argument('--no-sandbox')
+    webdriver_options.add_argument('--disable-dev-shm-usage')
+    webdriver_options.experimental_options['prefs'] = webdriver_prefs
+
+    webdriver_prefs['profile.default_content_settings'] = {'images': 2}
+
+    if install_driver:
+        driver = webdriver.Chrome(
+            ChromeDriverManager(
+                url='https://npm.taobao.org/mirrors/chromedriver/',
+                latest_release_url='https://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE',
+                chrome_type=ChromeType.GOOGLE if settings.CHROMIUM_DRIVER == 'Chrome' else ChromeType.CHROMIUM
+            ).install(),
+            options=webdriver_options
+        )
+    else:
+        driver = webdriver.Chrome(options=webdriver_options)
+
+    driver.get(path)
+
+    try:
+       WebDriverWait(driver, timeout).until(staleness_of(driver.find_element_by_tag_name('html')))
+    except TimeoutException:
+        calculated_print_options = {
+            'landscape': False,
+            'displayHeaderFooter': False,
+            'printBackground': True,
+            'preferCSSPageSize': True,
+        }
+        calculated_print_options.update(print_options)
+        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
+        driver.quit()
+        return base64.b64decode(result['data'])

 if __name__ == '__main__':
+    # Command-line entry: argv[1] is the source HTML path/URL, argv[2] is the target PDF path.
    # print(sys.argv)
    html_path, pdf_path = sys.argv[1],sys.argv[2]
-    html2pdf(html_path=html_path,pdf_path=pdf_path)
+    convert(html_path,pdf_path)
+    # html2pdf(html_path=html_path,pdf_path=pdf_path)
--- a/app_doc/report_utils.py
+++ b/app_doc/report_utils.py
@ -21,6 +21,7 @@ django.setup()
 from app_doc.models import *
 from subprocess import Popen
 from loguru import logger
+from app_doc.report_html2pdf import convert
 import traceback
 import time
 import json
@ -705,7 +706,7 @@ class ReportPDF():
        try:
            project = Project.objects.get(pk=self.pro_id)
        except:
-            return
+            return False
        # 拼接文档的HTML字符串
        data = Doc.objects.filter(top_doc=self.pro_id,parent_doc=0).order_by("sort")
        toc_list = {'1':[],'2':[],'3':[]}
@ -755,10 +756,6 @@ class ReportPDF():
        temp_file_path = report_pdf_folder + '/{0}.html'.format(temp_file_name)
        # PDF文件路径
        report_file_path = report_pdf_folder + '/{0}.pdf'.format(temp_file_name)
-        # output_pdf_path = report_pdf_folder + '/{}_{}.pdf'.format(
-        #     project.name,
-        #     str(datetime.datetime.today()).replace(' ','-').replace(':','-')
-        # )
        # 写入HTML文件
        with open(temp_file_path, 'w', encoding='utf-8') as htmlfile:
            htmlfile.write(
@ -772,16 +769,14 @@ class ReportPDF():
            )

        # 执行HTML转PDF
-        # html_to_pdf(temp_file_path,report_file_path)
-        # print(os.getcwd())
-        shell_path = os.path.join(os.getcwd(),'app_doc/report_html2pdf.py')
-        html2pdf = Popen(['python',shell_path,temp_file_path,report_file_path])
-        html2pdf.wait()
-        for proc in psutil.process_iter():
-            if proc.name().startswith('QtWebEngineProcess'):
-                proc.kill()
+        try:
+            convert(temp_file_path,report_file_path)
+        except:
+            logger.error("生成PDF出错")
+            return False
        # 处理PDF文件
        if os.path.exists(report_file_path):
+            os.remove(temp_file_path)
            return report_file_path
        else:
            return False
--- a/config/config.ini
+++ b/config/config.ini
@ -1,6 +1,6 @@
 [site]
 # True表示开启站点调试模式，False表示关闭站点调试模式
-debug = False
+debug = True

 [database]
 # engine，指定数据库类型，接受sqlite、mysql、oracle、postgresql
@ -14,4 +14,6 @@ engine = sqlite
 # host表示数据库主机地址
 # host = db_host
 # port表示数据库端口
-# port = db_port
+# port = db_port
+[selenium]
+# driver = Chrome
--- a/requirements.txt
+++ b/requirements.txt
@ -13,6 +13,6 @@ Markdown==3.3.3
 jieba==0.42.1
 mammoth==1.4.13
 markdownify==0.6.0
-psutil==5.8.0
-PyQt5==5.11.3
+selenium==3.141.0
+webdriver_manager==3.3.0
 PyYAML==5.4.1