From 50bf6de54020e761f90b61ed4dd026a4bfbeca29 Mon Sep 17 00:00:00 2001 From: yangjian Date: Sun, 28 Feb 2021 08:22:34 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=96=87=E9=9B=86PDF?= =?UTF-8?q?=E5=AF=BC=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MrDoc/settings.py | 12 +++- app_doc/report_html2pdf.py | 110 +++++++++++++++++++++++++++---------- app_doc/report_utils.py | 21 +++---- config/config.ini | 6 +- requirements.txt | 4 +- 5 files changed, 106 insertions(+), 47 deletions(-) diff --git a/MrDoc/settings.py b/MrDoc/settings.py index 5c67bbd..e47a443 100644 --- a/MrDoc/settings.py +++ b/MrDoc/settings.py @@ -215,4 +215,14 @@ HAYSTACK_CONNECTIONS = { # 当添加、修改、删除数据时,自动生成索引 HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' # 自定义高亮 -HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter" \ No newline at end of file +HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter" + +# Selenium 调用的driver类型 默认为Chromium +try: + CHROMIUM_DRIVER = CONFIG['selenium']['driver'] +except: + CHROMIUM_DRIVER = 'CHROMIUM' +if 'driver_path' in CONFIG['selenium'].keys(): + CHROMIUM_DRIVER_PATH = CONFIG['selenium']['driver_path'] +else: + CHROMIUM_DRIVER_PATH = None diff --git a/app_doc/report_html2pdf.py b/app_doc/report_html2pdf.py index 901dcb2..6252db5 100644 --- a/app_doc/report_html2pdf.py +++ b/app_doc/report_html2pdf.py @@ -4,42 +4,94 @@ # #日期:2020/12/27 # 博客地址:zmister.com +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.common.exceptions import TimeoutException +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support.expected_conditions import staleness_of +from webdriver_manager.chrome import ChromeDriverManager +from webdriver_manager.utils import ChromeType +from django.conf import settings import sys -from urllib.parse import quote -from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets, QtGui -# print(sys.path) +import json +import base64 -def html2pdf(html_path,pdf_path): - html_path = '/'.join(html_path.split('\\')) - html_path = quote(html_path, safe='/:?=') - # 实例化一个Qt应用 - app = QtWidgets.QApplication(sys.argv) - # 实例化一个WebEngineView - loader = QtWebEngineWidgets.QWebEngineView() - # 设置视图缩放比例 - loader.setZoomFactor(1) - # 设置页码打印完成后的槽 - loader.page().pdfPrintingFinished.connect(loader.close) - # 请求HTML文件 - loader.load(QtCore.QUrl("file:///{}".format(html_path))) +def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True): + ''' + Convert a given html file or website into PDF - def emit_pdf(finished): - layout = QtGui.QPageLayout() - layout.setPageSize(QtGui.QPageSize(QtGui.QPageSize.A4Extra)) - layout.setLeftMargin(20) - layout.setRightMargin(20) - layout.setTopMargin(20) - layout.setBottomMargin(20) - layout.setOrientation(QtGui.QPageLayout.Portrait) - loader.page().printToPdf(pdf_path, pageLayout=layout) + :param str source: source html file or website link + :param str target: target location to save the PDF + :param int timeout: timeout in seconds. Default value is set to 2 seconds + :param bool compress: whether PDF is compressed or not. Default value is False + :param int power: power of the compression. Default value is 0. This can be 0: default, 1: prepress, 2: printer, 3: ebook, 4: screen + ''' - # 加载完成后连接到PDF打印方法 - loader.loadFinished.connect(emit_pdf) - app.exec_() + result = __get_pdf_from_html(source, timeout, install_driver) + # if compress: + # __compress(result, target, power) + # else: + with open(target, 'wb') as file: + file.write(result) + + +def __send_devtools(driver, cmd, params={}): + resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id + url = driver.command_executor._url + resource + body = json.dumps({'cmd': cmd, 'params': params}) + response = driver.command_executor._request('POST', url, body) + + if not response: + raise Exception(response.get('value')) + + return response.get('value') + + +def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options={}): + webdriver_options = Options() + webdriver_prefs = {} + driver = None + + webdriver_options.add_argument('--headless') + webdriver_options.add_argument('--disable-gpu') + webdriver_options.add_argument('--no-sandbox') + webdriver_options.add_argument('--disable-dev-shm-usage') + webdriver_options.experimental_options['prefs'] = webdriver_prefs + + webdriver_prefs['profile.default_content_settings'] = {'images': 2} + + if install_driver: + driver = webdriver.Chrome( + ChromeDriverManager( + url='https://npm.taobao.org/mirrors/chromedriver/', + latest_release_url='https://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE', + chrome_type=ChromeType.GOOGLE if settings.CHROMIUM_DRIVER == 'Chrome' else ChromeType.CHROMIUM + ).install(), + options=webdriver_options + ) + else: + driver = webdriver.Chrome(options=webdriver_options) + + driver.get(path) + + try: + WebDriverWait(driver, timeout).until(staleness_of(driver.find_element_by_tag_name('html'))) + except TimeoutException: + calculated_print_options = { + 'landscape': False, + 'displayHeaderFooter': False, + 'printBackground': True, + 'preferCSSPageSize': True, + } + calculated_print_options.update(print_options) + result = __send_devtools(driver, "Page.printToPDF", calculated_print_options) + driver.quit() + return base64.b64decode(result['data']) if __name__ == '__main__': # print(sys.argv) html_path, pdf_path = sys.argv[1],sys.argv[2] - html2pdf(html_path=html_path,pdf_path=pdf_path) + convert(html_path,pdf_path) + # html2pdf(html_path=html_path,pdf_path=pdf_path) diff --git a/app_doc/report_utils.py b/app_doc/report_utils.py index 76fc39f..57febd6 100644 --- a/app_doc/report_utils.py +++ b/app_doc/report_utils.py @@ -21,6 +21,7 @@ django.setup() from app_doc.models import * from subprocess import Popen from loguru import logger +from app_doc.report_html2pdf import convert import traceback import time import json @@ -705,7 +706,7 @@ class ReportPDF(): try: project = Project.objects.get(pk=self.pro_id) except: - return + return False # 拼接文档的HTML字符串 data = Doc.objects.filter(top_doc=self.pro_id,parent_doc=0).order_by("sort") toc_list = {'1':[],'2':[],'3':[]} @@ -755,10 +756,6 @@ class ReportPDF(): temp_file_path = report_pdf_folder + '/{0}.html'.format(temp_file_name) # PDF文件路径 report_file_path = report_pdf_folder + '/{0}.pdf'.format(temp_file_name) - # output_pdf_path = report_pdf_folder + '/{}_{}.pdf'.format( - # project.name, - # str(datetime.datetime.today()).replace(' ','-').replace(':','-') - # ) # 写入HTML文件 with open(temp_file_path, 'w', encoding='utf-8') as htmlfile: htmlfile.write( @@ -772,16 +769,14 @@ class ReportPDF(): ) # 执行HTML转PDF - # html_to_pdf(temp_file_path,report_file_path) - # print(os.getcwd()) - shell_path = os.path.join(os.getcwd(),'app_doc/report_html2pdf.py') - html2pdf = Popen(['python',shell_path,temp_file_path,report_file_path]) - html2pdf.wait() - for proc in psutil.process_iter(): - if proc.name().startswith('QtWebEngineProcess'): - proc.kill() + try: + convert(temp_file_path,report_file_path) + except: + logger.error("生成PDF出错") + return False # 处理PDF文件 if os.path.exists(report_file_path): + os.remove(temp_file_path) return report_file_path else: return False diff --git a/config/config.ini b/config/config.ini index 2e2388d..8d01f57 100644 --- a/config/config.ini +++ b/config/config.ini @@ -1,6 +1,6 @@ [site] # True表示开启站点调试模式,False表示关闭站点调试模式 -debug = False +debug = True [database] # engine,指定数据库类型,接受sqlite、mysql、oracle、postgresql @@ -14,4 +14,6 @@ engine = sqlite # host表示数据库主机地址 # host = db_host # port表示数据库端口 -# port = db_port \ No newline at end of file +# port = db_port +[selenium] +# driver = Chrome \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5ab7515..b103941 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,6 @@ Markdown==3.3.3 jieba==0.42.1 mammoth==1.4.13 markdownify==0.6.0 -psutil==5.8.0 -PyQt5==5.11.3 +selenium==3.141.0 +webdriver_manager==3.3.0 PyYAML==5.4.1 \ No newline at end of file