优化文集PDF导出

This commit is contained in:
yangjian 2021-02-28 08:22:34 +08:00
parent f509718d9d
commit 50bf6de540
5 changed files with 106 additions and 47 deletions

View File

@ -215,4 +215,14 @@ HAYSTACK_CONNECTIONS = {
# Automatically generate the search index when data is added, modified, or deleted
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# Custom highlighter
HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter"
HAYSTACK_CUSTOM_HIGHLIGHTER = "app_doc.search.highlight.MyHighLighter"
# Driver type used by Selenium; defaults to Chromium.
# A missing [selenium] section or a missing key falls back to the default
# instead of raising while the settings module is being imported.
try:
    CHROMIUM_DRIVER = CONFIG['selenium']['driver']
except KeyError:
    CHROMIUM_DRIVER = 'CHROMIUM'
# Optional driver path from the config; None when it is not configured.
try:
    CHROMIUM_DRIVER_PATH = CONFIG['selenium']['driver_path']
except KeyError:
    CHROMIUM_DRIVER_PATH = None

View File

@ -4,42 +4,94 @@
# #日期2020/12/27
# 博客地址zmister.com
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import staleness_of
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.utils import ChromeType
from django.conf import settings
import sys
from urllib.parse import quote
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets, QtGui
# print(sys.path)
import json
import base64
# Render a local HTML file to PDF through a Qt WebEngine view.
def html2pdf(html_path,pdf_path):
# Turn Windows backslashes into forward slashes, then percent-encode the
# path so it can be embedded in the file:/// URL built below
html_path = '/'.join(html_path.split('\\'))
html_path = quote(html_path, safe='/:?=')
# Instantiate a Qt application
app = QtWidgets.QApplication(sys.argv)
# Instantiate a WebEngineView
loader = QtWebEngineWidgets.QWebEngineView()
# Set the zoom factor of the view
loader.setZoomFactor(1)
# Connect the slot that runs once PDF printing has finished (closes the view)
loader.page().pdfPrintingFinished.connect(loader.close)
# Request the HTML file
loader.load(QtCore.QUrl("file:///{}".format(html_path)))
def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
'''
Convert a given html file or website into PDF and write it to ``target``
def emit_pdf(finished):
layout = QtGui.QPageLayout()
layout.setPageSize(QtGui.QPageSize(QtGui.QPageSize.A4Extra))
layout.setLeftMargin(20)
layout.setRightMargin(20)
layout.setTopMargin(20)
layout.setBottomMargin(20)
layout.setOrientation(QtGui.QPageLayout.Portrait)
loader.page().printToPdf(pdf_path, pageLayout=layout)
:param str source: source html file or website link
:param str target: target location to save the PDF
:param int timeout: timeout in seconds. Default value is set to 2 seconds
:param bool compress: whether PDF is compressed or not. Default value is False. Currently unused: the compression branch in the body is commented out
:param int power: power of the compression. Default value is 0. This can be 0: default, 1: prepress, 2: printer, 3: ebook, 4: screen. Currently unused for the same reason as ``compress``
:param bool install_driver: passed through to ``__get_pdf_from_html``. Default value is True
'''
# Once loading has finished, hand off to the PDF printing method
loader.loadFinished.connect(emit_pdf)
app.exec_()
result = __get_pdf_from_html(source, timeout, install_driver)
# if compress:
# __compress(result, target, power)
# else:
# Write the PDF bytes returned above to the target file in binary mode
with open(target, 'wb') as file:
file.write(result)
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command through the driver and return its result.

    :param driver: driver object; its ``session_id`` and ``command_executor``
        are used to build and send the request
    :param str cmd: DevTools command name, e.g. ``"Page.printToPDF"``
    :param dict params: command parameters; ``None`` is sent as an empty dict
    :return: the ``value`` entry of the response returned by the executor
    :raises RuntimeError: if the executor returns an empty response
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    if not response:
        # A falsy response (None or empty) has no 'value' to report, so the
        # error names the command that failed instead.
        raise RuntimeError("Empty response from driver for DevTools command %r" % cmd)
    return response.get('value')
def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options=None):
    """Load a page in a headless Chrome driver and return it as PDF bytes.

    :param str path: location handed to ``driver.get``
    :param int timeout: seconds to wait for the loaded ``html`` element to go
        stale before printing
    :param bool install_driver: when True, the driver binary is obtained
        through ``ChromeDriverManager(...).install()``; otherwise
        ``webdriver.Chrome`` is constructed without an explicit driver path
    :param dict print_options: extra ``Page.printToPDF`` parameters merged
        over the defaults; ``None`` means no overrides
    :return: the base64-decoded ``data`` field of the print result
    :rtype: bytes
    """
    webdriver_options = Options()
    webdriver_options.add_argument('--headless')
    webdriver_options.add_argument('--disable-gpu')
    webdriver_options.add_argument('--no-sandbox')
    webdriver_options.add_argument('--disable-dev-shm-usage')
    # NOTE(review): content setting 'images': 2 presumably blocks image
    # loading -- confirm that this is intended for PDF export.
    webdriver_prefs = {'profile.default_content_settings': {'images': 2}}
    webdriver_options.experimental_options['prefs'] = webdriver_prefs

    if install_driver:
        driver = webdriver.Chrome(
            ChromeDriverManager(
                url='https://npm.taobao.org/mirrors/chromedriver/',
                latest_release_url='https://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE',
                chrome_type=ChromeType.GOOGLE if settings.CHROMIUM_DRIVER == 'Chrome' else ChromeType.CHROMIUM
            ).install(),
            options=webdriver_options
        )
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # From here on the browser exists, so it is always shut down again, even
    # when loading or printing raises.
    try:
        driver.get(path)
        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element_by_tag_name('html'))
            )
        except TimeoutException:
            # Expected: the element did not go stale within the wait, so the
            # page is printed as it stands.
            pass

        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        calculated_print_options.update(print_options or {})
        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result['data'])
    finally:
        driver.quit()
# Command-line entry point: argv[1] is the HTML path, argv[2] is the PDF path.
if __name__ == '__main__':
# print(sys.argv)
html_path, pdf_path = sys.argv[1],sys.argv[2]
html2pdf(html_path=html_path,pdf_path=pdf_path)
convert(html_path,pdf_path)
# html2pdf(html_path=html_path,pdf_path=pdf_path)

View File

@ -21,6 +21,7 @@ django.setup()
from app_doc.models import *
from subprocess import Popen
from loguru import logger
from app_doc.report_html2pdf import convert
import traceback
import time
import json
@ -705,7 +706,7 @@ class ReportPDF():
try:
project = Project.objects.get(pk=self.pro_id)
except:
return
return False
# 拼接文档的HTML字符串
data = Doc.objects.filter(top_doc=self.pro_id,parent_doc=0).order_by("sort")
toc_list = {'1':[],'2':[],'3':[]}
@ -755,10 +756,6 @@ class ReportPDF():
temp_file_path = report_pdf_folder + '/{0}.html'.format(temp_file_name)
# PDF文件路径
report_file_path = report_pdf_folder + '/{0}.pdf'.format(temp_file_name)
# output_pdf_path = report_pdf_folder + '/{}_{}.pdf'.format(
# project.name,
# str(datetime.datetime.today()).replace(' ','-').replace(':','-')
# )
# 写入HTML文件
with open(temp_file_path, 'w', encoding='utf-8') as htmlfile:
htmlfile.write(
@ -772,16 +769,14 @@ class ReportPDF():
)
# 执行HTML转PDF
# html_to_pdf(temp_file_path,report_file_path)
# print(os.getcwd())
shell_path = os.path.join(os.getcwd(),'app_doc/report_html2pdf.py')
html2pdf = Popen(['python',shell_path,temp_file_path,report_file_path])
html2pdf.wait()
for proc in psutil.process_iter():
if proc.name().startswith('QtWebEngineProcess'):
proc.kill()
try:
convert(temp_file_path,report_file_path)
except:
logger.error("生成PDF出错")
return False
# 处理PDF文件
if os.path.exists(report_file_path):
os.remove(temp_file_path)
return report_file_path
else:
return False

View File

@ -1,6 +1,6 @@
[site]
# True表示开启站点调试模式False表示关闭站点调试模式
debug = False
debug = True
[database]
# engine指定数据库类型接受sqlite、mysql、oracle、postgresql
@ -14,4 +14,6 @@ engine = sqlite
# host表示数据库主机地址
# host = db_host
# port表示数据库端口
# port = db_port
# port = db_port
[selenium]
# driver = Chrome

View File

@ -13,6 +13,6 @@ Markdown==3.3.3
jieba==0.42.1
mammoth==1.4.13
markdownify==0.6.0
psutil==5.8.0
PyQt5==5.11.3
selenium==3.141.0
webdriver_manager==3.3.0
PyYAML==5.4.1