diff --git a/MrDoc/settings.py b/MrDoc/settings.py index 9d6b0bc..5c67bbd 100644 --- a/MrDoc/settings.py +++ b/MrDoc/settings.py @@ -202,21 +202,6 @@ REST_FRAMEWORK = { 'PAGE_SIZE': 10 } -# Chromium路径 -try: - CHROMIUM_DIR = CONFIG['chromium']['path'] - CHROMIUM_PATH = CHROMIUM_DIR - # CHROMIUM_PATH = os.path.join(CONFIG_DIR,CHROMIUM_DIR) # Windows便携版本使用config下的路径 -except: - CHROMIUM_PATH = None - -# Chromium启动参数 -try: - CHROMIUM_ARGS = CONFIG['chromium']['args'].split(',') -except: - CHROMIUM_ARGS = [] - - # 全文检索配置 HAYSTACK_CONNECTIONS = { 'default': { diff --git a/app_doc/report_html2pdf.py b/app_doc/report_html2pdf.py new file mode 100644 index 0000000..901dcb2 --- /dev/null +++ b/app_doc/report_html2pdf.py @@ -0,0 +1,45 @@ +# coding:utf-8 +# @文件: report_html2pdf.py +# @创建者:州的先生 +# #日期:2020/12/27 +# 博客地址:zmister.com + +import sys +from urllib.parse import quote +from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets, QtGui +# print(sys.path) + + +def html2pdf(html_path,pdf_path): + html_path = '/'.join(html_path.split('\\')) + html_path = quote(html_path, safe='/:?=') + # 实例化一个Qt应用 + app = QtWidgets.QApplication(sys.argv) + # 实例化一个WebEngineView + loader = QtWebEngineWidgets.QWebEngineView() + # 设置视图缩放比例 + loader.setZoomFactor(1) + # 设置页码打印完成后的槽 + loader.page().pdfPrintingFinished.connect(loader.close) + # 请求HTML文件 + loader.load(QtCore.QUrl("file:///{}".format(html_path))) + + def emit_pdf(finished): + layout = QtGui.QPageLayout() + layout.setPageSize(QtGui.QPageSize(QtGui.QPageSize.A4Extra)) + layout.setLeftMargin(20) + layout.setRightMargin(20) + layout.setTopMargin(20) + layout.setBottomMargin(20) + layout.setOrientation(QtGui.QPageLayout.Portrait) + loader.page().printToPdf(pdf_path, pageLayout=layout) + + # 加载完成后连接到PDF打印方法 + loader.loadFinished.connect(emit_pdf) + app.exec_() + + +if __name__ == '__main__': + # print(sys.argv) + html_path, pdf_path = sys.argv[1],sys.argv[2] + html2pdf(html_path=html_path,pdf_path=pdf_path) diff --git a/app_doc/report_utils.py b/app_doc/report_utils.py index 9120ba5..76fc39f 100644 --- a/app_doc/report_utils.py +++ b/app_doc/report_utils.py @@ -19,112 +19,17 @@ application = get_wsgi_application() import django django.setup() from app_doc.models import * +from subprocess import Popen +from loguru import logger import traceback import time -from pyppeteer import launch -import asyncio -from loguru import logger +import json +import psutil +import markdown +import yaml # import PyPDF2 # from pdfminer import high_level -# JS动态图形转静态图片 -@logger.catch() -def geneta_js_img(html_path,img_path,types): - ''' - :param html_path: HTML源文件路径 - :param img_path: 保存的静态图片路径 - :param type: 转换的类型,有mindmap、tex、flowchart、seque四种 - :return: - ''' - type_map = { - 'mindmap':'.mindmap', # 脑图 - 'tex':'.editormd-tex', # 科学公式 - 'flowchart':'.flowchart', # 流程图 - 'seque':'.sequence-diagram', # 序列图 - 'echart':'.echart', # echart图表 - } - async def main(): - if settings.CHROMIUM_PATH: - browser = await launch( - executablePath=r'{}'.format(settings.CHROMIUM_PATH), - args=settings.CHROMIUM_ARGS, - headless=True, - handleSIGINT=False, - handleSIGTERM=False, - handleSIGHUP=False - ) - else: - browser = await launch( - headless=True, - handleSIGINT=False, - handleSIGTERM=False, - handleSIGHUP=False - ) - page = await browser.newPage() - await page.goto('file://' + html_path, {'waitUntil': 'networkidle0'}) - element = await page.querySelector(type_map[types]) - await element.screenshot({'type': 'jpeg', 'quality': 100, 'path': img_path}) - await browser.close() - - # asyncio.new_event_loop().run_until_complete(main()) - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(main()) - except: - loop.run_until_complete(main()) - finally: - loop.close() - - -# HTML转PDF -@logger.catch() -def html_to_pdf(html_path,pdf_path): - async def main(): - if settings.CHROMIUM_PATH: - browser = await launch( - executablePath=r'{}'.format(settings.CHROMIUM_PATH), - args=settings.CHROMIUM_ARGS, - headless=True, - handleSIGINT=False, - handleSIGTERM=False, - handleSIGHUP=False - ) - else: - browser = await launch( - headless=True, - handleSIGINT=False, - handleSIGTERM=False, - handleSIGHUP=False - ) - page = await browser.newPage() - await page.goto('file://' + html_path, {'waitUntil': 'networkidle0'}) - await page.pdf({ - 'path':pdf_path, - 'format':'A4', - 'displayHeaderFooter':True, - 'headerTemplate':'
', - 'footerTemplate':'
/
', - 'margin':{ - 'top':'1cm', - 'right':'1cm', - 'bottom':'1cm', - 'left':'1cm' - } - }) - await browser.close() - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(main()) - except: - loop.run_until_complete(main()) - finally: - loop.close() - # 导出MD文件压缩包 @logger.catch() @@ -132,12 +37,12 @@ class ReportMD(): def __init__(self,project_id): # 查询文集信息 self.pro_id = project_id - project_data = Project.objects.get(pk=project_id) + self.project_data = Project.objects.get(pk=project_id) # 文集名称 self.project_name = "{0}_{1}_{2}".format( - project_data.create_user, - project_data.name, + self.project_data.create_user, + self.project_data.name, str(datetime.date.today()) ) @@ -158,13 +63,24 @@ class ReportMD(): os.mkdir(self.media_path) def work(self): + # 初始化文集YAML数据 + project_toc_list = {} + project_toc_list['project_name'] = self.project_data.name + project_toc_list['project_desc'] = self.project_data.intro + project_toc_list['project_role'] = self.project_data.role + project_toc_list['toc'] = [] # 读取指定文集的文档数据 data = Doc.objects.filter(top_doc=self.pro_id, parent_doc=0).order_by("sort") # 遍历一级文档 for d in data: - md_name = d.name - md_content = d.pre_content - md_content = self.operat_md_media(md_content) + top_item = { + 'name': d.name, + 'file': d.name+'.md', + } + md_name = d.name # 文档名称 + # 文档内容,如果使用Markdown编辑器编写则导出Markdown文本,如果使用富文本编辑器编写则导出HTML文本 + md_content = self.operat_md_media(d.pre_content) \ + if d.editor_mode in [1,2] else self.operat_md_media(d.content) # 新建MD文件 with open('{}/{}.md'.format(self.project_path,md_name),'w',encoding='utf-8') as files: @@ -172,26 +88,45 @@ class ReportMD(): # 查询二级文档 data_2 = Doc.objects.filter(parent_doc=d.id).order_by("sort") - for d2 in data_2: - md_name_2 = d2.name - md_content_2 = d2.pre_content - md_content_2 = self.operat_md_media(md_content_2) + if data_2.count() > 0: + top_item['children'] = [] + for d2 in data_2: + sec_item = { + 'name': d2.name, + 'file': d2.name+'.md', + } - # 新建MD文件 - with open('{}/{}.md'.format(self.project_path, md_name_2), 'w', encoding='utf-8') as files: - files.write(md_content_2) - - # 获取第三级文档 - data_3 = Doc.objects.filter(parent_doc=d2.id).order_by("sort") - for d3 in data_3: - md_name_3 = d3.name - md_content_3 = d3.pre_content - - md_content_3 = self.operat_md_media(md_content_3) + md_name_2 = d2.name + md_content_2 = self.operat_md_media(d2.pre_content) \ + if d2.editor_mode in [1,2] else self.operat_md_media(d2.content) # 新建MD文件 - with open('{}/{}.md'.format(self.project_path, md_name_3), 'w', encoding='utf-8') as files: - files.write(md_content_3) + with open('{}/{}.md'.format(self.project_path, md_name_2), 'w', encoding='utf-8') as files: + files.write(md_content_2) + + # 获取第三级文档 + data_3 = Doc.objects.filter(parent_doc=d2.id).order_by("sort") + if data_3.count() > 0: + sec_item['children'] = [] + for d3 in data_3: + item = { + 'name': d3.name, + 'file': d3.name+'.md', + } + sec_item['children'].append(item) + md_name_3 = d3.name + md_content_3 = self.operat_md_media(d3.pre_content) \ + if d3.editor_mode in [1,2] else self.operat_md_media(d3.content) + + # 新建MD文件 + with open('{}/{}.md'.format(self.project_path, md_name_3), 'w', encoding='utf-8') as files: + files.write(md_content_3) + top_item['children'].append(sec_item) + project_toc_list['toc'].append(top_item) + + # 写入层级YAML + with open('{}/mrdoc.yaml'.format(self.project_path), 'a+', encoding='utf-8') as toc_yaml: + yaml.dump(project_toc_list,toc_yaml,allow_unicode=True) # 压缩文件 md_file = shutil.make_archive( @@ -199,7 +134,7 @@ class ReportMD(): format='zip', root_dir=self.project_path ) - print(md_file) + # print(md_file) # 删除文件夹 shutil.rmtree(self.project_path) @@ -214,10 +149,12 @@ class ReportMD(): # 存在静态文件,进行遍历 if len(media_list) > 0: for media in media_list: - media_filename = media.split("(")[-1].split(")")[0] # 媒体文件的文件名 + media_filename = media.replace('//','/').split("(")[-1].split(")")[0] # 媒体文件的文件名 # 对本地静态文件进行复制 - if media_filename.startswith("/"): - sub_folder = "/" + media_filename.split("/")[3] # 获取子文件夹的名称 + if media_filename.startswith("/media"): + # print(media_filename) + sub_folder = "/" + media_filename.split("/")[2] # 获取子文件夹的名称 + # print(sub_folder) is_sub_folder = os.path.exists(self.media_path+sub_folder) # 创建子文件夹 if is_sub_folder is False: @@ -229,9 +166,7 @@ class ReportMD(): shutil.copy(settings.BASE_DIR + media_filename, self.media_path+sub_folder) except FileNotFoundError: pass - # 不存在本地静态文件,直接返回MD内容 - # else: - # print("没有本地静态文件") + return md_content # 不存在静态文件,直接返回MD内容 else: @@ -259,7 +194,7 @@ class ReportEPUB(): # 复制样式文件到相关目录 shutil.copyfile(settings.BASE_DIR+'/static/report_epub/style.css',self.base_path + '/OEBPS/Styles/style.css') - shutil.copyfile(settings.BASE_DIR+'/static/katex/katex.min.css',self.base_path + '/OEBPS/Styles/katex.css') + # shutil.copyfile(settings.BASE_DIR+'/static/katex/katex.min.css',self.base_path + '/OEBPS/Styles/katex.css') shutil.copyfile(settings.BASE_DIR+'/static/editor.md/css/editormd.min.css',self.base_path + '/OEBPS/Styles/editormd.css') # 复制封面图片到相关目录 shutil.copyfile(settings.BASE_DIR+'/static/report_epub/epub_cover1.jpg',self.base_path + '/OEBPS/Images/epub_cover1.jpg') @@ -279,14 +214,9 @@ class ReportEPUB(): # 添加css样式标签 style_link = html_soup.new_tag(name='link',href="../Styles/style.css",rel="stylesheet",type="text/css") - katex_link = html_soup.new_tag(name='link',href='../Styles/katex.css',rel="stylesheet",type="text/css") - editormd_link = html_soup.new_tag(name='link',href='../Styles/editormd.css',rel="stylesheet",type="text/css") html_soup.body.insert_before(style_link) - html_soup.body.insert_before(katex_link) - # html_soup.body.insert_before(editormd_link) - - # 添加xlm标签声明 - # html_soup.html.insert_before('') + editormd_link = html_soup.new_tag(name='link',href='../Styles/editormd.css',rel="stylesheet",type="text/css") + html_soup.body.insert_before(editormd_link) # 添加html标签的xmlns属性 html_soup.html['xmlns'] = "http://www.w3.org/1999/xhtml" @@ -312,227 +242,6 @@ class ReportEPUB(): except FileNotFoundError as e: pass - # 替换HTML文本中的脑图为静态图片 - for mindmap in mindmap_tag: - # print('转换脑图') - html_str = ''' - - - - - - Markmap - - - - - - - {svg_content} - - - - '''.format(svg_content=mindmap) - # 脑图HTML文件路径 - temp_mindmap_html = settings.BASE_DIR +'/media/report_epub/mindmap_{}.html'.format(str(time.time())) - mindmap_img_filename = 'mindmap_{}.jpg'.format(str(time.time())) - mindmap_img_path = self.base_path + '/OEBPS/Images/' + mindmap_img_filename - - # 写入临时HTML文件 - with open(temp_mindmap_html,'w+',encoding='utf-8') as mindmap_html: - mindmap_html.write(html_str) - - # 生成静态图片 - geneta_js_img(temp_mindmap_html,mindmap_img_path,'mindmap') - - # 将图片标签设置进去 - mindmap.name = 'img' - mindmap['src'] = '../Images/' + mindmap_img_filename - mindmap.string = '' - os.remove(temp_mindmap_html) # 删除临时的HTML - - # 替换公式为静态图片 - for tex in tex_tag: - # print('转换公式') - html_str = ''' - - - - - - - Markmap - - - - - - {content} - - - - - '''.format(content=tex) - # 公式HTML文件路径 - temp_tex_html = settings.BASE_DIR + '/media/report_epub/tex_{}.html'.format(str(time.time())) - tex_img_filename = 'tex_{}.jpg'.format(str(time.time())) - tex_img_path = self.base_path + '/OEBPS/Images/' + tex_img_filename - - with open(temp_tex_html, 'w+', encoding='utf-8') as tex_html: - tex_html.write(html_str) - - # 生成静态图片 - geneta_js_img(temp_tex_html, tex_img_path,'tex') - - # 将图片标签添加进去 - # tex.name = 'img' - # tex['src'] = '../Images/' + tex_img_filename - tex.string = '' - tex_img_tag = html_soup.new_tag(name='img',src='../Images/' + tex_img_filename) - tex.insert(0,tex_img_tag) - os.remove(temp_tex_html) # 删除临时的HTML - - # 替换流程图为静态图片 - for flowchart in flowchart_tag: - # print("转换流程图") - html_str = ''' - - - - - - - Markmap - - - - - - - {content} - - - - - '''.format(content=flowchart) - # 流程图HTML文件路径 - temp_flow_html = settings.BASE_DIR + '/media/report_epub/flow_{}.html'.format(str(time.time())) - flow_img_filename = 'flow_{}.jpg'.format(str(time.time())) - flow_img_path = self.base_path + '/OEBPS/Images/' + flow_img_filename - - with open(temp_flow_html, 'w+', encoding='utf-8') as flow_html: - flow_html.write(html_str) - - # 生成静态图片 - geneta_js_img(temp_flow_html, flow_img_path,'flowchart') - - # 将图片标签添加进去 - flowchart.string = '' - flow_img_tag = html_soup.new_tag(name='img', src='../Images/' + flow_img_filename) - flowchart.insert(0, flow_img_tag) - os.remove(temp_flow_html) # 删除临时的HTML - - # 替换时序图为静态图片 - for seque in seque_tag: - # print("转换时序图") - html_str = ''' - - - - - - Markmap - - - - - - - {content} - - - - - '''.format(content=seque) - # 时序图HTML文件路径 - temp_seque_html = settings.BASE_DIR + '/media/report_epub/seque_{}.html'.format(str(time.time())) - seque_img_filename = 'seque_{}.jpg'.format(str(time.time())) - seque_img_path = self.base_path + '/OEBPS/Images/' + seque_img_filename - with open(temp_seque_html, 'w+', encoding='utf-8') as seque_html: - seque_html.write(html_str) - - # 生成静态图片 - geneta_js_img(temp_seque_html, seque_img_path, 'seque') - - # 将图片标签添加进去 - seque.string = '' - seque_img_tag = html_soup.new_tag(name='img', src='../Images/' + seque_img_filename) - seque.insert(0, seque_img_tag) - os.remove(temp_seque_html) # 删除临时的HTML - - # 替换echart图表为静态图片 - for echart in echart_tag: - html_str = ''' - - - - - - Markmap - - - - - {svg_content} - - - - '''.format(svg_content=echart) - # 脑图HTML文件路径 - temp_echart_html = settings.BASE_DIR + '/media/report_epub/echart_{}.html'.format(str(time.time())) - echart_img_filename = 'echart_{}.jpg'.format(str(time.time())) - echart_img_path = self.base_path + '/OEBPS/Images/' + echart_img_filename - - # 写入临时HTML文件 - with open(temp_echart_html, 'w+', encoding='utf-8') as echart_html: - echart_html.write(html_str) - - # 生成静态图片 - geneta_js_img(temp_echart_html, echart_img_path, 'echart') - - # 将图片标签设置进去 - echart.name = 'img' - echart['src'] = '../Images/' + echart_img_filename - echart.string = '' - os.remove(temp_echart_html) # 删除临时的HTML - - # 替换code标签的内容 - # for code in code_tag: - # code_str = code.get_text() - # code.clear() - # code['class'] = '' - # code.string = code_str - # 创建写入临时HTML文件 temp_file_path = self.base_path + '/OEBPS/Text/{0}.xhtml'.format(d.id) with open(temp_file_path, 'a+', encoding='utf-8') as htmlfile: @@ -564,6 +273,11 @@ class ReportEPUB(): for d in data: # 拼接HTML字符串 html_str = "

{}

".format(d.name) + if d.content is None: + d.content = markdown.markdown( + d.pre_content, + extensions=['markdown.extensions.fenced_code','markdown.extensions.tables'] + ) html_str += d.content self.write_html(d=d,html_str=html_str) # 生成HTML # 生成HTML的目录位置 @@ -596,6 +310,11 @@ class ReportEPUB(): toc_summary_str += '