1、实现项目队列转换节省资源

2、实现同一个项目通过配置限定转换线程
3、修复转换Word没有图片的问题
4、优化Mac下单页打印中文字体无法显示的问题
pull/358/head
lifei6671 2018-07-10 16:26:25 +08:00
parent 01084c7b0b
commit 483df1f67b
10 changed files with 317 additions and 88 deletions

View File

@ -74,7 +74,12 @@ mail_expired=30
secure=LOGIN
###############配置PDF生成工具地址###################
wkhtmltopdf=D:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe
#同一个项目同时运行导出程序的并行数量,取值1-4之间,取值越大导出速度越快,也越占用资源
export_process_num=1
#并发导出的项目限制,指同一时间允许导出的项目数量,如果为0则不限制。设置得越大越占用资源
export_limit_num=1
#指同时等待导出的任务数量
export_queue_limit_num=100
###############配置CDN加速##################
cdn=

View File

@ -135,6 +135,33 @@ func GetUploadFileSize() int64 {
}
return 0
}
// GetExportProcessNum returns the number of export tasks that may run in
// parallel for a single project, read from the "export_process_num" setting.
// A missing value, or one outside the range 1-4, yields 1.
func GetExportProcessNum() int {
	n := beego.AppConfig.DefaultInt("export_process_num", 1)
	if n >= 1 && n <= 4 {
		return n
	}
	return 1
}
// GetExportLimitNum returns the configured number of projects that may be
// exported concurrently, read from the "export_limit_num" setting.
// A missing value defaults to 1 and a negative value is replaced by 1;
// zero is passed through unchanged.
func GetExportLimitNum() int {
	limit := beego.AppConfig.DefaultInt("export_limit_num", 1)
	if limit >= 0 {
		return limit
	}
	return 1
}
// GetExportQueueLimitNum returns the length of the queue of pending export
// tasks, read from the "export_queue_limit_num" setting.
// A missing value defaults to 10; a zero or negative value is replaced by 100.
// NOTE(review): the two fallbacks differ (10 vs 100) — confirm which is intended.
func GetExportQueueLimitNum() int {
	const invalidFallback = 100
	if n := beego.AppConfig.DefaultInt("export_queue_limit_num", 10); n > 0 {
		return n
	}
	return invalidFallback
}
//判断是否是允许上传的文件类型.
func IsAllowUploadFileExt(ext string) bool {

View File

@ -154,7 +154,7 @@ func (c *BaseController) ShowErrorPage(errCode int, errMsg string) {
var buf bytes.Buffer
if err := beego.ExecuteViewPathTemplate(&buf, "document/export.tpl", beego.BConfig.WebConfig.ViewsPath, map[string]interface{}{"ErrorMessage": errMsg, "errCode": errCode, "BaseUrl": conf.BaseUrl}); err != nil {
if err := beego.ExecuteViewPathTemplate(&buf, "errors/error.tpl", beego.BConfig.WebConfig.ViewsPath, map[string]interface{}{"ErrorMessage": errMsg, "ErrorCode": errCode, "BaseUrl": conf.BaseUrl}); err != nil {
c.Abort("500")
}

View File

@ -28,6 +28,7 @@ import (
"gopkg.in/russross/blackfriday.v2"
"github.com/lifei6671/mindoc/utils/cryptil"
"fmt"
"github.com/lifei6671/mindoc/utils/filetil"
)
// DocumentController struct
@ -857,29 +858,32 @@ func (c *DocumentController) Export() {
return
}
eBookResult, err := bookResult.Converter(c.CruSession.SessionID())
outputPath := filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(bookResult.BookId))
if err != nil {
beego.Error("转换文档失败:" + bookResult.BookName + " -> " + err.Error())
c.Abort("500")
}
pdfpath := filepath.Join(outputPath, "book.pdf")
epubpath := filepath.Join(outputPath, "book.epub")
mobipath := filepath.Join(outputPath, "book.mobi")
docxpath := filepath.Join(outputPath, "book.docx")
if output == "pdf" {
c.Ctx.Output.Download(eBookResult.PDFPath, bookResult.BookName+".pdf")
if output == "pdf" && filetil.FileExists(pdfpath){
c.Ctx.Output.Download(pdfpath, bookResult.BookName+".pdf")
c.Abort("200")
} else if output == "epub" && filetil.FileExists(epubpath){
c.Ctx.Output.Download(epubpath, bookResult.BookName+".epub")
c.Abort("200")
} else if output == "epub" {
c.Ctx.Output.Download(eBookResult.EpubPath, bookResult.BookName+".epub")
} else if output == "mobi" && filetil.FileExists(mobipath) {
c.Ctx.Output.Download(mobipath, bookResult.BookName+".mobi")
c.Abort("200")
} else if output == "mobi" {
c.Ctx.Output.Download(eBookResult.MobiPath, bookResult.BookName+".mobi")
} else if output == "docx" && filetil.FileExists(docxpath){
c.Ctx.Output.Download(docxpath, bookResult.BookName+".docx")
c.Abort("200")
} else if output == "docx" {
c.Ctx.Output.Download(eBookResult.WordPath, bookResult.BookName+".docx")
c.Abort("200")
}else if output == "pdf" || output == "epub" || output == "docx" || output == "mobi"{
models.BackgroupConvert(c.CruSession.SessionID(),bookResult)
c.ShowErrorPage(200,"文档正在后台转换,请稍后再下载")
}else{
c.ShowErrorPage(200,"不支持的文件格式")
}

View File

@ -18,6 +18,7 @@ import (
"github.com/lifei6671/mindoc/utils/ziptil"
"github.com/lifei6671/mindoc/utils/cryptil"
"sync"
"html"
)
type Converter struct {
@ -26,6 +27,9 @@ type Converter struct {
Config Config
Debug bool
GeneratedCover string
ProcessNum int //并发的任务数量
process chan func()
limitChan chan bool
}
//目录结构
@ -93,6 +97,9 @@ func NewConverter(configFile string, debug ...bool) (converter *Converter, err e
Config: cfg,
BasePath: basepath,
Debug: db,
ProcessNum: 1,
process: make(chan func(),4),
limitChan: make(chan bool,1),
}
}
}
@ -100,89 +107,113 @@ func NewConverter(configFile string, debug ...bool) (converter *Converter, err e
}
//执行文档转换
func (this *Converter) Convert() (err error) {
if !this.Debug { //调试模式下不删除生成的文件
defer this.converterDefer() //最后移除创建的多余而文件
func (convert *Converter) Convert() (err error) {
if !convert.Debug { //调试模式下不删除生成的文件
defer convert.converterDefer() //最后移除创建的多余而文件
}
if convert.process == nil{
convert.process = make(chan func(),4)
}
if convert.limitChan == nil {
if convert.ProcessNum <= 0 {
convert.ProcessNum = 1
}
convert.limitChan = make(chan bool,convert.ProcessNum)
for i := 0; i < convert.ProcessNum;i++{
convert.limitChan <- true
}
}
if err = this.generateMimeType(); err != nil {
if err = convert.generateMimeType(); err != nil {
return
}
if err = this.generateMetaInfo(); err != nil {
if err = convert.generateMetaInfo(); err != nil {
return
}
if err = this.generateTocNcx(); err != nil { //生成目录
if err = convert.generateTocNcx(); err != nil { //生成目录
return
}
if err = this.generateSummary(); err != nil { //生成文档内目录
if err = convert.generateSummary(); err != nil { //生成文档内目录
return
}
if err = this.generateTitlePage(); err != nil { //生成封面
if err = convert.generateTitlePage(); err != nil { //生成封面
return
}
if err = this.generateContentOpf(); err != nil { //这个必须是generate*系列方法的最后一个调用
if err = convert.generateContentOpf(); err != nil { //这个必须是generate*系列方法的最后一个调用
return
}
//将当前文件夹下的所有文件压缩成zip包然后直接改名成content.epub
f := filepath.Join(this.OutputPath, "content.epub")
f := filepath.Join(convert.OutputPath, "content.epub")
os.Remove(f) //如果原文件存在了,则删除;
if err = ziptil.Zip(this.BasePath,f); err == nil {
if err = ziptil.Zip(convert.BasePath,f); err == nil {
//创建导出文件夹
os.Mkdir(this.BasePath+"/"+output, os.ModePerm)
if len(this.Config.Format) > 0 {
os.Mkdir(convert.BasePath+"/"+output, os.ModePerm)
if len(convert.Config.Format) > 0 {
var errs []string
group := sync.WaitGroup{}
for _, v := range this.Config.Format {
fmt.Println("convert to " + v)
switch strings.ToLower(v) {
case "epub":
group.Add(1)
go func(group *sync.WaitGroup) {
if err = this.convertToEpub(); err != nil {
errs = append(errs, err.Error())
fmt.Println("转换EPUB文档失败" + err.Error())
}
group.Done()
}(&group)
case "mobi":
group.Add(1)
go func(group *sync.WaitGroup) {
if err = this.convertToMobi(); err != nil {
errs = append(errs, err.Error())
fmt.Println("转换MOBI文档失败" + err.Error())
}
group.Done()
}(&group)
case "pdf":
group.Add(1)
go func(group *sync.WaitGroup) {
if err = this.convertToPdf(); err != nil {
fmt.Println("转换PDF文档失败" + err.Error())
errs = append(errs, err.Error())
}
group.Done()
}(&group)
case "docx":
group.Add(1)
go func(group *sync.WaitGroup) {
if err = this.convertToDocx(); err != nil {
fmt.Println("转换WORD文档失败" + err.Error())
errs = append(errs, err.Error())
go func(convert *Converter) {
for _, v := range convert.Config.Format {
fmt.Println("convert to " + v)
switch strings.ToLower(v) {
case "epub":
convert.process <- func() {
if err = convert.convertToEpub(); err != nil {
errs = append(errs, err.Error())
fmt.Println("转换EPUB文档失败" + err.Error())
}
}
group.Done()
}(&group)
case "mobi":
convert.process <- func() {
if err = convert.convertToMobi(); err != nil {
errs = append(errs, err.Error())
fmt.Println("转换MOBI文档失败" + err.Error())
}
}
case "pdf":
convert.process <- func() {
if err = convert.convertToPdf(); err != nil {
fmt.Println("转换PDF文档失败" + err.Error())
errs = append(errs, err.Error())
}
}
case "docx":
convert.process <- func() {
if err = convert.convertToDocx(); err != nil {
fmt.Println("转换WORD文档失败" + err.Error())
errs = append(errs, err.Error())
}
}
}
}
close(convert.process)
}(convert)
group := sync.WaitGroup{}
for {
action, isClosed := <-convert.process
fmt.Println(action,isClosed)
if action == nil && !isClosed {
break;
}
group.Add(1)
<- convert.limitChan
fmt.Println("正在处理")
go func(group *sync.WaitGroup) {
action()
group.Done()
convert.limitChan <- true
}(&group)
}
group.Wait()
if len(errs) > 0 {
err = errors.New(strings.Join(errs, "\n"))
}
} else {
err = this.convertToPdf()
err = convert.convertToPdf()
if err != nil {
fmt.Println(err)
}
@ -272,7 +303,7 @@ func (this *Converter) generateTocNcx() (err error) {
</ncx>
`
codes, _ := this.tocToXml(0, 1)
ncx = fmt.Sprintf(ncx, this.Config.Language, this.Config.Title, strings.Join(codes, ""))
ncx = fmt.Sprintf(ncx, this.Config.Language, html.EscapeString(this.Config.Title), strings.Join(codes, ""))
return ioutil.WriteFile(filepath.Join(this.BasePath, "toc.ncx"), []byte(ncx), os.ModePerm)
}
@ -327,11 +358,11 @@ func (this *Converter) tocToSummary(pid int) (summarys []string) {
summarys = append(summarys, "<ul>")
for _, toc := range this.Config.Toc {
if toc.Pid == pid {
summarys = append(summarys, fmt.Sprintf(`<li><a href="%v">%v</a></li>`, toc.Link, toc.Title))
summarys = append(summarys, fmt.Sprintf(`<li><a href="%v">%v</a></li>`, toc.Link, html.EscapeString(toc.Title)))
for _, item := range this.Config.Toc {
if item.Pid == toc.Id {
summarys = append(summarys, fmt.Sprintf(`<li><ul><li><a href="%v">%v</a></li>`, item.Link, item.Title))
summarys = append(summarys, fmt.Sprintf(`<li><ul><li><a href="%v">%v</a></li>`, item.Link, html.EscapeString(item.Title)))
summarys = append(summarys, "<li>")
summarys = append(summarys, this.tocToSummary(item.Id)...)
summarys = append(summarys, "</li></ul></li>")
@ -352,7 +383,7 @@ func (this *Converter) getNavPoint(toc Toc, idx int) (navpoint string, nextidx i
<text>%v</text>
</navLabel>
<content src="%v"/>`
navpoint = fmt.Sprintf(navpoint, toc.Id, idx, toc.Title, toc.Link)
navpoint = fmt.Sprintf(navpoint, toc.Id, idx, html.EscapeString(toc.Title), toc.Link)
this.Config.Order = append(this.Config.Order, toc.Link)
nextidx = idx + 1
return
@ -377,7 +408,7 @@ func (this *Converter) generateContentOpf() (err error) {
<dc:creator opf:file-as="Unknown" opf:role="aut">%v</dc:creator>
<meta name="calibre:timestamp" content="%v"/>
`
meta = fmt.Sprintf(meta, this.Config.Title, this.Config.Contributor, this.Config.Publisher, this.Config.Description, this.Config.Language, this.Config.Creator, this.Config.Timestamp)
meta = fmt.Sprintf(meta, html.EscapeString(this.Config.Title), html.EscapeString(this.Config.Contributor), html.EscapeString(this.Config.Publisher), html.EscapeString(this.Config.Description), this.Config.Language, html.EscapeString(this.Config.Creator), this.Config.Timestamp)
if len(this.Config.Cover) > 0 {
meta = meta + `<meta name="cover" content="cover"/>`
guide = `<reference href="titlepage.xhtml" title="Cover" type="cover"/>`

View File

@ -9,8 +9,6 @@ import (
"strings"
"time"
"encoding/base64"
"github.com/PuerkitoBio/goquery"
"github.com/astaxie/beego"
"github.com/astaxie/beego/logs"
@ -23,6 +21,13 @@ import (
"regexp"
"github.com/lifei6671/mindoc/utils/cryptil"
"github.com/lifei6671/mindoc/utils/requests"
"github.com/lifei6671/mindoc/utils/gopool"
"encoding/base64"
"net/http"
)
var (
	// exportLimitWorkerChannel is the shared pool that runs background export
	// jobs, keyed by project.
	//
	// Its worker count comes from export_limit_num (how many projects may be
	// exported at the same time) and its queue length from
	// export_queue_limit_num. export_process_num is the per-project converter
	// parallelism and is handed to the converter itself, so it must not be
	// used to size this pool.
	// Note: gopool.NewChannelPool coerces a worker count <= 0 to 1, so an
	// export_limit_num of 0 currently results in a single worker.
	exportLimitWorkerChannel = gopool.NewChannelPool(conf.GetExportLimitNum(), conf.GetExportQueueLimitNum())
)
type BookResult struct {
@ -209,6 +214,14 @@ func (m *BookResult) ToBookResult(book Book) *BookResult {
return m
}
// BackgroupConvert schedules a background conversion of bookResult on the
// shared export pool, keyed by the book's Identify so that the same project
// is not queued twice while a conversion is still pending.
//
// sessionId is forwarded unchanged to BookResult.Converter.
// The function returns nothing; failures are logged rather than dropped.
func BackgroupConvert(sessionId string, bookResult *BookResult) {
	// Start is guarded by a sync.Once inside the pool, so calling it on every
	// request is cheap. Starting the dispatcher before enqueueing guarantees
	// that a full queue is being drained while LoadOrStore blocks.
	exportLimitWorkerChannel.Start()

	err := exportLimitWorkerChannel.LoadOrStore(bookResult.Identify, func() {
		if _, err := bookResult.Converter(sessionId); err != nil {
			beego.Error("转换文档失败:" + bookResult.BookName + " -> " + err.Error())
		}
	})
	// HandlerIsExistErr only means this project is already queued or being
	// converted, which is the expected outcome of a repeated download click.
	if err != nil && err != gopool.HandlerIsExistErr {
		beego.Error("添加后台转换任务失败:" + bookResult.BookName + " -> " + err.Error())
	}
}
//导出PDF、word等格式
func (m *BookResult) Converter(sessionId string) (ConvertBookResult, error) {
@ -232,7 +245,14 @@ func (m *BookResult) Converter(sessionId string) (ConvertBookResult, error) {
beego.Error("创建目录失败 => ",tempOutputPath,err)
}
defer os.RemoveAll(strings.TrimSuffix(tempOutputPath,"source"))
//defer os.RemoveAll(strings.TrimSuffix(tempOutputPath,"source"))
sourceDir := strings.TrimSuffix(tempOutputPath,"source");
if filetil.FileExists(sourceDir) {
if err := os.MkdirAll(sourceDir,0755); err != nil {
beego.Error("删除临时目录失败 ->", sourceDir , err)
}
}
if filetil.FileExists(pdfpath) && filetil.FileExists(epubpath) && filetil.FileExists(mobipath) && filetil.FileExists(docxpath) {
convertBookResult.EpubPath = epubpath
@ -333,19 +353,47 @@ func (m *BookResult) Converter(sessionId string) (ConvertBookResult, error) {
doc, err := goquery.NewDocumentFromReader(bufio)
doc.Find("img").Each(func(i int, contentSelection *goquery.Selection) {
if src, ok := contentSelection.Attr("src"); ok && strings.HasPrefix(src, "/") {
//contentSelection.SetAttr("src", baseUrl + src)
spath := filepath.Join(conf.WorkingDirectory, src)
if src, ok := contentSelection.Attr("src"); ok {
var encodeString string
if ff, e := ioutil.ReadFile(spath); e == nil {
//如果是本地路径则直接读取文件内容
if strings.HasPrefix(src, "/") {
spath := filepath.Join(conf.WorkingDirectory, src)
encodeString := base64.StdEncoding.EncodeToString(ff)
if ff, e := ioutil.ReadFile(spath); e == nil {
encodeString = base64.StdEncoding.EncodeToString(ff)
}else{
return
}
}else{
client := &http.Client{}
if req,err := http.NewRequest("GET",src,nil); err == nil {
req.Header.Set("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36")
req.Header.Set("Referer",src)
//10秒连接超时时间
client.Timeout = time.Second * 10
src = "data:image/" + filepath.Ext(src) + ";base64," + encodeString
if resp ,err := client.Do(req);err == nil {
contentSelection.SetAttr("src", src)
defer resp.Body.Close()
if body, err := ioutil.ReadAll(resp.Body);err == nil {
encodeString = base64.StdEncoding.EncodeToString(body)
}else{
return
}
}else{
return
}
}else{
return
}
}
src = "data:image/" + filepath.Ext(src) + ";base64," + encodeString
contentSelection.SetAttr("src", src)
}
})
@ -385,6 +433,7 @@ func (m *BookResult) Converter(sessionId string) (ConvertBookResult, error) {
OutputPath: filepath.Join(strings.TrimSuffix(tempOutputPath, "source"),"output"),
Config: ebookConfig,
Debug: true,
ProcessNum: conf.GetExportProcessNum(),
}
os.MkdirAll(eBookConverter.OutputPath,0766)
@ -616,3 +665,4 @@ func (m *BookResult) FindFirstDocumentByBookId(bookId int) (*Document, error) {
return doc, err
}

View File

@ -1,3 +1,12 @@
@import url("https://fonts.googleapis.com/css?family=Montserrat:300,400,500,600,700,800");
/* FontAwesome 4.7.0 web font. All formats are loaded from the same
   ../font-awesome/fonts/ directory; the woff/ttf/svg sources previously
   pointed at ../plugins/font-awesome/fonts/, which did not match the
   eot/woff2 sources. */
@font-face {
    font-family: 'FontAwesome';
    src: url("../font-awesome/fonts/fontawesome-webfont.eot?v=4.7.0");
    src: url("../font-awesome/fonts/fontawesome-webfont.eot?#iefix&v=4.7.0") format("embedded-opentype"),
         url("../font-awesome/fonts/fontawesome-webfont.woff2?v=4.7.0") format("woff2"),
         url("../font-awesome/fonts/fontawesome-webfont.woff?v=4.7.0") format("woff"),
         url("../font-awesome/fonts/fontawesome-webfont.ttf?v=4.7.0") format("truetype"),
         url("../font-awesome/fonts/fontawesome-webfont.svg?v=4.7.0#fontawesomeregular") format("svg");
    font-weight: normal;
    font-style: normal;
}
html,body {
height: 100%;
font-size: 12px;
@ -15,10 +24,11 @@ body {
font-size: 14px;
word-wrap: break-word;
line-height: 1em;
-webkit-font-smoothing: antialiased;
}
h1,h2,h3,h4,h5,h6,strong,input,select,textarea,button,body,code {
font-family: "Helvetica Neue",Helvetica,"Segoe UI",Arial,freesans,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Microsoft Yahei","Helvetica Neue",Helvetica;
font-family: "Montserrat","Helvetica Neue",Helvetica,"Segoe UI",Arial,freesans,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Microsoft Yahei","Helvetica Neue",Helvetica;
}
h1 {
font-size: 2.25em;

View File

@ -1,4 +1,7 @@
/*************表格样式****************/
/* Render markdown content in Montserrat, falling back to the generic
   sans-serif family; !important lets this win over other font-family
   declarations that target the same elements. */
.markdown-body{
font-family: "Montserrat", sans-serif !important;
}
.editormd-preview-container table {
margin-top: 0;
margin-bottom: 24px;

View File

@ -0,0 +1,99 @@
package gopool
import (
"sync"
"errors"
"fmt"
)
var (
	// HandlerIsExistErr is returned by LoadOrStore when a handler is already
	// registered under the given key and has not finished running yet.
	HandlerIsExistErr = errors.New("指定的键已存在")
	// WorkerChanClosedErr is returned by LoadOrStore once Wait has closed the queue.
	WorkerChanClosedErr = errors.New("队列已关闭")
)

// ChannelHandler is a unit of work executed by a ChannelPool.
type ChannelHandler func()

// entry pairs a queued handler with the key it was registered under.
type entry struct {
	handler ChannelHandler
	key     string
}

// ChannelPool runs keyed handlers on a bounded number of goroutines.
//
// At most maxWorkerNum handlers run at the same time and at most maxPoolNum
// handlers wait in the queue. A key can be queued only once until its handler
// has finished. A ChannelPool must not be copied after creation.
type ChannelPool struct {
	maxWorkerNum int
	maxPoolNum   int
	wait         *sync.WaitGroup // counts handlers that are queued or running
	cache        *sync.Map       // keys that are queued or running
	worker       chan *entry     // pending handlers, capacity maxPoolNum
	limit        chan bool       // one token per free worker slot
	mu           sync.RWMutex    // guards isClosed and the closing of worker
	isClosed     bool
	once         *sync.Once // ensures the dispatcher is started only once
}

// NewChannelPool creates a pool with maxWorkerNum concurrent workers and a
// queue holding up to maxPoolNum pending handlers. A maxWorkerNum <= 0 is
// replaced by 1 and a maxPoolNum <= 0 by 100.
func NewChannelPool(maxWorkerNum, maxPoolNum int) *ChannelPool {
	if maxWorkerNum <= 0 {
		maxWorkerNum = 1
	}
	if maxPoolNum <= 0 {
		maxPoolNum = 100
	}
	return &ChannelPool{
		maxWorkerNum: maxWorkerNum,
		maxPoolNum:   maxPoolNum,
		wait:         &sync.WaitGroup{},
		cache:        &sync.Map{},
		// The queue is sized by the queue limit, not by the worker count.
		worker:   make(chan *entry, maxPoolNum),
		limit:    make(chan bool, maxWorkerNum),
		isClosed: false,
		once:     &sync.Once{},
	}
}

// LoadOrStore queues value under key.
//
// It returns WorkerChanClosedErr if Wait has already been called and
// HandlerIsExistErr if a handler for key is still queued or running.
// When the queue is full the call blocks until a slot becomes free.
func (pool *ChannelPool) LoadOrStore(key string, value ChannelHandler) error {
	// The read lock keeps Wait from closing the channel while a send is in
	// flight; sending on a closed channel would panic.
	pool.mu.RLock()
	defer pool.mu.RUnlock()

	if pool.isClosed {
		return WorkerChanClosedErr
	}
	if _, loaded := pool.cache.LoadOrStore(key, false); loaded {
		return HandlerIsExistErr
	}
	// Count the handler at enqueue time so that Wait also waits for handlers
	// that are still sitting in the queue.
	pool.wait.Add(1)
	pool.worker <- &entry{handler: value, key: key}
	return nil
}

// Start launches the dispatcher goroutine. It is safe to call repeatedly;
// only the first call has an effect.
func (pool *ChannelPool) Start() {
	pool.once.Do(func() {
		go func() {
			// Hand out one token per worker slot.
			for i := 0; i < pool.maxWorkerNum; i++ {
				pool.limit <- true
			}
			// The loop ends once the queue is closed and fully drained.
			for task := range pool.worker {
				<-pool.limit // wait for a free worker slot
				go pool.run(task)
			}
			fmt.Println("工作队列已关闭")
		}()
	})
}

// run executes one queued handler, then releases its key and worker slot.
func (pool *ChannelPool) run(task *entry) {
	defer pool.wait.Done()
	defer func() {
		pool.cache.Delete(task.key)
		pool.limit <- true
	}()
	if task.handler != nil {
		task.handler()
	}
}

// Wait closes the queue and blocks until every queued handler has finished.
// After Wait, LoadOrStore returns WorkerChanClosedErr. Calling Wait more than
// once is safe.
func (pool *ChannelPool) Wait() {
	// Make sure a dispatcher is draining the queue; otherwise queued handlers
	// would never run and this call would block forever.
	pool.Start()

	pool.mu.Lock()
	if !pool.isClosed {
		pool.isClosed = true
		close(pool.worker)
	}
	pool.mu.Unlock()

	pool.wait.Wait()
}

View File

@ -17,7 +17,7 @@
<link href="{{cdncss "/static/bootstrap/css/bootstrap.min.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/jstree/3.3.4/themes/default/style.min.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/font-awesome/css/font-awesome.min.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/nprogress/nprogress.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/css/kancloud.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/css/jstree.css"}}" rel="stylesheet">
@ -27,7 +27,7 @@
<link href="{{cdncss "/static/highlight/styles/vs.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/katex/katex.min.css"}}" rel="stylesheet">
<link href="{{cdncss "/static/css/print.css"}}" media="print" rel="stylesheet">
<link href="{{cdncss "/static/font-awesome/css/font-awesome.min.css"}}" rel="stylesheet">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->