Merge pull request #770 from Augists/master

add support for importing docx file
pull/777/head
玖亖伍 2022-02-24 14:50:26 +08:00 committed by GitHub
commit cf628f06ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 660 additions and 8 deletions

View File

@ -127,7 +127,7 @@ project_id_existed = 文档标识已被使用
project_id_error = 项目标识有误
project_id_length = 项目标识必须小于50字符
import_file_empty = 请选择需要上传的文件
file_type_placeholder = 请选择Zip文件
file_type_placeholder = 请选择Zip或Docx文件
publish_to_queue = 发布任务已推送到任务队列,稍后将在后台执行。
team_name_empty = 团队名称不能为空
operate_failed = 操作失败

View File

@ -340,7 +340,7 @@ func (c *BookController) UploadCover() {
fileName := "cover_" + strconv.FormatInt(time.Now().UnixNano(), 16)
//附件路径按照项目组织
// filePath := filepath.Join("uploads", book.Identify, "images", fileName+ext)
// filePath := filepath.Join("uploads", book.Identify, "images", fileName+ext)
filePath := filepath.Join(conf.WorkingDirectory, "uploads", book.Identify, "images", fileName+ext)
path := filepath.Dir(filePath)
@ -571,7 +571,7 @@ func (c *BookController) Copy() {
}
}
//导入zip压缩包
// 导入zip压缩包或docx
func (c *BookController) Import() {
file, moreFile, err := c.GetFile("import-file")
@ -608,7 +608,7 @@ func (c *BookController) Import() {
ext := filepath.Ext(moreFile.Filename)
if !strings.EqualFold(ext, ".zip") {
if !strings.EqualFold(ext, ".zip") && !strings.EqualFold(ext, ".docx") {
c.JsonResult(6004, "不支持的文件类型")
}
@ -643,7 +643,11 @@ func (c *BookController) Import() {
book.Editor = "markdown"
book.Theme = "default"
if strings.EqualFold(ext, ".zip") {
go book.ImportBook(tempPath, c.Lang)
} else if strings.EqualFold(ext, ".docx") {
go book.ImportWordBook(tempPath, c.Lang)
}
logs.Info("用户[", c.Member.Account, "]导入了项目 ->", book)

1
go.mod
View File

@ -14,6 +14,7 @@ require (
github.com/kardianos/service v1.1.0
github.com/lib/pq v1.7.0 // indirect
github.com/lifei6671/gocaptcha v0.1.1
github.com/mattn/go-runewidth v0.0.13
github.com/mattn/go-sqlite3 v2.0.3+incompatible
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
github.com/russross/blackfriday/v2 v2.1.0

4
go.sum
View File

@ -126,6 +126,8 @@ github.com/lib/pq v1.7.0 h1:h93mCPfUSkaul3Ka/VG8uZdmW1uMHDGxzu0NWHuJmHY=
github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lifei6671/gocaptcha v0.1.1 h1:5cvU3w0bK8eJm1P6AiQoPuicoZVAgKKpREBxXF9IaHo=
github.com/lifei6671/gocaptcha v0.1.1/go.mod h1:6QlTU2WzFhzqylAJWSo3OANfKCraGccJwbK01P5fFmI=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sqlite3 v2.0.3+incompatible h1:gXHsfypPkaMZrKbD5209QV9jbUTJKjyR5WD3HYQSd+U=
github.com/mattn/go-sqlite3 v2.0.3+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
@ -174,6 +176,8 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=

View File

@ -680,7 +680,7 @@ func (book *Book) ResetDocumentNumber(bookId int) {
}
}
//导入项目
// 导入zip项目
func (book *Book) ImportBook(zipPath string, lang string) error {
if !filetil.FileExists(zipPath) {
return errors.New("文件不存在 => " + zipPath)
@ -978,6 +978,78 @@ func (book *Book) ImportBook(zipPath string, lang string) error {
return err
}
// ImportWordBook creates a project from a single .docx file.
//
// It inserts the book and the book/member relationship, converts the file at
// docxPath to Markdown with utils.Docx2md, renders that Markdown to HTML with
// blackfriday, stores both as one document named after the first Markdown
// heading line, and finally calls book.ReleaseContent for lang.
//
// An error is returned when docxPath does not exist, when the book or the
// relationship cannot be inserted, when the conversion fails, or when the
// document cannot be saved. In the last case the import still runs to the end
// (ReleaseContent is called) and book.Description is set to the failure
// message in memory.
func (book *Book) ImportWordBook(docxPath string, lang string) (err error) {
	if !filetil.FileExists(docxPath) {
		return errors.New("文件不存在")
	}
	// Normalize Windows separators so the identifier derivation below only
	// has to deal with "/".
	docxPath = strings.Replace(docxPath, "\\", "/", -1)

	o := orm.NewOrm()
	// Without a stored book there is nothing to attach the relationship and
	// the document to, so a failed insert aborts the import.
	if _, err = o.Insert(book); err != nil {
		logs.Error("插入项目 -> ", err)
		return err
	}

	relationship := NewRelationship()
	relationship.BookId = book.BookId
	relationship.RoleId = 0
	relationship.MemberId = book.MemberId
	if err = relationship.Insert(); err != nil {
		logs.Error("插入项目与用户关联 -> ", err)
		return err
	}

	doc := NewDocument()
	doc.BookId = book.BookId
	doc.MemberId = book.MemberId

	// Derive the document identifier from the path below the temp directory,
	// with path separators flattened to "-".
	docIdentify := strings.Replace(strings.TrimPrefix(docxPath, os.TempDir()+"/"), "/", "-", -1)
	if ok, matchErr := regexp.MatchString(`[a-z]+[a-zA-Z0-9_.\-]*$`, docIdentify); !ok || matchErr != nil {
		docIdentify = "import-" + docIdentify
	}
	doc.Identify = docIdentify

	if doc.Markdown, err = utils.Docx2md(docxPath, false); err != nil {
		logs.Error("导入doc项目转换异常 => ", err)
		return err
	}
	doc.Content = string(blackfriday.Run([]byte(doc.Markdown)))
	doc.Version = time.Now().Unix()

	// The document name is the first line that starts with "#", with the
	// leading "#" characters and surrounding whitespace removed.
	var docName string
	for _, line := range strings.Split(doc.Markdown, "\n") {
		if strings.HasPrefix(line, "#") {
			docName = strings.TrimLeft(line, "#")
			break
		}
	}
	doc.DocumentName = strings.TrimSpace(docName)
	// NOTE(review): the document ID is seeded with the member ID; this looks
	// unintended for a freshly created document. Confirm against
	// Document.InsertOrUpdate before changing it.
	doc.DocumentId = book.MemberId

	// Assign to the named result instead of declaring a new err with ":=".
	// The previous shadowed variable made the failure handling below
	// unreachable and caused save errors to be reported as success.
	if err = doc.InsertOrUpdate("document_name", "book_id", "markdown", "content"); err != nil {
		logs.Error(doc.DocumentId, err)
		logs.Error("导入项目异常 => ", err)
		book.Description = "【项目导入存在错误:" + err.Error() + "】"
	}
	logs.Info("项目导入完毕 => ", book.BookName)
	book.ReleaseContent(book.BookId, lang)
	return err
}
func (book *Book) FindForRoleId(bookId, memberId int) (conf.BookRole, error) {
o := orm.NewOrm()

571
utils/docx2md.go 100644
View File

@ -0,0 +1,571 @@
// https://github.com/mattn/docx2md
// License MIT
package utils
import (
"archive/zip"
"bytes"
"encoding/base64"
"encoding/xml"
"errors"
_ "flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path"
"path/filepath"
_ "runtime"
"strconv"
"strings"
"github.com/mattn/go-runewidth"
)
// Relationship is one <Relationship> entry of a relationships part. It maps
// an ID to a Target; walk looks entries up by ID to resolve hyperlinks and
// embedded images.
type Relationship struct {
	// Text is the element's character data.
	Text string `xml:",chardata"`
	// ID is the value of the "Id" attribute.
	ID string `xml:"Id,attr"`
	// Type is the value of the "Type" attribute.
	Type string `xml:"Type,attr"`
	// Target is the value of the "Target" attribute.
	Target string `xml:"Target,attr"`
	// TargetMode is the value of the "TargetMode" attribute.
	TargetMode string `xml:"TargetMode,attr"`
}
// Relationships is the root <Relationships> element of a relationships part.
// Docx2md decodes word/_rels/document.xml.rels into it.
type Relationships struct {
	XMLName xml.Name `xml:"Relationships"`
	// Text is the root element's character data.
	Text string `xml:",chardata"`
	// Xmlns is the value of the "xmlns" attribute.
	Xmlns string `xml:"xmlns,attr"`
	// Relationship holds every <Relationship> child in document order.
	Relationship []Relationship `xml:"Relationship"`
}
// TextVal is a generic element that carries its payload in a "val"
// attribute; it is reused for the simple value children of the numbering
// definitions.
type TextVal struct {
	// Text is the element's character data.
	Text string `xml:",chardata"`
	// Val is the value of the "val" attribute.
	Val string `xml:"val,attr"`
}
// NumberingLvl is one <lvl> element of an abstract numbering definition.
// walk reads Ilvl, Start, NumFmt and PPr.Ind.Left from it when rendering
// list items; the remaining fields are decoded but not consulted there.
type NumberingLvl struct {
	Text string `xml:",chardata"`
	// Ilvl is the "ilvl" attribute, compared as a string against the
	// paragraph's ilvl value.
	Ilvl      string `xml:"ilvl,attr"`
	Tplc      string `xml:"tplc,attr"`
	Tentative string `xml:"tentative,attr"`

	// Start.Val is parsed as the first number of the list.
	Start TextVal `xml:"start"`
	// NumFmt.Val selects the list marker ("decimal", "bullet", ...).
	NumFmt  TextVal `xml:"numFmt"`
	LvlText TextVal `xml:"lvlText"`
	LvlJc   TextVal `xml:"lvlJc"`

	// PPr is the level's <pPr> child.
	PPr struct {
		Text string `xml:",chardata"`
		// Ind is the <ind> child; Left is parsed as an integer and
		// divided by 360 to obtain the list indent.
		Ind struct {
			Text    string `xml:",chardata"`
			Left    string `xml:"left,attr"`
			Hanging string `xml:"hanging,attr"`
		} `xml:"ind"`
	} `xml:"pPr"`

	// RPr is the level's <rPr> child.
	RPr struct {
		Text string `xml:",chardata"`
		U    struct {
			Text string `xml:",chardata"`
			Val  string `xml:"val,attr"`
		} `xml:"u"`
		RFonts struct {
			Text string `xml:",chardata"`
			Hint string `xml:"hint,attr"`
		} `xml:"rFonts"`
	} `xml:"rPr"`
}
// Numbering is the root <numbering> element; Docx2md decodes
// word/numbering.xml into it. walk resolves a paragraph's list settings by
// matching Num[i].NumID, then AbstractNum[j].AbstractNumID, then the level's
// Ilvl.
type Numbering struct {
	XMLName xml.Name `xml:"numbering"`
	Text    string   `xml:",chardata"`

	// Attributes of the root element, captured by local name (presumably
	// the xmlns:* namespace declarations; verify against a sample file).
	Wpc       string `xml:"wpc,attr"`
	Cx        string `xml:"cx,attr"`
	Cx1       string `xml:"cx1,attr"`
	Mc        string `xml:"mc,attr"`
	O         string `xml:"o,attr"`
	R         string `xml:"r,attr"`
	M         string `xml:"m,attr"`
	V         string `xml:"v,attr"`
	Wp14      string `xml:"wp14,attr"`
	Wp        string `xml:"wp,attr"`
	W10       string `xml:"w10,attr"`
	W         string `xml:"w,attr"`
	W14       string `xml:"w14,attr"`
	W15       string `xml:"w15,attr"`
	W16se     string `xml:"w16se,attr"`
	Wpg       string `xml:"wpg,attr"`
	Wpi       string `xml:"wpi,attr"`
	Wne       string `xml:"wne,attr"`
	Wps       string `xml:"wps,attr"`
	Ignorable string `xml:"Ignorable,attr"`

	// AbstractNum holds the <abstractNum> definitions and their levels.
	AbstractNum []struct {
		Text                       string         `xml:",chardata"`
		AbstractNumID              string         `xml:"abstractNumId,attr"`
		RestartNumberingAfterBreak string         `xml:"restartNumberingAfterBreak,attr"`
		Nsid                       TextVal        `xml:"nsid"`
		MultiLevelType             TextVal        `xml:"multiLevelType"`
		Tmpl                       TextVal        `xml:"tmpl"`
		Lvl                        []NumberingLvl `xml:"lvl"`
	} `xml:"abstractNum"`

	// Num holds the <num> instances; each points at an abstract definition
	// through AbstractNumID.Val.
	Num []struct {
		Text          string  `xml:",chardata"`
		NumID         string  `xml:"numId,attr"`
		AbstractNumID TextVal `xml:"abstractNumId"`
	} `xml:"num"`
}
// file bundles the state of one conversion run: the opened archive plus the
// lookup tables that walk and extract consult while rendering.
type file struct {
	// rels is searched by relationship ID for hyperlink and image targets.
	rels Relationships
	// num supplies the list numbering definitions.
	num Numbering
	// r is the opened .docx archive.
	r *zip.ReadCloser
	// embed selects inline base64 images instead of files under "uploads".
	embed bool
	// list holds the current counter of each numbered list, keyed by
	// "<numID>:<indent>".
	list map[string]int
	// name is the .docx file name; without its ".docx" suffix it names the
	// directory that receives extracted media.
	name string
}
// Node is a generic XML element: its name, its raw inner XML and its child
// elements, decoded recursively.
type Node struct {
	XMLName xml.Name
	// Attrs is excluded from automatic decoding (tag "-"); it is filled in
	// by the type's UnmarshalXML method from the start element.
	Attrs []xml.Attr `xml:"-"`
	// Content is the unparsed XML between the element's start and end tags.
	Content []byte `xml:",innerxml"`
	// Nodes are the child elements, whatever their names.
	Nodes []Node `xml:",any"`
}
// UnmarshalXML implements xml.Unmarshaler. It records the start element's
// attributes in n.Attrs and then decodes the rest of the element with the
// default struct rules.
func (n *Node) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	// plain has Node's fields but none of its methods, so decoding into it
	// does not call UnmarshalXML again.
	type plain Node

	n.Attrs = start.Attr
	target := (*plain)(n)
	return d.DecodeElement(target, &start)
}
// escape returns s with a backslash inserted in front of every occurrence of
// any character contained in set. An empty set leaves s unchanged.
func escape(s, set string) string {
	// Old/new pairs for strings.NewReplacer: each character maps to itself
	// preceded by a backslash.
	pairs := make([]string, 0, 2*len(set))
	for _, r := range set {
		ch := string(r)
		pairs = append(pairs, ch, `\`+ch)
	}
	return strings.NewReplacer(pairs...).Replace(s)
}
// extract writes a Markdown image reference for the relationship rel to w.
//
// The image is the archive entry named "word/" + rel.Target. In embed mode
// its bytes are inlined as a base64 data URI and nothing is written to disk.
// Otherwise the bytes are stored under uploads/<name without .docx>/<target>
// (relative to the working directory) and the reference points at that path
// with a leading "/". When no archive entry matches, nothing is written to w
// and nil is returned.
//
// An error is returned when the target would resolve outside the document's
// upload directory, or when creating the directory, reading the entry or
// writing the file fails.
func (zf *file) extract(rel *Relationship, w io.Writer) error {
	docDir := strings.TrimSuffix(zf.name, ".docx")
	base := filepath.Join("uploads", docDir)
	dest := filepath.Join(base, rel.Target)

	if !zf.embed {
		// rel.Target comes from the uploaded document, so refuse targets
		// such as "../../x" that would be written outside base.
		inside, err := filepath.Rel(base, dest)
		if err != nil || inside == ".." || strings.HasPrefix(inside, ".."+string(filepath.Separator)) {
			return fmt.Errorf("docx2md: image target %q escapes %q", rel.Target, base)
		}
		if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
			return err
		}
	}

	for _, f := range zf.r.File {
		if f.Name != "word/"+rel.Target {
			continue
		}
		rc, err := f.Open()
		if err != nil {
			return err
		}
		// Only the first matching entry is processed (see the break below),
		// so this defer is registered at most once per call.
		defer rc.Close()

		// Read until EOF: a single Read call may return fewer bytes than the
		// entry holds, which truncated images. This also avoids sizing a
		// buffer from the size field in the archive header.
		data, err := ioutil.ReadAll(rc)
		if err != nil {
			return err
		}

		if zf.embed {
			fmt.Fprintf(w, "![](data:image/png;base64,%s)",
				base64.StdEncoding.EncodeToString(data))
		} else {
			if err := ioutil.WriteFile(dest, data, 0644); err != nil {
				return err
			}
			// The reference is a URL path, so it is joined with "/"
			// regardless of the host OS.
			fmt.Fprintf(w, "![](%s)", "/"+path.Join(
				"uploads",
				docDir,
				escape(rel.Target, "()")))
		}
		break
	}
	return nil
}
// attr returns the value of the first attribute in attrs whose local name
// equals name, ignoring the namespace. The boolean reports whether such an
// attribute was found.
func attr(attrs []xml.Attr, name string) (string, bool) {
	for i := range attrs {
		if a := &attrs[i]; a.Name.Local == name {
			return a.Value, true
		}
	}
	return "", false
}
// walk renders node and, recursively, its descendants as Markdown on w.
//
// Dispatch is on the element's local name only; the namespace is ignored.
// Elements without a dedicated case are transparent: their children are
// rendered in order. The first error returned by a nested walk or by extract
// is returned; the results of the fmt.Fprint* calls on w are not checked.
func (zf *file) walk(node *Node, w io.Writer) error {
	switch node.XMLName.Local {
	case "hyperlink":
		// Emit "[text](target)". The children are rendered into a buffer
		// first so that "[" and "]" inside the link text can be escaped.
		fmt.Fprint(w, "[")
		var cbuf bytes.Buffer
		for _, n := range node.Nodes {
			if err := zf.walk(&n, &cbuf); err != nil {
				return err
			}
		}
		fmt.Fprint(w, escape(cbuf.String(), "[]"))
		fmt.Fprint(w, "]")
		fmt.Fprint(w, "(")
		// The "id" attribute names a relationship whose Target is the URL.
		// Without a match the parentheses stay empty.
		if id, ok := attr(node.Attrs, "id"); ok {
			for _, rel := range zf.rels.Relationship {
				if id == rel.ID {
					fmt.Fprint(w, escape(rel.Target, "()"))
					break
				}
			}
		}
		fmt.Fprint(w, ")")
	case "t":
		// Content is the element's raw inner XML, so it is written exactly
		// as it appears in the document (entity references are not decoded).
		fmt.Fprint(w, string(node.Content))
	case "pPr":
		// Paragraph properties: indentation, heading/code style and list
		// numbering are written as a prefix in front of the paragraph text.
		code := false
		for _, n := range node.Nodes {
			switch n.XMLName.Local {
			case "ind":
				// A positive integer "left" value yields one indent string
				// per 360 units.
				if left, ok := attr(n.Attrs, "left"); ok {
					if i, err := strconv.Atoi(left); err == nil && i > 0 {
						fmt.Fprint(w, strings.Repeat(" ", i/360))
					}
				}
			case "pStyle":
				if val, ok := attr(n.Attrs, "val"); ok {
					if strings.HasPrefix(val, "Heading") {
						// "Heading<N>" becomes N "#" characters and a space.
						if i, err := strconv.Atoi(val[7:]); err == nil && i > 0 {
							fmt.Fprint(w, strings.Repeat("#", i)+" ")
						}
					} else if val == "Code" {
						code = true
					} else {
						// A style value that is itself a positive integer is
						// treated as a heading level as well.
						if i, err := strconv.Atoi(val); err == nil && i > 0 {
							fmt.Fprint(w, strings.Repeat("#", i)+" ")
						}
					}
				}
			case "numPr":
				// List item: read the numbering ID and level from the
				// children, then resolve marker format, start value and
				// indent from the numbering definitions.
				numID := ""
				ilvl := ""
				numFmt := ""
				start := 1
				ind := 0
				for _, nn := range n.Nodes {
					if nn.XMLName.Local == "numId" {
						if val, ok := attr(nn.Attrs, "val"); ok {
							numID = val
						}
					}
					if nn.XMLName.Local == "ilvl" {
						if val, ok := attr(nn.Attrs, "val"); ok {
							ilvl = val
						}
					}
				}
				// Chain: num (by numID) -> abstractNum (by abstract ID) ->
				// lvl (by ilvl). Only the first match at each step is used.
				for _, num := range zf.num.Num {
					if numID != num.NumID {
						continue
					}
					for _, abnum := range zf.num.AbstractNum {
						if abnum.AbstractNumID != num.AbstractNumID.Val {
							continue
						}
						for _, ablvl := range abnum.Lvl {
							if ablvl.Ilvl != ilvl {
								continue
							}
							if i, err := strconv.Atoi(ablvl.Start.Val); err == nil {
								start = i
							}
							if i, err := strconv.Atoi(ablvl.PPr.Ind.Left); err == nil {
								ind = i / 360
							}
							numFmt = ablvl.NumFmt.Val
							break
						}
						break
					}
					break
				}
				fmt.Fprint(w, strings.Repeat(" ", ind))
				switch numFmt {
				case "decimal", "aiueoFullWidth":
					// Counters are kept per numbering ID and indent: the
					// first item prints start, each later one increments.
					key := fmt.Sprintf("%s:%d", numID, ind)
					cur, ok := zf.list[key]
					if !ok {
						zf.list[key] = start
					} else {
						zf.list[key] = cur + 1
					}
					fmt.Fprintf(w, "%d. ", zf.list[key])
				case "bullet":
					fmt.Fprint(w, "* ")
				}
			}
		}
		// With the "Code" style, the output of this pPr element's own
		// children is wrapped in backticks.
		if code {
			fmt.Fprint(w, "`")
		}
		for _, n := range node.Nodes {
			if err := zf.walk(&n, w); err != nil {
				return err
			}
		}
		if code {
			fmt.Fprint(w, "`")
		}
	case "tbl":
		// Collect the rendered text of every cell ("tc") of every row
		// ("tr"); newlines inside a cell are removed.
		var rows [][]string
		for _, tr := range node.Nodes {
			if tr.XMLName.Local != "tr" {
				continue
			}
			var cols []string
			for _, tc := range tr.Nodes {
				if tc.XMLName.Local != "tc" {
					continue
				}
				var cbuf bytes.Buffer
				if err := zf.walk(&tc, &cbuf); err != nil {
					return err
				}
				cols = append(cols, strings.Replace(cbuf.String(), "\n", "", -1))
			}
			rows = append(rows, cols)
		}
		// The table has as many columns as its widest row.
		maxcol := 0
		for _, cols := range rows {
			if len(cols) > maxcol {
				maxcol = len(cols)
			}
		}
		// Each column is as wide as its widest cell, measured in display
		// width so wide characters line up.
		widths := make([]int, maxcol)
		for _, row := range rows {
			for i := 0; i < maxcol; i++ {
				if i < len(row) {
					width := runewidth.StringWidth(row[i])
					if widths[i] < width {
						widths[i] = width
					}
				}
			}
		}
		for i, row := range rows {
			if i == 0 {
				// Before the first row, emit a blank header row and the
				// "---" separator row; every document row follows as a body
				// row.
				for j := 0; j < maxcol; j++ {
					fmt.Fprint(w, "|")
					fmt.Fprint(w, strings.Repeat(" ", widths[j]))
				}
				fmt.Fprint(w, "|\n")
				for j := 0; j < maxcol; j++ {
					fmt.Fprint(w, "|")
					fmt.Fprint(w, strings.Repeat("-", widths[j]))
				}
				fmt.Fprint(w, "|\n")
			}
			for j := 0; j < maxcol; j++ {
				fmt.Fprint(w, "|")
				if j < len(row) {
					// Padding is computed from the unescaped cell text.
					width := runewidth.StringWidth(row[j])
					fmt.Fprint(w, escape(row[j], "|"))
					fmt.Fprint(w, strings.Repeat(" ", widths[j]-width))
				} else {
					// Rows shorter than maxcol are padded with blank cells.
					fmt.Fprint(w, strings.Repeat(" ", widths[j]))
				}
			}
			fmt.Fprint(w, "|\n")
		}
		fmt.Fprint(w, "\n")
	case "r":
		// Run: the presence of <b>, <i> or <strike> under <rPr> switches
		// the matching Markdown emphasis on; their attributes are not
		// inspected.
		bold := false
		italic := false
		strike := false
		for _, n := range node.Nodes {
			if n.XMLName.Local != "rPr" {
				continue
			}
			for _, nn := range n.Nodes {
				switch nn.XMLName.Local {
				case "b":
					bold = true
				case "i":
					italic = true
				case "strike":
					strike = true
				}
			}
		}
		// Markers open in the order ~~ ** * and close in reverse.
		if strike {
			fmt.Fprint(w, "~~")
		}
		if bold {
			fmt.Fprint(w, "**")
		}
		if italic {
			fmt.Fprint(w, "*")
		}
		// Render the run into a buffer so Markdown emphasis characters in
		// the text can be escaped before being written.
		var cbuf bytes.Buffer
		for _, n := range node.Nodes {
			if err := zf.walk(&n, &cbuf); err != nil {
				return err
			}
		}
		fmt.Fprint(w, escape(cbuf.String(), `*~\`))
		if italic {
			fmt.Fprint(w, "*")
		}
		if bold {
			fmt.Fprint(w, "**")
		}
		if strike {
			fmt.Fprint(w, "~~")
		}
	case "p":
		// Paragraph: children followed by a newline.
		for _, n := range node.Nodes {
			if err := zf.walk(&n, w); err != nil {
				return err
			}
		}
		fmt.Fprintln(w)
	case "blip":
		// Image: the "embed" attribute names the relationship that points
		// at the media entry; extract writes the image reference.
		if id, ok := attr(node.Attrs, "embed"); ok {
			for _, rel := range zf.rels.Relationship {
				if id != rel.ID {
					continue
				}
				if err := zf.extract(&rel, w); err != nil {
					return err
				}
			}
		}
	case "Fallback":
		// Intentionally empty: nothing below a Fallback element is rendered.
	case "txbxContent":
		// Text box content is rendered inside a fenced code block.
		var cbuf bytes.Buffer
		for _, n := range node.Nodes {
			if err := zf.walk(&n, &cbuf); err != nil {
				return err
			}
		}
		fmt.Fprintln(w, "\n```\n"+cbuf.String()+"```")
	default:
		// Unknown element: render the children only.
		for _, n := range node.Nodes {
			if err := zf.walk(&n, w); err != nil {
				return err
			}
		}
	}
	return nil
}
// readFile reads the archive entry f in full and parses it as XML into a
// Node tree. It returns the error from opening, reading or parsing the entry.
func readFile(f *zip.File) (*Node, error) {
	rc, err := f.Open()
	if err != nil {
		return nil, err
	}
	// Deferred only after the error check: on failure rc is nil and must not
	// be closed.
	defer rc.Close()

	b, err := ioutil.ReadAll(rc)
	if err != nil {
		// A short or corrupt entry is reported as a read error instead of
		// being handed to the XML parser as truncated input.
		return nil, err
	}

	var node Node
	if err := xml.Unmarshal(b, &node); err != nil {
		return nil, err
	}
	return &node, nil
}
// findFile returns the first entry of files whose name matches the
// path.Match pattern target, or nil when no entry matches.
func findFile(files []*zip.File, target string) *zip.File {
	for i := 0; i < len(files); i++ {
		candidate := files[i]
		// The error (a malformed pattern) is deliberately ignored; such a
		// pattern simply matches nothing.
		matched, _ := path.Match(target, candidate.Name)
		if !matched {
			continue
		}
		return candidate
	}
	return nil
}
// decodeDocxPart reads the archive entry f in full and XML-decodes it into v.
func decodeDocxPart(f *zip.File, v interface{}) error {
	rc, err := f.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	b, err := ioutil.ReadAll(rc)
	if err != nil {
		return err
	}
	return xml.Unmarshal(b, v)
}

// Docx2md converts the .docx file at path arg to Markdown and returns the
// Markdown text.
//
// When embed is true, images are inlined as base64 data URIs; otherwise they
// are written below the "uploads" directory and referenced by path.
//
// An error is returned when the archive cannot be opened, when the
// relationships, numbering or main document part cannot be read or parsed,
// when no "word/document*.xml" entry exists, when the file name does not end
// in ".docx", or when rendering fails.
func Docx2md(arg string, embed bool) (string, error) {
	r, err := zip.OpenReader(arg)
	if err != nil {
		return "", err
	}
	defer r.Close()

	var rels Relationships
	var num Numbering
	for _, f := range r.File {
		// Each part is opened, read and closed inside the helper, so no
		// entry stays open until the function returns.
		switch f.Name {
		case "word/_rels/document.xml.rels":
			if err := decodeDocxPart(f, &rels); err != nil {
				return "", err
			}
		case "word/numbering.xml":
			if err := decodeDocxPart(f, &num); err != nil {
				return "", err
			}
		}
	}

	f := findFile(r.File, "word/document*.xml")
	if f == nil {
		return "", errors.New("incorrect document")
	}
	node, err := readFile(f)
	if err != nil {
		return "", err
	}

	// The base name, minus ".docx", later names the media directory.
	fileName := filepath.Base(arg)
	if !strings.HasSuffix(fileName, ".docx") {
		// Report the problem to the caller. Exiting the process here would
		// take down the whole application over one badly named upload.
		log.Printf("docx2md: %q: file name must end with .docx", arg)
		return "", errors.New("file name must end with .docx")
	}

	var buf bytes.Buffer
	zf := &file{
		r:     r,
		rels:  rels,
		num:   num,
		embed: embed,
		list:  make(map[string]int),
		name:  fileName,
	}
	if err := zf.walk(node, &buf); err != nil {
		return "", err
	}
	return buf.String(), nil
}

View File

@ -248,7 +248,7 @@
</div>
<div class="form-group">
<div class="file-loading">
<input id="import-book-upload" name="import-file" type="file" accept=".zip">
<input id="import-book-upload" name="import-file" type="file" accept=".zip,.docx">
</div>
<div id="kartik-file-errors"></div>
</div>
@ -465,7 +465,7 @@
'required': true,
'validateInitialCount': true,
"language" : "{{i18n $.Lang "common.upload_lang"}}",
'allowedFileExtensions': ['zip'],
'allowedFileExtensions': ['zip', 'docx'],
'msgPlaceholder' : '{{i18n $.Lang "message.file_type_placeholder"}}',
'elErrorContainer' : "#import-book-form-error-message",
'uploadExtraData' : function () {