excelize.go 16.4 KB
Newer Older
xurime's avatar
xurime 已提交
1
// Copyright 2016 - 2023 The excelize Authors. All rights reserved. Use of
xurime's avatar
xurime 已提交
2 3
// this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
xurime's avatar
xurime 已提交
4

5 6 7 8 9
// Package excelize providing a set of functions that allow you to write to and
// read from XLAM / XLSM / XLSX / XLTM / XLTX files. Supports reading and
// writing spreadsheet documents generated by Microsoft Excel™ 2007 and later.
// Supports complex components by high compatibility, and provided streaming
// API for generating or reading data from a worksheet with huge amounts of
xurime's avatar
xurime 已提交
10
// data. This library needs Go version 1.16 or later.
xurime's avatar
xurime 已提交
11 12
//
// See https://xuri.me/excelize for more information about this package.
xurime's avatar
xurime 已提交
13 14 15 16
package excelize

import (
	"archive/zip"
17
	"bytes"
xurime's avatar
xurime 已提交
18
	"encoding/xml"
J
Josh Fyne 已提交
19 20
	"io"
	"os"
xurime's avatar
xurime 已提交
21
	"path/filepath"
xurime's avatar
xurime 已提交
22
	"strconv"
23
	"strings"
24
	"sync"
25 26

	"golang.org/x/net/html/charset"
xurime's avatar
xurime 已提交
27 28
)

29
// File define a populated spreadsheet file struct.
30
type File struct {
31
	mu               sync.Mutex
xurime's avatar
xurime 已提交
32
	checked          sync.Map
33 34 35 36
	options          *Options
	sharedStringItem [][]uint
	sharedStringsMap map[string]int
	sharedStringTemp *os.File
37 38 39
	sheetMap         map[string]string
	streams          map[string]*StreamWriter
	tempFiles        sync.Map
40
	xmlAttr          sync.Map
41
	CalcChain        *xlsxCalcChain
42
	CharsetReader    charsetTranscoderFn
43 44
	Comments         map[string]*xlsxComments
	ContentTypes     *xlsxTypes
45
	DecodeVMLDrawing map[string]*decodeVmlDrawing
46
	DecodeCellImages *decodeCellImages
47 48
	Drawings         sync.Map
	Path             string
49 50
	Pkg              sync.Map
	Relationships    sync.Map
51 52 53 54
	SharedStrings    *xlsxSST
	Sheet            sync.Map
	SheetCount       int
	Styles           *xlsxStyleSheet
55
	Theme            *decodeTheme
56
	VMLDrawing       map[string]*vmlDrawing
57
	VolatileDeps     *xlsxVolTypes
58
	WorkBook         *xlsxWorkbook
xurime's avatar
xurime 已提交
59 60
}

xurime's avatar
xurime 已提交
61 62
// charsetTranscoderFn set user-defined codepage transcoder function for open
// the spreadsheet from non-UTF-8 encoding.
63 64
type charsetTranscoderFn func(charset string, input io.Reader) (rdr io.Reader, err error)

65
// Options define the options for opening and reading the spreadsheet.
66
//
67 68 69
// MaxCalcIterations specifies the maximum iterations for iterative
// calculation, the default value is 0.
//
70 71 72 73 74
// Password specifies the password of the spreadsheet in plain text.
//
// RawCellValue specifies if apply the number format for the cell value or get
// the raw value.
//
75
// UnzipSizeLimit specifies to unzip size limit in bytes on open the
76
// spreadsheet, this value should be greater than or equal to
77
// UnzipXMLSizeLimit, the default size limit is 16GB.
78
//
79 80 81 82 83
// UnzipXMLSizeLimit specifies the memory limit on unzipping worksheet and
// shared string table in bytes, worksheet XML will be extracted to system
// temporary directory when the file size is over this value, this value
// should be less than or equal to UnzipSizeLimit, the default value is
// 16MB.
84
//
85
// ShortDatePattern specifies the short date number format code. In the
86 87 88 89
// spreadsheet applications, date formats display date and time serial numbers
// as date values. Date formats that begin with an asterisk (*) respond to
// changes in regional date and time settings that are specified for the
// operating system. Formats without an asterisk are not affected by operating
90
// system settings. The ShortDatePattern used for specifies apply date formats
91 92
// that begin with an asterisk.
//
93
// LongDatePattern specifies the long date number format code.
94
//
95 96 97 98
// LongTimePattern specifies the long time number format code.
//
// CultureInfo specifies the country code for applying built-in language number
// format code these effect by the system's local language settings.
99
type Options struct {
100
	MaxCalcIterations uint
101 102 103 104
	Password          string
	RawCellValue      bool
	UnzipSizeLimit    int64
	UnzipXMLSizeLimit int64
105 106 107 108
	ShortDatePattern  string
	LongDatePattern   string
	LongTimePattern   string
	CultureInfo       CultureName
109 110
}

111
// OpenFile take the name of a spreadsheet file and returns a populated
xurime's avatar
xurime 已提交
112 113
// spreadsheet file struct for it. For example, open spreadsheet with
// password protection:
114
//
115
//	f, err := excelize.OpenFile("Book1.xlsx", excelize.Options{Password: "password"})
116
//
117
// Close the file by Close function after opening the spreadsheet.
118
func OpenFile(filename string, opts ...Options) (*File, error) {
xurime's avatar
xurime 已提交
119
	file, err := os.Open(filepath.Clean(filename))
120
	if err != nil {
J
Josh Fyne 已提交
121 122
		return nil, err
	}
123
	f, err := OpenReader(file, opts...)
J
Josh Fyne 已提交
124
	if err != nil {
125 126
		if closeErr := file.Close(); closeErr != nil {
			return f, closeErr
127
		}
128
		return f, err
J
Josh Fyne 已提交
129 130
	}
	f.Path = filename
131
	return f, file.Close()
J
Josh Fyne 已提交
132 133
}

134
// newFile is object builder
135 136
func newFile() *File {
	return &File{
137
		options:          &Options{UnzipSizeLimit: UnzipSizeLimit, UnzipXMLSizeLimit: StreamChunkSize},
xurime's avatar
xurime 已提交
138 139
		xmlAttr:          sync.Map{},
		checked:          sync.Map{},
140
		sheetMap:         make(map[string]string),
141
		tempFiles:        sync.Map{},
142
		Comments:         make(map[string]*xlsxComments),
143
		Drawings:         sync.Map{},
144
		sharedStringsMap: make(map[string]int),
145
		Sheet:            sync.Map{},
146 147
		DecodeVMLDrawing: make(map[string]*decodeVmlDrawing),
		VMLDrawing:       make(map[string]*vmlDrawing),
148
		Relationships:    sync.Map{},
149 150 151 152
		CharsetReader:    charset.NewReaderLabel,
	}
}

153 154 155
// checkOpenReaderOptions check and validate options field value for open
// reader.
func (f *File) checkOpenReaderOptions() error {
xurime's avatar
xurime 已提交
156 157
	if f.options.UnzipSizeLimit == 0 {
		f.options.UnzipSizeLimit = UnzipSizeLimit
158 159
		if f.options.UnzipXMLSizeLimit > f.options.UnzipSizeLimit {
			f.options.UnzipSizeLimit = f.options.UnzipXMLSizeLimit
160 161
		}
	}
162 163 164 165
	if f.options.UnzipXMLSizeLimit == 0 {
		f.options.UnzipXMLSizeLimit = StreamChunkSize
		if f.options.UnzipSizeLimit < f.options.UnzipXMLSizeLimit {
			f.options.UnzipXMLSizeLimit = f.options.UnzipSizeLimit
166 167
		}
	}
168
	if f.options.UnzipXMLSizeLimit > f.options.UnzipSizeLimit {
169 170
		return ErrOptionsUnzipSizeLimit
	}
171
	return f.checkDateTimePattern()
172 173 174 175 176 177 178 179 180 181
}

// OpenReader read data stream from io.Reader and return a populated
// spreadsheet file.
func OpenReader(r io.Reader, opts ...Options) (*File, error) {
	b, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}
	f := newFile()
182
	f.options = getOptions(opts...)
183 184
	if err = f.checkOpenReaderOptions(); err != nil {
		return nil, err
xurime's avatar
xurime 已提交
185 186
	}
	if bytes.Contains(b, oleIdentifier) {
187 188
		if b, err = Decrypt(b, f.options); err != nil {
			return nil, ErrWorkbookFileFormat
189
		}
190 191 192
	}
	zr, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
193 194 195
		if len(f.options.Password) > 0 {
			return nil, ErrWorkbookPassword
		}
J
Josh Fyne 已提交
196 197
		return nil, err
	}
198
	file, sheetCount, err := f.ReadZipReader(zr)
J
Josh Fyne 已提交
199 200
	if err != nil {
		return nil, err
xurime's avatar
xurime 已提交
201
	}
202 203 204 205
	f.SheetCount = sheetCount
	for k, v := range file {
		f.Pkg.Store(k, v)
	}
206 207 208
	if f.CalcChain, err = f.calcChainReader(); err != nil {
		return f, err
	}
209 210 211
	if f.sheetMap, err = f.getSheetMap(); err != nil {
		return f, err
	}
212 213 214 215
	if f.Styles, err = f.stylesReader(); err != nil {
		return f, err
	}
	f.Theme, err = f.themeReader()
216
	return f, err
xurime's avatar
xurime 已提交
217 218
}

219
// getOptions provides a function to parse the optional settings for open
xurime's avatar
xurime 已提交
220
// and reading spreadsheet.
221
func getOptions(opts ...Options) *Options {
222 223 224
	options := &Options{}
	for _, opt := range opts {
		options = &opt
xurime's avatar
xurime 已提交
225
	}
226
	return options
xurime's avatar
xurime 已提交
227 228
}

229 230
// CharsetTranscoder Set user defined codepage transcoder function for open
// XLSX from non UTF-8 encoding.
231 232
func (f *File) CharsetTranscoder(fn charsetTranscoderFn) *File { f.CharsetReader = fn; return f }

233
// Creates new XML decoder with charset reader.
234 235 236 237 238 239
func (f *File) xmlNewDecoder(rdr io.Reader) (ret *xml.Decoder) {
	ret = xml.NewDecoder(rdr)
	ret.CharsetReader = f.CharsetReader
	return
}

xurime's avatar
xurime 已提交
240
// setDefaultTimeStyle provides a function to set default numbers format for
241
// time.Time type cell value by given worksheet name, cell reference and
242
// number format code.
243 244
func (f *File) setDefaultTimeStyle(sheet, cell string, format int) error {
	s, err := f.GetCellStyle(sheet, cell)
245 246 247 248
	if err != nil {
		return err
	}
	if s == 0 {
249
		style, _ := f.NewStyle(&Style{NumFmt: format})
250
		err = f.SetCellStyle(sheet, cell, cell, style)
251
	}
252
	return err
253 254
}

xurime's avatar
xurime 已提交
255 256
// workSheetReader provides a function to get the pointer to the structure
// after deserialization by given worksheet name.
257
func (f *File) workSheetReader(sheet string) (ws *xlsxWorksheet, err error) {
258 259 260 261
	var (
		name string
		ok   bool
	)
262 263 264
	if err = checkSheetName(sheet); err != nil {
		return
	}
265
	if name, ok = f.getSheetXMLPath(sheet); !ok {
266
		err = ErrSheetNotExist{sheet}
267
		return
268
	}
269 270 271 272
	if worksheet, ok := f.Sheet.Load(name); ok && worksheet != nil {
		ws = worksheet.(*xlsxWorksheet)
		return
	}
273 274
	for _, sheetType := range []string{"xl/chartsheets", "xl/dialogsheet", "xl/macrosheet"} {
		if strings.HasPrefix(name, sheetType) {
275
			err = newNotWorksheetError(sheet)
276 277
			return
		}
278 279
	}
	ws = new(xlsxWorksheet)
xurime's avatar
xurime 已提交
280
	if attrs, ok := f.xmlAttr.Load(name); !ok {
281
		d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readBytes(name))))
xurime's avatar
xurime 已提交
282 283 284 285 286
		if attrs == nil {
			attrs = []xml.Attr{}
		}
		attrs = append(attrs.([]xml.Attr), getRootElement(d)...)
		f.xmlAttr.Store(name, attrs)
287
	}
288
	if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readBytes(name)))).
289 290 291 292
		Decode(ws); err != nil && err != io.EOF {
		return
	}
	err = nil
xurime's avatar
xurime 已提交
293
	if _, ok = f.checked.Load(name); !ok {
294 295
		ws.checkSheet()
		if err = ws.checkRow(); err != nil {
296 297
			return
		}
xurime's avatar
xurime 已提交
298
		f.checked.Store(name, true)
299
	}
300
	f.Sheet.Store(name, ws)
301
	return
xurime's avatar
xurime 已提交
302
}
303

xurime's avatar
xurime 已提交
304
// checkSheet provides a function to fill each row element and make that is
305
// continuous in a worksheet of XML.
306
func (ws *xlsxWorksheet) checkSheet() {
307 308 309 310 311 312 313 314
	row, r0 := ws.checkSheetRows()
	sheetData := xlsxSheetData{Row: make([]xlsxRow, row)}
	row = 0
	for _, r := range ws.SheetData.Row {
		if r.R == nil {
			row++
			r.R = intPtr(row)
			sheetData.Row[row-1] = r
315 316
			continue
		}
317 318
		if *r.R == row && row > 0 {
			sheetData.Row[*r.R-1].C = append(sheetData.Row[*r.R-1].C, r.C...)
319
			continue
320
		}
321 322 323
		if *r.R != 0 {
			sheetData.Row[*r.R-1] = r
			row = *r.R
324
		}
325
	}
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
	for i := 1; i <= len(sheetData.Row); i++ {
		sheetData.Row[i-1].R = intPtr(i)
	}
	ws.checkSheetR0(&sheetData, r0)
}

// checkSheetRows returns the last row number of the worksheet and rows element
// with r="0" attribute.
func (ws *xlsxWorksheet) checkSheetRows() (int, []xlsxRow) {
	var (
		row, max  int
		r0        []xlsxRow
		maxRowNum = func(num int, c []xlsxC) int {
			for _, cell := range c {
				if _, n, err := CellNameToCoordinates(cell.R); err == nil && n > num {
					num = n
				}
			}
			return num
		}
	)
	for i, r := range ws.SheetData.Row {
		if r.R == nil {
			row++
350 351
			continue
		}
352 353 354 355 356
		if i == 0 && *r.R == 0 {
			if num := maxRowNum(row, r.C); num > max {
				max = num
			}
			r0 = append(r0, r)
357 358
			continue
		}
359 360 361
		if *r.R != 0 && *r.R > row {
			row = *r.R
		}
362
	}
363 364
	if max > row {
		row = max
365
	}
366
	return row, r0
367 368 369 370 371
}

// checkSheetR0 handle the row element with r="0" attribute, cells in this row
// could be disorderly, the cell in this row can be used as the value of
// which cell is empty in the normal rows.
372 373 374 375 376 377 378 379 380 381 382 383
func (ws *xlsxWorksheet) checkSheetR0(sheetData *xlsxSheetData, r0s []xlsxRow) {
	for _, r0 := range r0s {
		for _, cell := range r0.C {
			if col, row, err := CellNameToCoordinates(cell.R); err == nil {
				rowIdx := row - 1
				columns, colIdx := len(sheetData.Row[rowIdx].C), col-1
				for c := columns; c < col; c++ {
					sheetData.Row[rowIdx].C = append(sheetData.Row[rowIdx].C, xlsxC{})
				}
				if !sheetData.Row[rowIdx].C[colIdx].hasValue() {
					sheetData.Row[rowIdx].C[colIdx] = cell
				}
384 385 386 387
			}
		}
	}
	ws.SheetData = *sheetData
xurime's avatar
xurime 已提交
388 389
}

390 391 392
// setRels provides a function to set relationships by given relationship ID,
// XML path, relationship type, target and target mode.
func (f *File) setRels(rID, relPath, relType, target, targetMode string) int {
393
	rels, _ := f.relsReader(relPath)
394 395 396
	if rels == nil || rID == "" {
		return f.addRels(relPath, relType, target, targetMode)
	}
397 398
	rels.mu.Lock()
	defer rels.mu.Unlock()
399 400 401 402 403 404 405 406 407 408 409 410 411
	var ID int
	for i, rel := range rels.Relationships {
		if rel.ID == rID {
			rels.Relationships[i].Type = relType
			rels.Relationships[i].Target = target
			rels.Relationships[i].TargetMode = targetMode
			ID, _ = strconv.Atoi(strings.TrimPrefix(rID, "rId"))
			break
		}
	}
	return ID
}

xurime's avatar
xurime 已提交
412 413 414
// addRels provides a function to add relationships by given XML path,
// relationship type, target and target mode.
func (f *File) addRels(relPath, relType, target, targetMode string) int {
415
	uniqPart := map[string]string{
416 417
		SourceRelationshipSharedStrings: "/xl/sharedStrings.xml",
	}
418
	rels, _ := f.relsReader(relPath)
xurime's avatar
xurime 已提交
419 420 421
	if rels == nil {
		rels = &xlsxRelationships{}
	}
422 423
	rels.mu.Lock()
	defer rels.mu.Unlock()
424
	var rID int
425
	for idx, rel := range rels.Relationships {
426 427 428 429
		ID, _ := strconv.Atoi(strings.TrimPrefix(rel.ID, "rId"))
		if ID > rID {
			rID = ID
		}
430 431 432 433 434 435
		if relType == rel.Type {
			if partName, ok := uniqPart[rel.Type]; ok {
				rels.Relationships[idx].Target = partName
				return rID
			}
		}
436 437
	}
	rID++
xurime's avatar
xurime 已提交
438 439 440 441 442 443 444 445 446
	var ID bytes.Buffer
	ID.WriteString("rId")
	ID.WriteString(strconv.Itoa(rID))
	rels.Relationships = append(rels.Relationships, xlsxRelationship{
		ID:         ID.String(),
		Type:       relType,
		Target:     target,
		TargetMode: targetMode,
	})
447
	f.Relationships.Store(relPath, rels)
xurime's avatar
xurime 已提交
448 449 450
	return rID
}

451
// UpdateLinkedValue fix linked values within a spreadsheet are not updating in
452
// Office Excel application. This function will be remove value tag when met a
453
// cell have a linked value. Reference
454
// https://social.technet.microsoft.com/Forums/office/en-US/e16bae1f-6a2c-4325-8013-e989a3479066/excel-2010-linked-cells-not-updating
455
//
456 457
// Notice: after opening generated workbook, Excel will update the linked value
// and generate a new value and will prompt to save the file or not.
458 459 460
//
// For example:
//
461 462 463 464 465 466
//	<row r="19" spans="2:2">
//	    <c r="B19">
//	        <f>SUM(Sheet2!D2,Sheet2!D11)</f>
//	        <v>100</v>
//	     </c>
//	</row>
467 468 469
//
// to
//
470 471 472 473 474
//	<row r="19" spans="2:2">
//	    <c r="B19">
//	        <f>SUM(Sheet2!D2,Sheet2!D11)</f>
//	    </c>
//	</row>
xurime's avatar
xurime 已提交
475
func (f *File) UpdateLinkedValue() error {
476 477 478 479
	wb, err := f.workbookReader()
	if err != nil {
		return err
	}
480 481
	// recalculate formulas
	wb.CalcPr = nil
482
	for _, name := range f.GetSheetList() {
xurime's avatar
xurime 已提交
483
		ws, err := f.workSheetReader(name)
xurime's avatar
xurime 已提交
484
		if err != nil {
485
			if err.Error() == newNotWorksheetError(name).Error() {
486 487
				continue
			}
xurime's avatar
xurime 已提交
488 489
			return err
		}
xurime's avatar
xurime 已提交
490 491
		for indexR := range ws.SheetData.Row {
			for indexC, col := range ws.SheetData.Row[indexR].C {
492
				if col.F != nil && col.V != "" {
xurime's avatar
xurime 已提交
493 494
					ws.SheetData.Row[indexR].C[indexC].V = ""
					ws.SheetData.Row[indexR].C[indexC].T = ""
495 496 497 498
				}
			}
		}
	}
xurime's avatar
xurime 已提交
499
	return nil
500
}
501 502

// AddVBAProject provides the method to add vbaProject.bin file which contains
503 504
// functions and/or macros. The file extension should be XLSM or XLTM. For
// example:
505
//
506 507 508 509 510
//	codeName := "Sheet1"
//	if err := f.SetSheetProps("Sheet1", &excelize.SheetPropsOptions{
//	    CodeName: &codeName,
//	}); err != nil {
//	    fmt.Println(err)
511
//	    return
512
//	}
513 514
//	file, err := os.ReadFile("vbaProject.bin")
//	if err != nil {
515
//	    fmt.Println(err)
516 517 518 519 520
//	    return
//	}
//	if err := f.AddVBAProject(file); err != nil {
//	    fmt.Println(err)
//	    return
521 522 523
//	}
//	if err := f.SaveAs("macros.xlsm"); err != nil {
//	    fmt.Println(err)
524
//	    return
525
//	}
526
func (f *File) AddVBAProject(file []byte) error {
527 528
	var err error
	// Check vbaProject.bin exists first.
529
	if !bytes.Contains(file, oleIdentifier) {
530
		return ErrAddVBAProject
531
	}
532 533 534 535
	rels, err := f.relsReader(f.getWorkbookRelsPath())
	if err != nil {
		return err
	}
536 537
	rels.mu.Lock()
	defer rels.mu.Unlock()
538 539
	var rID int
	var ok bool
540
	for _, rel := range rels.Relationships {
541 542 543 544 545 546 547 548 549 550 551
		if rel.Target == "vbaProject.bin" && rel.Type == SourceRelationshipVBAProject {
			ok = true
			continue
		}
		t, _ := strconv.Atoi(strings.TrimPrefix(rel.ID, "rId"))
		if t > rID {
			rID = t
		}
	}
	rID++
	if !ok {
552
		rels.Relationships = append(rels.Relationships, xlsxRelationship{
553 554 555 556 557
			ID:     "rId" + strconv.Itoa(rID),
			Target: "vbaProject.bin",
			Type:   SourceRelationshipVBAProject,
		})
	}
558
	f.Pkg.Store("xl/vbaProject.bin", file)
559 560 561
	return err
}

562 563
// setContentTypePartProjectExtensions provides a function to set the content
// type for relationship parts and the main document part.
564
func (f *File) setContentTypePartProjectExtensions(contentType string) error {
565
	var ok bool
566 567 568 569
	content, err := f.contentTypesReader()
	if err != nil {
		return err
	}
570 571
	content.mu.Lock()
	defer content.mu.Unlock()
572 573 574 575 576 577 578
	for _, v := range content.Defaults {
		if v.Extension == "bin" {
			ok = true
		}
	}
	for idx, o := range content.Overrides {
		if o.PartName == "/xl/workbook.xml" {
579
			content.Overrides[idx].ContentType = contentType
580 581 582 583 584
		}
	}
	if !ok {
		content.Defaults = append(content.Defaults, xlsxDefault{
			Extension:   "bin",
585
			ContentType: ContentTypeVBA,
586 587
		})
	}
588
	return err
589
}