# Portions copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd3 package, which is released under a
# BSD-style licence.
# 2009-05-31 SJM Fixed problem with no CODEPAGE record on extremely minimal BIFF2.x 3rd-party file
# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo
# 2008-02-09 SJM Excel 2.0: build XFs on the fly from cell attributes
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
# 2007-10-11 SJM Added missing entry for blank cell type to ctype_text
# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
# 2007-04-22 SJM Remove experimental "trimming" facility.
#for debugging only
from math import isnan
import time
from struct import unpack
from array import array
from .biffh import *
from .formula import dump_formula, decompile_formula, rangename2d
from .formatting import nearest_colour_index, Format
from .xfcell import XFCell
DEBUG = 0
OBJ_MSO_DEBUG = 0
_WINDOW2_options = (
# Attribute names and initial values to use in case
# a WINDOW2 record is not written.
("show_formulas", 0),
("show_grid_lines", 1),
("show_sheet_headers", 1),
("panes_are_frozen", 0),
("show_zero_values", 1),
("automatic_grid_line_colour", 1),
("columns_from_right_to_left", 0),
("show_outline_symbols", 1),
("remove_splits_if_pane_freeze_is_removed", 0),
("sheet_selected", 0),
# "sheet_visible" appears to be merely a clone of "sheet_selected".
# The real thing is the visibility attribute from the BOUNDSHEET record.
("sheet_visible", 0),
("show_in_page_break_preview", 0),
)
def int_floor_div(x, y):
return divmod(x, y)[0]
class Sheet(BaseObject):
"""Contains the data for one worksheet.
In the cell access functions, "rowx" is a row index, counting from zero,
and "colx" is a column index, counting from zero.
Negative values for row/column indexes and slice positions are supported in
the expected fashion.
For information about cell types and cell values, refer to the documentation
of the Cell class.
WARNING: You don't call this class yourself. You access Sheet objects via
the Book object that was returned when you called xlrd.open_workbook("myfile.xls").
"""
# Name of sheet.
name = ''
# Number of rows in sheet. A row index is in range(thesheet.nrows).
nrows = 0
# Number of columns in sheet. A column index is in range(thesheet.ncols).
ncols = 0
# The map from a column index to a Colinfo object. Often there is an entry
# in COLINFO records for all column indexes in range(257).
# Note that xlrd ignores the entry for the non-existent
# 257th column. On the other hand, there may be no entry for unused columns.
# - New in version 0.6.1
colinfo_map = {}
# The map from a row index to a Rowinfo object. Note that it is possible
# to have missing entries -- at least one source of XLS files doesn't
# bother writing ROW records.
# - New in version 0.6.1
rowinfo_map = {}
# List of address ranges of cells containing column labels.
# These are set up in Excel by Insert > Name > Labels > Columns.
# - New in version 0.6.0
# How to deconstruct the list::
#
# for crange in thesheet.col_label_ranges:
# rlo, rhi, clo, chi = crange
# for rx in xrange(rlo, rhi):
# for cx in xrange(clo, chi):
# print "Column label at (rowx=%d, colx=%d) is %r" \
# (rx, cx, thesheet.cell_value(rx, cx))
#
col_label_ranges = []
# List of address ranges of cells containing row labels.
# For more details, see col_label_ranges above.
# - New in version 0.6.0
row_label_ranges = []
# List of address ranges of cells which have been merged.
# These are set up in Excel by Format > Cells > Alignment, then ticking
# the "Merge cells" box.
# - New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
# How to deconstruct the list::
#
# for crange in thesheet.merged_cells:
# rlo, rhi, clo, chi = crange
# for rowx in xrange(rlo, rhi):
# for colx in xrange(clo, chi):
# # cell (rlo, clo) (the top left one) will carry the data
# # and formatting info; the remainder will be recorded as
# # blank cells, but a renderer will apply the formatting info
# # for the top left cell (e.g. border, pattern) to all cells in
# # the range.
#
merged_cells = []
# Default column width from DEFCOLWIDTH record, else None.
# From the OOo docs:
# """Column width in characters, using the width of the zero character
# from default font (first FONT record in the file). Excel adds some
# extra space to the default width, depending on the default font and
# default font size. The algorithm how to exactly calculate the resulting
# column width is not known.
# Example: The default width of 8 set in this record results in a column
# width of 8.43 using Arial font with a size of 10 points."""
# For the default hierarchy, refer to the Colinfo class above.
# - New in version 0.6.1
defcolwidth = None
# Default column width from STANDARDWIDTH record, else None.
# From the OOo docs:
# """Default width of the columns in 1/256 of the width of the zero
# character, using default font (first FONT record in the file)."""
# For the default hierarchy, refer to the Colinfo class above.
# - New in version 0.6.1
standardwidth = None
# Default value to be used for a row if there is
# no ROW record for that row.
# From the optional DEFAULTROWHEIGHT record.
default_row_height = None
# Default value to be used for a row if there is
# no ROW record for that row.
# From the ´optional´ DEFAULTROWHEIGHT record.
default_row_height_mismatch = None
# Default value to be used for a row if there is
# no ROW record for that row.
# From the ´optional´ DEFAULTROWHEIGHT record.
default_row_hidden = None
# Default value to be used for a row if there is
# no ROW record for that row.
# From the optional DEFAULTROWHEIGHT record.
default_additional_space_above = None
# Default value to be used for a row if there is
# no ROW record for that row.
# From the optional DEFAULTROWHEIGHT record.
default_additional_space_below = None
# Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden
# by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden
# only by VBA macro).
visibility = 0
# A 256-element tuple corresponding to the contents of the GCW record for this sheet.
# If no such record, treat as all bits zero.
# Applies to BIFF4-7 only. See docs of Colinfo class for discussion.
gcw = (0, ) * 256
def __init__(self, book, position, name, number):
self.book = book
self.biff_version = book.biff_version
self._position = position
self.logfile = book.logfile
self.pickleable = book.pickleable
# (to_py3) self.dont_use_array = not(array_array and (CAN_PICKLE_ARRAY or not book.pickleable))
self.name = name
self.number = number
self.verbosity = book.verbosity
self.formatting_info = book.formatting_info
self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map
self.nrows = 0 # actual, including possibly empty cells
self.ncols = 0
self._maxdatarowx = -1 # highest rowx containing a non-empty cell
self._maxdatacolx = -1 # highest colx containing a non-empty cell
self._dimnrows = 0 # as per DIMENSIONS record
self._dimncols = 0
self._cell_values = []
self._cell_types = []
self._cell_xf_indexes = []
self._need_fix_ragged_rows = 0
self.defcolwidth = None
self.standardwidth = None
self.default_row_height = None
self.default_row_height_mismatch = 0
self.default_row_hidden = 0
self.default_additional_space_above = 0
self.default_additional_space_below = 0
self.colinfo_map = {}
self.rowinfo_map = {}
self.col_label_ranges = []
self.row_label_ranges = []
self.merged_cells = []
self._xf_index_stats = [0, 0, 0, 0]
self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record
for attr, defval in _WINDOW2_options:
setattr(self, attr, defval)
self.first_visible_rowx = 0
self.first_visible_colx = 0
self.gridline_colour_index = 0x40
self.gridline_colour_rgb = None # pre-BIFF8
self.cached_page_break_preview_mag_factor = 0
self.cached_normal_view_mag_factor = 0
self._ixfe = None # BIFF2 only
self._cell_attr_to_xfx = {} # BIFF2.0 only
#### Don't initialise this here, use class attribute initialisation.
#### self.gcw = (0, ) * 256 ####
if self.biff_version >= 80:
self.utter_max_rows = 65536
else:
self.utter_max_rows = 16384
self.utter_max_cols = 256
def cell(self, rowx, colx):
""" Get the XFCell() object in the given row and column. """
if self.formatting_info:
xf_index = self.cell_xf_index(rowx, colx)
else:
xf_index = None
ctype = self.cell_type(rowx, colx)
value = self.cell_value(rowx, colx)
return Cell(ctype, value, xf_index, self)
def cell_value(self, rowx, colx):
""" Value of the cell in the given row and column. """
return self._cell_values[rowx][colx]
def cell_type(self, rowx, colx):
""" Type of the cell in the given row and column.
Refer to the documentation of the Cell class.
"""
return self._cell_types[rowx][colx]
# New in version 0.6.1
def cell_xf_index(self, rowx, colx):
""" XF index of the cell in the given row and column.
This is an index into Book.xf_list.
"""
self.req_fmt_info()
xfx = self._cell_xf_indexes[rowx][colx]
if xfx > -1:
self._xf_index_stats[0] += 1
return xfx
# Check for a row xf_index
try:
xfx = self.rowinfo_map[rowx].xf_index
if xfx > -1:
self._xf_index_stats[1] += 1
return xfx
except KeyError:
pass
# Check for a column xf_index
try:
xfx = self.colinfo_map[colx].xf_index
assert xfx > -1
self._xf_index_stats[2] += 1
return xfx
except KeyError:
# If all else fails, 15 is used as hardwired global default xf_index.
self._xf_index_stats[3] += 1
return 15
def row(self, rowx):
""" Returns a sequence of the Cell objects in the given row. """
return [self.cell(rowx, colx) for colx in range(self.ncols)]
def row_types(self, rowx, start_colx=0, end_colx=None):
""" Returns a slice of the types of the cells in the given row. """
if end_colx is None:
return self._cell_types[rowx][start_colx:]
return self._cell_types[rowx][start_colx:end_colx]
def row_values(self, rowx, start_colx=0, end_colx=None):
""" Returns a slice of the values of the cells in the given row. """
if end_colx is None:
return self._cell_values[rowx][start_colx:]
return self._cell_values[rowx][start_colx:end_colx]
def row_slice(self, rowx, start_colx=0, end_colx=None):
""" Returns a slice of the Cell objects in the given row. """
nc = self.ncols
if start_colx < 0:
start_colx += nc
if start_colx < 0:
start_colx = 0
if end_colx is None or end_colx > nc:
end_colx = nc
elif end_colx < 0:
end_colx += nc
return [self.cell(rowx, colx) for colx in range(start_colx, end_colx)]
def col_slice(self, colx, start_rowx=0, end_rowx=None):
""" Returns a slice of the Cell objects in the given column. """
nr = self.nrows
if start_rowx < 0:
start_rowx += nr
if start_rowx < 0:
start_rowx = 0
if end_rowx is None or end_rowx > nr:
end_rowx = nr
elif end_rowx < 0:
end_rowx += nr
return [self.cell(rowx, colx) for rowx in range(start_rowx, end_rowx)]
col = col_slice
""" Returns a sequence of the Cell objects in the given column. """
def col_values(self, colx, start_rowx=0, end_rowx=None):
""" Returns a slice of the values of the cells in the given column. """
nr = self.nrows
if start_rowx < 0:
start_rowx += nr
if start_rowx < 0:
start_rowx = 0
if end_rowx is None or end_rowx > nr:
end_rowx = nr
elif end_rowx < 0:
end_rowx += nr
return [self._cell_values[rowx][colx] for rowx in range(start_rowx, end_rowx)]
def col_types(self, colx, start_rowx=0, end_rowx=None):
""" Returns a slice of the types of the cells in the given column. """
nr = self.nrows
if start_rowx < 0:
start_rowx += nr
if start_rowx < 0:
start_rowx = 0
if end_rowx is None or end_rowx > nr:
end_rowx = nr
elif end_rowx < 0:
end_rowx += nr
return [self._cell_types[rowx][colx] for rowx in range(start_rowx, end_rowx)]
# Following methods are used in building the worksheet.
# They are not part of the API.
def extend_cells(self, nr, nc):
assert 1 <= nc <= self.utter_max_cols
assert 1 <= nr <= self.utter_max_rows
if nr <= self.nrows:
# New cell is in an existing row, so extend that row (if necessary).
# Note that nr < self.nrows means that the cell data
# is not in ascending row order!!
self._need_fix_ragged_rows = 1
nrx = nr - 1
trow = self._cell_types[nrx]
tlen = len(trow)
nextra = max(nc, self.ncols) - tlen
if nextra > 0:
xce = XL_CELL_EMPTY
#(to_py3) if self.dont_use_array: ... removed
trow.extend(array('B', [xce]) * nextra)
if self.formatting_info:
self._cell_xf_indexes[nrx].extend(array('h', [-1]) * nextra)
self._cell_values[nrx].extend([''] * nextra)
if nc > self.ncols:
self.ncols = nc
self._need_fix_ragged_rows = 1
if nr > self.nrows:
scta = self._cell_types.append
scva = self._cell_values.append
scxa = self._cell_xf_indexes.append
fmt_info = self.formatting_info
xce = XL_CELL_EMPTY
nc = self.ncols
#(to_py3) if self.dont_use_array: ... removed
for _unused in range(self.nrows, nr):
scta(array('B', [xce]) * nc)
scva([''] * nc)
if fmt_info:
scxa(array('h', [-1]) * nc)
self.nrows = nr
def fix_ragged_rows(self):
t0 = time.time()
ncols = self.ncols
xce = XL_CELL_EMPTY
s_cell_types = self._cell_types
s_cell_values = self._cell_values
s_cell_xf_indexes = self._cell_xf_indexes
s_fmt_info = self.formatting_info
totrowlen = 0
for rowx in range(self.nrows):
trow = s_cell_types[rowx]
rlen = len(trow)
totrowlen += rlen
nextra = ncols - rlen
if nextra > 0:
s_cell_values[rowx][rlen:] = [''] * nextra
trow.extend(array('B', [xce]) * nextra)
if s_fmt_info:
s_cell_xf_indexes[rowx][rlen:] = array('h', [-1]) * nextra
self._fix_ragged_rows_time = time.time() - t0
def tidy_dimensions(self):
if self.verbosity >= 3:
fprintf(self.logfile,
"tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n",
self.nrows, self.ncols, self._need_fix_ragged_rows)
if self.merged_cells:
nr = nc = 0
umaxrows = self.utter_max_rows
umaxcols = self.utter_max_cols
for crange in self.merged_cells:
rlo, rhi, clo, chi = crange
if not (0 <= rlo < rhi <= umaxrows) \
or not (0 <= clo < chi <= umaxcols):
fprintf(self.logfile,
"*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
self.number, self.name, crange)
if rhi > nr: nr = rhi
if chi > nc: nc = chi
self.extend_cells(nr, nc)
if self.verbosity >= 1 and \
(self.nrows != self._dimnrows or self.ncols != self._dimncols):
fprintf(self.logfile,
"NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
self.number,
self.name,
self._dimnrows,
self._dimncols,
self.nrows,
self.ncols,
)
if self._need_fix_ragged_rows:
self.fix_ragged_rows()
def put_cell(self, rowx, colx, ctype, value, xf_index):
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
if self.formatting_info:
self._cell_xf_indexes[rowx][colx] = xf_index
except IndexError:
self.extend_cells(rowx+1, colx+1)
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
if self.formatting_info:
self._cell_xf_indexes[rowx][colx] = xf_index
except:
print("put_cell", rowx, colx, file=self.logfile)
raise
except:
print("put_cell", rowx, colx, file=self.logfile)
raise
def put_blank_cell(self, rowx, colx, xf_index):
# This is used for cells from BLANK and MULBLANK records
ctype = XL_CELL_BLANK
value = ''
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
self._cell_xf_indexes[rowx][colx] = xf_index
except IndexError:
self.extend_cells(rowx+1, colx+1)
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
self._cell_xf_indexes[rowx][colx] = xf_index
except:
print("put_cell", rowx, colx, file=self.logfile)
raise
except:
print("put_cell", rowx, colx, file=self.logfile)
raise
def put_number_cell(self, rowx, colx, value, xf_index):
# for debugging
if type(value) == float and isnan(value):
pass
ctype = self._xf_index_to_xl_type_map[xf_index]
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
if self.formatting_info:
self._cell_xf_indexes[rowx][colx] = xf_index
except IndexError:
self.extend_cells(rowx+1, colx+1)
try:
self._cell_types[rowx][colx] = ctype
self._cell_values[rowx][colx] = value
if self.formatting_info:
self._cell_xf_indexes[rowx][colx] = xf_index
except:
print("put_number_cell", rowx, colx, file=self.logfile)
raise
except:
print("put_number_cell", rowx, colx, file=self.logfile)
raise
# === Methods after this line neither know nor care about how cells are stored.
def read(self, bk):
global rc_stats
DEBUG = 0
verbose = DEBUG or self.verbosity >= 2
verbose_rows = DEBUG or self.verbosity >= 4
verbose_formulas = 1 and verbose
oldpos = bk._position
bk._position = self._position
XL_SHRFMLA_ETC_ETC = (
XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,
XL_ARRAY2, XL_TABLEOP_B2,
)
self_put_number_cell = self.put_number_cell
self_put_cell = self.put_cell
self_put_blank_cell = self.put_blank_cell
local_unpack = unpack
bk_get_record_parts = bk.get_record_parts
bv = self.biff_version
fmt_info = self.formatting_info
eof_found = 0
while 1:
rc, data_len, data = bk_get_record_parts()
if rc == XL_NUMBER:
rowx, colx, xf_index, d = local_unpack('> 15) & 1
r.outline_level = bits2 & 7
r.outline_group_starts_ends = (bits2 >> 4) & 1
r.hidden = (bits2 >> 5) & 1
r.height_mismatch = (bits2 >> 6) & 1
r.has_default_xf_index = (bits2 >> 7) & 1
r.xf_index = (bits2 >> 16) & 0xfff
r.additional_space_above = (bits2 >> 28) & 1
r.additional_space_below = (bits2 >> 29) & 1
if not r.has_default_xf_index:
r.xf_index = -1
self.rowinfo_map[rowx] = r
if 0 and r.xf_index > -1:
fprintf(self.logfile,
"**ROW %d %d %d\n",
self.number, rowx, r.xf_index)
if verbose_rows:
print('ROW', rowx, bits1, bits2, file=self.logfile)
r.dump(self.logfile,
header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406
if bv >= 50:
# IMPORTANT result_str is bytes
rowx, colx, xf_index, result_str, flags = local_unpack('= 30:
rowx, colx, xf_index, result_str, flags = local_unpack(' 20))
else:
strg = unpack_unicode(data2, 0, lenlen=2)
self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
# if DEBUG: print "FORMULA strg %r" % strg
elif result_str[0] == 1: #b'\x01':
# boolean formula result
value = result_str[2]
self.put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index)
elif result_str[0] == 2: #b'\x02':
# Error in cell
value = result_str[2]
self.put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index)
elif result_str[0] == 3:#b'\x03':
# empty ... i.e. empty (zero-length) string, NOT an empty cell.
self.put_cell(rowx, colx, XL_CELL_TEXT, "", xf_index)
else:
raise XLRDError("unexpected special case (0x%02x) in FORMULA" % result_str[0])
else:
# it is a number
d = local_unpack(' 255: break # Excel does 0 to 256 inclusive
self.colinfo_map[colx] = c
if 0:
fprintf(self.logfile,
"**COL %d %d %d\n",
self.number, colx, c.xf_index)
if verbose:
fprintf(
self.logfile,
"COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n",
self.number, first_colx, last_colx, c.width, c.xf_index, flags,
)
c.dump(self.logfile, header='===')
elif rc == XL_DEFCOLWIDTH:
self.defcolwidth, = local_unpack(">= 1
self.gcw = tuple(gcw)
if 0:
showgcw = "".join(["F "[x] for x in gcw]).rstrip().replace(' ', '.')
print("GCW:", showgcw)
elif rc == XL_BLANK:
if not fmt_info: continue
rowx, colx, xf_index = local_unpack(' found EOF", file=self.logfile)
elif rc == XL_COUNTRY:
bk.handle_country(data)
elif rc == XL_LABELRANGES:
pos = 0
pos = unpack_cell_range_address_list_update_pos(
self.row_label_ranges, data, pos, bv, addr_size=8,
)
pos = unpack_cell_range_address_list_update_pos(
self.col_label_ranges, data, pos, bv, addr_size=8,
)
assert pos == data_len
elif rc == XL_ARRAY:
row1x, rownx, col1x, colnx, array_flags, tokslen = \
local_unpack("= 80
num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \
unpack("<6H", data[0:12])
if self.verbosity >= 1:
fprintf(self.logfile,
"\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \
"*** in Sheet %d (%r).\n" \
"*** %d CF record(s); needs_recalc_or_redraw = %d\n" \
"*** Bounding box is %s\n",
self.number, self.name, num_CFs, needs_recalc,
rangename2d(browx1, browx2+1, bcolx1, bcolx2+1),
)
olist = [] # updated by the function
pos = unpack_cell_range_address_list_update_pos(
olist, data, 12, bv, addr_size=8)
# print >> self.logfile, repr(result), len(result)
if self.verbosity >= 1:
fprintf(self.logfile,
"*** %d individual range(s):\n" \
"*** %s\n",
len(olist),
", ".join([rangename2d(*coords) for coords in olist]),
)
elif rc == XL_CF:
if not fmt_info: continue
cf_type, cmp_op, sz1, sz2, flags = unpack("> 26) & 1
bord_block = (flags >> 28) & 1
patt_block = (flags >> 29) & 1
if self.verbosity >= 1:
fprintf(self.logfile,
"\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \
"*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \
"*** optional data blocks: font=%d, border=%d, pattern=%d\n",
cf_type, cmp_op, sz1, sz2, flags,
font_block, bord_block, patt_block,
)
# hex_char_dump(data, 0, data_len)
pos = 12
if font_block:
(font_height, font_options, weight, escapement, underline,
font_colour_index, two_bits, font_esc, font_underl) = \
unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118])
font_style = (two_bits > 1) & 1
posture = (font_options > 1) & 1
font_canc = (two_bits > 7) & 1
cancellation = (font_options > 7) & 1
if self.verbosity >= 1:
fprintf(self.logfile,
"*** Font info: height=%d, weight=%d, escapement=%d,\n" \
"*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \
"*** style=%d, posture=%d, canc=%d, cancellation=%d\n",
font_height, weight, escapement, underline,
font_colour_index, font_esc, font_underl,
font_style, posture, font_canc, cancellation,
)
pos += 118
if bord_block:
pos += 8
if patt_block:
pos += 4
fmla1 = data[pos:pos+sz1]
pos += sz1
if verbose and sz1:
fprintf(self.logfile,
"*** formula 1:\n",
)
dump_formula(bk, fmla1, sz1, bv, reldelta=0, verbose=1)
fmla2 = data[pos:pos+sz2]
pos += sz2
assert pos == data_len
if verbose and sz2:
fprintf(self.logfile,
"*** formula 2:\n",
)
dump_formula(bk, fmla2, sz2, bv, reldelta=0, verbose=1)
elif rc == XL_DEFAULTROWHEIGHT:
if data_len == 4:
bits, self.default_row_height = unpack("> 1) & 1
self.default_additional_space_above = (bits >> 2) & 1
self.default_additional_space_below = (bits >> 3) & 1
elif rc == XL_MERGEDCELLS:
if not fmt_info: continue
pos = unpack_cell_range_address_list_update_pos(
self.merged_cells, data, 0, bv, addr_size=8)
if verbose:
fprintf(self.logfile,
"MERGEDCELLS: %d ranges\n", int_floor_div(pos - 2, 8))
assert pos == data_len, \
"MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len)
elif rc == XL_WINDOW2:
if bv >= 80:
(options,
self.first_visible_rowx, self.first_visible_colx,
self.gridline_colour_index,
self.cached_page_break_preview_mag_factor,
self.cached_normal_view_mag_factor
) = unpack(">= 1
# print "WINDOW2: visible=%d selected=%d" \
# % (self.sheet_visible, self.sheet_selected)
#### all of the following are for BIFF <= 4W
elif bv <= 45:
if rc == XL_FORMAT or rc == XL_FORMAT2:
bk.handle_format(data, rc)
elif rc == XL_FONT or rc == XL_FONT_B3B4:
bk.handle_font(data)
elif rc == XL_STYLE:
if not self.book._xf_epilogue_done:
self.book.xf_epilogue()
bk.handle_style(data)
elif rc == XL_PALETTE:
bk.handle_palette(data)
elif rc == XL_BUILTINFMTCOUNT:
bk.handle_builtinfmtcount(data)
elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF
bk.handle_xf(data)
elif rc == XL_DATEMODE:
bk.handle_datemode(data)
elif rc == XL_CODEPAGE:
bk.handle_codepage(data)
elif rc == XL_FILEPASS:
bk.handle_filepass(data)
elif rc == XL_WRITEACCESS:
bk.handle_writeaccess(data)
elif rc == XL_IXFE:
self._ixfe = local_unpack('> 15) & 1
r.outline_level = 0
r.outline_group_starts_ends = 0
r.hidden = 0
r.height_mismatch = 0
r.has_default_xf_index = has_defaults & 1
r.additional_space_above = 0
r.additional_space_below = 0
if not r.has_default_xf_index:
r.xf_index = -1
elif data_len == 18:
# Seems the XF index in the cell_attr is dodgy
xfx = local_unpack(' -1:
fprintf(self.logfile,
"**ROW %d %d %d\n",
self.number, rowx, r.xf_index)
if verbose_rows:
print('ROW_B2', rowx, bits1, has_defaults, file=self.logfile)
r.dump(self.logfile,
header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
elif rc == XL_COLWIDTH: # BIFF2 only
if not fmt_info: continue
first_colx, last_colx, width\
= local_unpack("= 2
if self.biff_version == 21:
if self._xf_index_to_xl_type_map:
if true_xfx is not None:
xfx = true_xfx
else:
xfx = cell_attr[0] & 0x3F
if xfx == 0x3F:
if self._ixfe is None:
raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.")
xfx = self._ixfe
# OOo docs are capable of interpretation that each
# cell record is preceded immediately by its own IXFE record.
# Empirical evidence is that (sensibly) an IXFE record applies to all
# following cell records until another IXFE comes along.
return xfx
# Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect.
self.biff_version = self.book.biff_version = 20
#### check that XF slot in cell_attr is zero
xfx_slot = cell_attr[0] & 0x3F
assert xfx_slot == 0
xfx = self._cell_attr_to_xfx.get(cell_attr)
if xfx is not None:
return xfx
if verbose:
fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx)
book = self.book
xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr)
xfx = len(book.xf_list)
xf.xf_index = xfx
book.xf_list.append(xf)
if verbose:
xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======")
if xf.format_key not in book.format_map:
msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n"
fprintf(self.logfile, msg,
xf.xf_index, xf.format_key, xf.format_key)
fmt = Format(xf.format_key, FUN, "General")
book.format_map[xf.format_key] = fmt
while len(book.format_list) <= xf.format_key:
book.format_list.append(fmt)
cellty_from_fmtty = {
FNU: XL_CELL_NUMBER,
FUN: XL_CELL_NUMBER,
FGE: XL_CELL_NUMBER,
FDT: XL_CELL_DATE,
FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
}
fmt = book.format_map[xf.format_key]
cellty = cellty_from_fmtty[fmt.type]
self._xf_index_to_xl_type_map[xf.xf_index] = cellty
self._cell_attr_to_xfx[cell_attr] = xfx
return xfx
def fake_XF_from_BIFF20_cell_attr(self, cell_attr):
from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection
xf = XF()
xf.alignment = XFAlignment()
xf.alignment.indent_level = 0
xf.alignment.shrink_to_fit = 0
xf.alignment.text_direction = 0
xf.border = XFBorder()
xf.border.diag_up = 0
xf.border.diag_down = 0
xf.border.diag_colour_index = 0
xf.border.diag_line_style = 0 # no line
xf.background = XFBackground()
xf.protection = XFProtection()
(prot_bits, font_and_format, halign_etc) = unpack('> 6
upkbits(xf.protection, prot_bits, (
(6, 0x40, 'cell_locked'),
(7, 0x80, 'formula_hidden'),
))
xf.alignment.hor_align = halign_etc & 0x07
for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')):
if halign_etc & mask:
colour_index, line_style = 8, 1 # black, thin
else:
colour_index, line_style = 0, 0 # none, none
setattr(xf.border, side + '_colour_index', colour_index)
setattr(xf.border, side + '_line_style', line_style)
bg = xf.background
if halign_etc & 0x80:
bg.fill_pattern = 17
else:
bg.fill_pattern = 0
bg.background_colour_index = 9 # white
bg.pattern_colour_index = 8 # black
xf.parent_style_index = 0 # ???????????
xf.alignment.vert_align = 2 # bottom
xf.alignment.rotation = 0
for attr_stem in ("format", "font", "alignment", "border", \
"background", "protection"):
attr = "_%s_flag" % attr_stem
setattr(xf, attr, 1)
return xf
def req_fmt_info(self):
if not self.formatting_info:
raise XLRDError("Feature requires open_workbook(..., formatting_info=True)")
# Determine column display width.
# - New in version 0.6.1
#
# @param colx Index of the queried column, range 0 to 255.
# Note that it is possible to find out the width that will be used to display
# columns with no cell information e.g. column IV (colx=255).
# @return The column width that will be used for displaying
# the given column by Excel, in units of 1/256th of the width of a
# standard character (the digit zero in the first font).
def computed_column_width(self, colx):
self.req_fmt_info()
if self.biff_version >= 80:
colinfo = self.colinfo_map.get(colx, None)
if colinfo is not None:
return colinfo.width
if self.standardwidth is not None:
return self.standardwidth
elif self.biff_version >= 40:
if self.gcw[colx]:
if self.standardwidth is not None:
return self.standardwidth
else:
colinfo = self.colinfo_map.get(colx, None)
if colinfo is not None:
return colinfo.width
elif self.biff_version == 30:
colinfo = self.colinfo_map.get(colx, None)
if colinfo is not None:
return colinfo.width
# All roads lead to Rome and the DEFCOLWIDTH ...
if self.defcolwidth is not None:
return self.defcolwidth * 256
return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record
def handle_msodrawingetc(self, recid, data_len, data):
if not OBJ_MSO_DEBUG:
return
DEBUG = 1
if self.biff_version < 80:
return
o = MSODrawing()
pos = 0
while pos < data_len:
tmp, fbt, cb = unpack('> 4) & 0xFFF
if ver == 0xF:
ndb = 0 # container
else:
ndb = cb
if DEBUG:
hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile)
fprintf(self.logfile,
"fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n",
fbt, inst, ver, cb, cb)
if fbt == 0xF010: # Client Anchor
assert ndb == 18
(o.anchor_unk,
o.anchor_colx_lo, o.anchor_rowx_lo,
o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('> 1) & 1
# Docs say NULL [sic] bytes padding between string count and string data
# to ensure that string is word-aligned. Appears to be nonsense.
# There also seems to be a random(?) byte after the string (not counted in the
# string length.
o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
assert endpos == data_len - 1
o.last_byte = data[-1]
if DEBUG:
o.dump(self.logfile, header="=== MSNote ===", footer= " ")
def handle_txo(self, data):
if not OBJ_MSO_DEBUG:
return
DEBUG = 1
if self.biff_version < 80:
return
o = MSTxo()
data_len = len(data)
option_flags, o.rot, cchText, cbRuns = unpack('>= 2 # div by 4 to drop the 2 flag bits
if flags & 1:
return i / 100.0
return float(i)
else:
# It's the most significant 30 bits of an IEEE 754 64-bit FP number
# (to_py3): replaced b'\0\0\0\0' + chr(flags & 252) + rk_str[1:4]
_bytes = array('B', b'\0\0\0\0')
_bytes.append(flags & 252)
_bytes.extend(rk_str[1:4])
d, = unpack(' see XFCell() class in the xfcell module.
class Cell(XFCell):
def __repr__(self):
if not self.has_xf:
return "%s:%r" % (ctype_text[self.ctype], self.value)
else:
return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index)
# There is one and only one instance of an empty cell -- it's a singleton. This is it.
# You may use a test like "acell is empty_cell".
empty_cell = Cell(XL_CELL_EMPTY, '')
##### =============== Colinfo and Rowinfo ============================== #####
# Width and default formatting information that applies to one or
# more columns in a sheet. Derived from COLINFO records.
#
#
# Here is the default hierarchy for width, according to the OOo docs:
#
# In BIFF3, if a COLINFO record is missing for a column,
# the width specified in the record DEFCOLWIDTH is used instead.
#
# In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
# if the corresponding bit for this column is cleared in the GCW
# record, otherwise the column width set in the DEFCOLWIDTH record
# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
#
# In BIFF8, if a COLINFO record is missing for a column,
# the width specified in the record STANDARDWIDTH is used.
# If this [STANDARDWIDTH] record is also missing,
# the column width of the record DEFCOLWIDTH is used instead.
#
# Footnote: The docs on the GCW record say this:
#
# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
# record. If a bit is cleared, the corresponding column uses the width set in the
# COLINFO record for this column.
#
# If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
# record of the worksheet will be used instead.
#
# At the moment (2007-01-17) xlrd is going with the GCW version of the story.
# Reference to the source may be useful: see the computed_column_width(colx) method
# of the Sheet class.
# - New in version 0.6.1
class Colinfo(BaseObject):
# Width of the column in 1/256 of the width of the zero character,
# using default font (first FONT record in the file).
width = 0
# XF index to be used for formatting empty cells.
xf_index = -1
# 1 = column is hidden
hidden = 0
# Value of a 1-bit flag whose purpose is unknown
# but is often seen set to 1
bit1_flag = 0
# Outline level of the column, in range(7).
# (0 = no outline)
outline_level = 0
# 1 = column is collapsed
collapsed = 0
# Height and default formatting information that applies to a row in a sheet.
# Derived from ROW records.
# - New in version 0.6.1
class Rowinfo(BaseObject):
##
# Height of the row, in twips. One twip == 1/20 of a point
height = 0
##
# 0 = Row has custom height; 1 = Row has default height
has_default_height = 0
##
# Outline level of the row
outline_level = 0
##
# 1 = Outline group starts or ends here (depending on where the
# outline buttons are located, see WSBOOL record [TODO ??]),
# and is collapsed
outline_group_starts_ends = 0
##
# 1 = Row is hidden (manually, or by a filter or outline group)
hidden = 0
##
# 1 = Row height and default font height do not match
height_mismatch = 0
##
# 1 = the xf_index attribute is usable; 0 = ignore it
has_default_xf_index = 0
##
# Index to default XF record for empty cells in this row.
# Don't use this if has_default_xf_index == 0.
xf_index = -9999
##
# This flag is set, if the upper border of at least one cell in this row
# or if the lower border of at least one cell in the row above is
# formatted with a thick line style. Thin and medium line styles are not
# taken into account.
additional_space_above = 0
##
# This flag is set, if the lower border of at least one cell in this row
# or if the upper border of at least one cell in the row below is
# formatted with a medium or thick line style. Thin line styles are not
# taken into account.
additional_space_below = 0