The Python xlrd library will parse cell comments (if you turn on xlrd.sheet.OBJ_MSO_DEBUG
, you'll see them), but it doesn't expose them from the API. You could either parse the dump or hack on it a bit so you can get to them programmatically. Here's a start (tested extremely minimally):
diff --git a/xlrd/sheet.py b/xlrd/sheet.py
--- a/xlrd/sheet.py
+++ b/xlrd/sheet.py
@@ -206,6 +206,7 @@
self._dimncols = 0
self._cell_values = []
self._cell_types = []
+ self._cell_notes = []
self._cell_xf_indexes = []
self._need_fix_ragged_rows = 0
self.defcolwidth = None
@@ -252,6 +253,7 @@
return Cell(
self._cell_types[rowx][colx],
self._cell_values[rowx][colx],
+ self._cell_notes[rowx][colx],
xfx,
)
@@ -422,12 +424,14 @@
if self.formatting_info:
self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra)
self._cell_values[nrx].extend([''] * nextra)
+ self._cell_notes[nrx].extend([None] * nextra)
if nc > self.ncols:
self.ncols = nc
self._need_fix_ragged_rows = 1
if nr > self.nrows:
scta = self._cell_types.append
scva = self._cell_values.append
+ scna = self._cell_notes.append
scxa = self._cell_xf_indexes.append
fmt_info = self.formatting_info
xce = XL_CELL_EMPTY
@@ -436,6 +440,7 @@
for _unused in xrange(self.nrows, nr):
scta([xce] * nc)
scva([''] * nc)
+ scna([None] * nc)
if fmt_info:
scxa([-1] * nc)
else:
@@ -443,6 +448,7 @@
for _unused in xrange(self.nrows, nr):
scta(aa('B', [xce]) * nc)
scva([''] * nc)
+ scna([None] * nc)
if fmt_info:
scxa(aa('h', [-1]) * nc)
self.nrows = nr
@@ -454,6 +460,7 @@
aa = array_array
s_cell_types = self._cell_types
s_cell_values = self._cell_values
+ s_cell_notes = self._cell_notes
s_cell_xf_indexes = self._cell_xf_indexes
s_dont_use_array = self.dont_use_array
s_fmt_info = self.formatting_info
@@ -465,6 +472,7 @@
nextra = ncols - rlen
if nextra > 0:
s_cell_values[rowx][rlen:] = [''] * nextra
+ s_cell_notes[rowx][rlen:] = [None] * nextra
if s_dont_use_array:
trow[rlen:] = [xce] * nextra
if s_fmt_info:
@@ -600,6 +608,7 @@
bk_get_record_parts = bk.get_record_parts
bv = self.biff_version
fmt_info = self.formatting_info
+ txos = {}
eof_found = 0
while 1:
# if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
@@ -877,13 +886,23 @@
break
elif rc == XL_OBJ:
# handle SHEET-level objects; note there's a separate Book.handle_obj
- self.handle_obj(data)
+ obj = self.handle_obj(data)
+ if obj:
+ obj_id = obj.id
+ else:
+ obj_id = None
elif rc == XL_MSO_DRAWING:
self.handle_msodrawingetc(rc, data_len, data)
elif rc == XL_TXO:
- self.handle_txo(data)
+ txo = self.handle_txo(data)
+ if txo and obj_id:
+ txos[obj_id] = txo
+ obj_id = None
elif rc == XL_NOTE:
- self.handle_note(data)
+ note = self.handle_note(data)
+ txo = txos.get(note.object_id)
+ if txo:
+ self._cell_notes[note.rowx][note.colx] = txo.text
elif rc == XL_FEAT11:
self.handle_feat11(data)
elif rc in bofcodes: ##### EMBEDDED BOF #####
@@ -1387,19 +1406,16 @@
def handle_obj(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
o = MSObj()
data_len = len(data)
pos = 0
- if DEBUG:
+ if OBJ_MSO_DEBUG:
fprintf(self.logfile, "... OBJ record ...\n")
while pos < data_len:
ft, cb = unpack('<HH', data[pos:pos+4])
- if DEBUG:
+ if OBJ_MSO_DEBUG:
hex_char_dump(data, pos, cb, base=0, fout=self.logfile)
if ft == 0x15: # ftCmo ... s/b first
assert pos == 0
@@ -1430,16 +1446,14 @@
else:
# didn't break out of while loop
assert pos == data_len
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSOBj ===", footer= " ")
+ return o
def handle_note(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
- if DEBUG:
+ if OBJ_MSO_DEBUG:
fprintf(self.logfile, '... NOTE record ...\n')
hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)
o = MSNote()
@@ -1453,13 +1467,11 @@
o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
assert endpos == data_len - 1
o.last_byte = data[-1]
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSNote ===", footer= " ")
+ return o
def handle_txo(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
o = MSTxo()
@@ -1477,8 +1489,9 @@
rc3, data3_len, data3 = self.book.get_record_parts()
assert rc3 == XL_CONTINUE
# ignore the formatting runs for the moment
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSTxo ===", footer= " ")
+ return o
def handle_feat11(self, data):
if not OBJ_MSO_DEBUG:
@@ -1638,11 +1651,12 @@
class Cell(BaseObject):
- __slots__ = ['ctype', 'value', 'xf_index']
+ __slots__ = ['ctype', 'value', 'note', 'xf_index']
- def __init__(self, ctype, value, xf_index=None):
+ def __init__(self, ctype, value, note=None, xf_index=None):
self.ctype = ctype
self.value = value
+ self.note = note
self.xf_index = xf_index
def __repr__(self):
Then you could write something like:
import xlrd
xlrd.sheet.OBJ_MSO_DEBUG = True
xls = xlrd.open_workbook('foo.xls')
for sheet in xls.sheets():
print 'sheet %s (%d x %d)' % (sheet.name, sheet.nrows, sheet.ncols)
for rownum in xrange(sheet.nrows):
for cell in sheet.row(rownum):
print cell, cell.note