这里最大的耗时应该出现在从磁盘读取数据的时候:每次 IO 只读取一行中的一个 column,由此造成了性能瓶颈;如果一次 IO 读取的是一个 chunk,效率应该会好很多。
// GetRowAndAppendToChunk gets a Row from the ListInDisk by RowPtr. Return the Row and the Ref Chunk.
//
// The row's offset is resolved from ptr.ChkIdx and ptr.RowIdx, the serialized
// row is read from the data file at that offset, and the decoded row is
// converted with toRow using chk. If resolving the offset or reading the row
// fails, the zero Row, a nil Chunk and the error are returned.
func (l *ListInDisk) GetRowAndAppendToChunk(ptr RowPtr, chk *Chunk) (row Row, _ *Chunk, err error) {
	offset, err := l.getOffset(ptr.ChkIdx, ptr.RowIdx)
	if err != nil {
		return row, nil, err
	}

	diskRow := rowInDisk{numCol: len(l.fieldTypes)}
	if _, err = diskRow.ReadFrom(l.dataFile.getSectionReader(offset)); err != nil {
		return row, nil, err
	}

	row, chk = diskRow.toRow(l.fieldTypes, chk)
	return row, chk, nil
}
// ReadFrom reads data of r, deserializes it from the format of diskFormatRow
// into Row.
//
// The serialized row starts with a header of numCol 8-byte column sizes
// (decoded by bytesToI64Slice), followed by the bytes of every column whose
// size is not -1. A size of -1 marks a column that has no cell stored
// (presumably a NULL value — confirm against the writer).
//
// All cells are fetched with a single read into one shared buffer instead of
// one read and one allocation per column, which keeps the number of I/O
// operations per row constant.
//
// It returns the number of bytes consumed from r and the first read error.
// A truncated payload is reported by io.ReadFull as io.ErrUnexpectedEOF, or as
// io.EOF when not a single payload byte could be read.
func (row *rowInDisk) ReadFrom(r io.Reader) (n int64, err error) {
	var read int

	header := make([]byte, 8*row.numCol)
	read, err = io.ReadFull(r, header)
	n += int64(read)
	if err != nil {
		return n, err
	}
	row.sizesOfColumns = bytesToI64Slice(header)
	row.cells = make([][]byte, 0, row.numCol)

	// Add up the payload length first so the cells can be read in one call.
	// Sizes other than -1 are assumed to be non-negative; as before, a
	// negative size makes the allocation or the slicing below panic.
	var total int64
	for _, size := range row.sizesOfColumns {
		if size != -1 {
			total += size
		}
	}

	payload := make([]byte, total)
	read, err = io.ReadFull(r, payload)
	n += int64(read)
	if err != nil {
		return n, err
	}

	var off int64
	for _, size := range row.sizesOfColumns {
		if size == -1 {
			continue
		}
		end := off + size
		// Cap each cell at its own length so that an append on one cell can
		// never overwrite the bytes of the next cell in the shared buffer.
		row.cells = append(row.cells, payload[off:end:end])
		off = end
	}
	return n, nil
}