Skip to content

Commit a3d887a

Browse files
author
Dmitry Alekseichik
authored
Added mapping cell ids with header column name (#93)
* Added mapping cell ids with header column name * Added readme description * Exclude macOS files in .gitignore * Added tests and small refactoring
1 parent e0d5657 commit a3d887a

File tree

7 files changed

+89
-29
lines changed

7 files changed

+89
-29
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ spec/reports
1515
test/tmp
1616
test/version_tmp
1717
tmp
18+
19+
# macOS Finder artifacts
20+
.DS_Store

README.md

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx'
100100
Creek::Book.new remote_url, remote: true
101101
```
102102

103+
## Mapping cells with header names
104+
By default, Creek maps cells by column letter and row number (A1, B3, etc.). To get cell values by header column name instead, pass ***with_headers*** when creating the book (this only works with the ***#simple_rows*** method!). *(Note: the header row is the first row of the sheet.)*
105+
106+
```ruby
107+
creek = Creek::Book.new file.path, with_headers: true
108+
```
109+
110+
103111
## Contributing
104112

105113
Contributions are welcomed. You can fork a repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests which cover your new changes and finally create a pull request.

lib/creek.rb

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# frozen_string_literal: true
2+
13
require 'creek/version'
24
require 'creek/book'
35
require 'creek/styles/constants'

lib/creek/book.rb

+14-4
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
require 'open-uri'
55

66
module Creek
7-
87
class Creek::Book
9-
108
attr_reader :files,
119
:sheets,
12-
:shared_strings
10+
:shared_strings,
11+
:with_headers
1312

1413
DATE_1900 = Date.new(1899, 12, 30).freeze
1514
DATE_1904 = Date.new(1904, 1, 1).freeze
@@ -23,6 +22,7 @@ def initialize path, options = {}
2322
path = download_file(path) if options[:remote]
2423
@files = Zip::File.open(path)
2524
@shared_strings = SharedStrings.new(self)
25+
@with_headers = options.fetch(:with_headers, false)
2626
end
2727

2828
def sheets
@@ -41,7 +41,17 @@ def sheets
4141
rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
4242
@sheets = xml.css(cssPrefix+'sheet').map do |sheet|
4343
sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
44-
Sheet.new(self, sheet.attr("name"), sheet.attr("sheetid"), sheet.attr("state"), sheet.attr("visible"), sheet.attr("r:id"), sheetfile)
44+
sheet = Sheet.new(
45+
self,
46+
sheet.attr("name"),
47+
sheet.attr("sheetid"),
48+
sheet.attr("state"),
49+
sheet.attr("visible"),
50+
sheet.attr("r:id"),
51+
sheetfile
52+
)
53+
sheet.with_headers = with_headers
54+
sheet
4555
end
4656
end
4757

lib/creek/sheet.rb

+30-20
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,25 @@
1+
# frozen_string_literal: true
2+
13
require 'zip/filesystem'
24
require 'nokogiri'
35

46
module Creek
57
class Creek::Sheet
68
include Creek::Utils
79

10+
HEADERS_ROW_NUMBER = '1'
11+
12+
attr_accessor :with_headers
813
attr_reader :book,
914
:name,
1015
:sheetid,
1116
:state,
1217
:visible,
1318
:rid,
14-
:index
15-
19+
:index,
20+
:headers
1621

17-
def initialize book, name, sheetid, state, visible, rid, sheetfile
22+
def initialize(book, name, sheetid, state, visible, rid, sheetfile)
1823
@book = book
1924
@name = name
2025
@sheetid = sheetid
@@ -46,7 +51,6 @@ def images_at(cell)
4651
@drawing.images_at(cell) if @images_present
4752
end
4853

49-
5054
##
5155
# Provides an Enumerator that returns a hash representing each row.
5256
# The key of the hash is the column ID and the value is the value of the cell.
@@ -89,35 +93,37 @@ def rows_generator include_meta_data=false, use_simple_rows_format=false
8993
closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT
9094
Enumerator.new do |y|
9195
row, cells, cell = nil, {}, nil
92-
cell_type = nil
96+
cell_type = nil
9397
cell_style_idx = nil
9498
@book.files.file.open(path) do |xml|
9599
Nokogiri::XML::Reader.from_io(xml).each do |node|
96-
if (node.name.eql? 'row') and (node.node_type.eql? opener)
100+
if node.name == 'row' && node.node_type == opener
97101
row = node.attributes
98-
row['cells'] = Hash.new
99-
cells = Hash.new
102+
row['cells'] = {}
103+
cells = {}
100104
y << (include_meta_data ? row : cells) if node.self_closing?
101-
elsif (node.name.eql? 'row') and (node.node_type.eql? closer)
105+
elsif node.name == 'row' && node.node_type == closer
102106
processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format)
107+
@headers = processed_cells if row['r'] == HEADERS_ROW_NUMBER
103108

104109
if @images_present
105110
processed_cells.each do |cell_name, cell_value|
106111
next unless cell_value.nil?
112+
107113
processed_cells[cell_name] = images_at(cell_name)
108114
end
109115
end
110116

111117
row['cells'] = processed_cells
112118
y << (include_meta_data ? row : processed_cells)
113-
elsif (node.name.eql? 'c') and (node.node_type.eql? opener)
119+
elsif node.name == 'c' && node.node_type == opener
114120
cell_type = node.attributes['t']
115121
cell_style_idx = node.attributes['s']
116122
cell = node.attributes['r']
117-
elsif (['v', 't'].include? node.name) and (node.node_type.eql? opener)
123+
elsif %w[v t].include?(node.name) && node.node_type == opener
118124
unless cell.nil?
119125
node.read
120-
cells[(use_simple_rows_format ? cell.tr("0-9", "") : cell)] = convert(node.value, cell_type, cell_style_idx)
126+
cells[cell] = convert(node.value, cell_type, cell_style_idx)
121127
end
122128
end
123129
end
@@ -142,15 +148,13 @@ def converter_options
142148
# The unzipped XML file does not contain any node for empty cells.
143149
# Empty cells are being padded in using this function
144150
def fill_in_empty_cells(cells, row_number, last_col, use_simple_rows_format)
145-
new_cells = Hash.new
151+
new_cells = {}
152+
return new_cells if cells.empty?
146153

147-
unless cells.empty?
148-
last_col = last_col.gsub(row_number, '')
149-
150-
("A"..last_col).to_a.each do |column|
151-
id = use_simple_rows_format ? "#{column}" : "#{column}#{row_number}"
152-
new_cells[id] = cells[id]
153-
end
154+
last_col = last_col.gsub(row_number, '')
155+
('A'..last_col).to_a.each do |column|
156+
id = cell_id(column, use_simple_rows_format, row_number)
157+
new_cells[id] = cells["#{column}#{row_number}"]
154158
end
155159

156160
new_cells
@@ -172,5 +176,11 @@ def extract_drawing_filepath
172176
sheet_rels_filepath = expand_to_rels_path(sheet_filepath)
173177
parse_xml(sheet_rels_filepath).css("Relationship[@Id='#{drawing_rid}']").first.attributes['Target'].value
174178
end
179+
180+
# Builds the hash key under which a cell value is stored in a row.
#
# @param column [String] column letter(s), e.g. 'A'
# @param use_simple_rows_format [Boolean] when false, the key is the full
#   cell reference (column followed by row_number)
# @param row_number [String] row number appended in the non-simple format
# @return [String] the "A1"-style reference, the header name for the column,
#   or the bare column letter
def cell_id(column, use_simple_rows_format, row_number = '')
  return "#{column}#{row_number}" unless use_simple_rows_format
  return column unless with_headers && headers

  # Fall back to the column letter when the header row has no value for this
  # column; otherwise every header-less column would share a single nil key
  # and overwrite each other's values.
  headers[column] || column
end
175185
end
176186
end
9.09 KB
Binary file not shown.

spec/sheet_spec.rb

+32-5
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1+
# frozen_string_literal: true
2+
13
require './spec/spec_helper'
24

35
describe 'sheet' do
46
let(:book_with_images) { Creek::Book.new('spec/fixtures/sample-with-images.xlsx') }
5-
let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
67
let(:sheetfile) { 'worksheets/sheet1.xml' }
78
let(:sheet_with_images) { Creek::Sheet.new(book_with_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
8-
let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
99

1010
def load_cell(rows, cell_name)
11-
cell = rows.find { |row| !row[cell_name].nil? }
11+
cell = rows.find { |row| row[cell_name] }
1212
cell[cell_name] if cell
1313
end
1414

1515
context 'escaped ampersand' do
1616
let(:book_escaped) { Creek::Book.new('spec/fixtures/escaped.xlsx') }
1717
it 'does NOT escape ampersand' do
18-
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
18+
expect(book_escaped.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
1919
end
2020

2121
let(:book_escaped2) { Creek::Book.new('spec/fixtures/escaped2.xlsx') }
2222
it 'does escape ampersand' do
23-
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([["abc", "def"], ["ghi", "j&k"]])
23+
expect(book_escaped2.sheets[0].rows.to_enum.map(&:values)).to eq([%w[abc def], %w[ghi j&k]])
2424
end
2525
end
2626

@@ -66,6 +66,9 @@ def load_cell(rows, cell_name)
6666
end
6767

6868
context 'with excel without images' do
69+
let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') }
70+
let(:sheet_no_images) { Creek::Sheet.new(book_no_images, 'Sheet 1', 1, '', '', '1', sheetfile) }
71+
6972
it 'does not break on with_images' do
7073
rows = sheet_no_images.with_images.rows.map { |r| r }
7174
expect(load_cell(rows, 'A10')).to eq(0.15)
@@ -94,4 +97,28 @@ def load_cell(rows, cell_name)
9497
expect(image).to eq(nil)
9598
end
9699
end
100+
101+
describe '#simple_rows' do
102+
let(:book_with_headers) { Creek::Book.new('spec/fixtures/sample-with-headers.xlsx') }
103+
let(:sheet) { Creek::Sheet.new(book_with_headers, 'Sheet 1', 1, '', '', '1', sheetfile) }
104+
105+
subject { sheet.simple_rows.to_a[1] }
106+
107+
it 'returns values by letters' do
108+
expect(subject['A']).to eq 'value1'
109+
expect(subject['B']).to eq 'value2'
110+
end
111+
112+
context 'when enable with_headers property' do
113+
before { sheet.with_headers = true }
114+
115+
subject { sheet.simple_rows.to_a[1] }
116+
117+
it 'returns values by headers name' do
118+
expect(subject['HeaderA']).to eq 'value1'
119+
expect(subject['HeaderB']).to eq 'value2'
120+
expect(subject['HeaderC']).to eq 'value3'
121+
end
122+
end
123+
end
97124
end

0 commit comments

Comments
 (0)