@@ -4,7 +4,7 @@ class GroupBy
44
55 attr_reader :groups
66
7- # Yield over each group created by group_by. A DataFrame is yielded in
7+ # Iterate over each group created by group_by. A DataFrame is yielded to
88 # the block.
99 def each_group &block
1010 groups . keys . each do |k |
@@ -26,6 +26,7 @@ def initialize context, names
2626 @groups . freeze
2727 end
2828
29+ # Get a Daru::Vector of the size of each group.
2930 def size
3031 index =
3132 if multi_indexed_grouping?
@@ -38,23 +39,79 @@ def size
3839 Daru ::Vector . new ( values , index : index , name : :size )
3940 end
4041
42+ # Get the first row of each group (equivalent to head(1)).
def first
  # Delegates to #head, requesting a quantity of one.
  head(1)
end
4446
47+ # Get the last row of each group (equivalent to tail(1)).
def last
  # Delegates to #tail, requesting a quantity of one.
  tail(1)
end
4851
52+ # Get the first 'n' rows of each group.
53+ # @param quantity [Fixnum] (5) The number of rows to take from each group.
54+ # @example Usage of head
55+ # df = Daru::DataFrame.new({
56+ # a: %w{foo bar foo bar foo bar foo foo},
57+ # b: %w{one one two three two two one three},
58+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
59+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
60+ # })
61+ # df.group_by([:a, :b]).head(1)
62+ # # =>
63+ # # #<Daru::DataFrame:82745170 @name = d7003f75-5eb9-4967-9303-c08dd9160224 @size = 6>
64+ # # a b c d
65+ # # 1 bar one 2 22
66+ # # 3 bar three 1 44
67+ # # 5 bar two 6 66
68+ # # 0 foo one 1 11
69+ # # 7 foo three 8 88
70+ # # 2 foo two 3 33
def head(quantity = 5)
  # Hands off to select_groups_from with the :first selector and the
  # requested quantity (5 when the caller passes nothing).
  select_groups_from(:first, quantity)
end
5274
75+ # Get the last 'n' rows of each group.
76+ # @param quantity [Fixnum] (5) The number of rows to take from each group.
77+ # @example Usage of tail
78+ # df = Daru::DataFrame.new({
79+ # a: %w{foo bar foo bar foo bar foo foo},
80+ # b: %w{one one two three two two one three},
81+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
82+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
83+ # })
84+ # df.group_by([:a, :b]).tail(1)
85+ # # =>
86+ # # #<Daru::DataFrame:82378270 @name = 0623db46-5425-41bd-a843-99baac3d1d9a @size = 6>
87+ # # a b c d
88+ # # 1 bar one 2 22
89+ # # 3 bar three 1 44
90+ # # 5 bar two 6 66
91+ # # 6 foo one 3 77
92+ # # 7 foo three 8 88
93+ # # 4 foo two 3 55
def tail(quantity = 5)
  # Hands off to select_groups_from with the :last selector and the
  # requested quantity (5 when the caller passes nothing).
  select_groups_from(:last, quantity)
end
5697
5798 # Calculate mean of numeric groups, excluding missing values.
99+ # @example Usage of mean
100+ # df = Daru::DataFrame.new({
101+ # a: %w{foo bar foo bar foo bar foo foo},
102+ # b: %w{one one two three two two one three},
103+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
104+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88] })
105+ # df.group_by([:a, :b]).mean
106+ # # =>
107+ # # #<Daru::DataFrame:81097450 @name = 0c32983f-3e06-451f-a9c9-051cadfe7371 @size = 6>
108+ # # c d
109+ # # ["bar", "one"] 2 22
110+ # # ["bar", "three"] 1 44
111+ # # ["bar", "two"] 6 66
112+ # # ["foo", "one"] 2.0 44.0
113+ # # ["foo", "three"] 8 88
114+ # # ["foo", "two"] 3.0 44.0
def mean
  # Dispatches through apply_method with the :numeric type and the :mean
  # method name.
  apply_method(:numeric, :mean)
end
@@ -69,6 +126,24 @@ def sum
69126 apply_method :numeric , :sum
70127 end
71128
129+ # Count groups, excludes missing values.
130+ # @example Using count
131+ # df = Daru::DataFrame.new({
132+ # a: %w{foo bar foo bar foo bar foo foo},
133+ # b: %w{one one two three two two one three},
134+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
135+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
136+ # })
137+ # df.group_by([:a, :b]).count
138+ # # =>
139+ # # #<Daru::DataFrame:76900210 @name = 7b9cf55d-17f8-48c7-b03a-2586c6e5ec5a @size = 6>
140+ # # c d
141+ # # ["bar", "one"] 1 1
142+ # # ["bar", "two"] 1 1
143+ # # ["bar", "three"] 1 1
144+ # # ["foo", "one"] 2 2
145+ # # ["foo", "three"] 1 1
146+ # # ["foo", "two"] 2 2
def count
  columns = @non_group_vectors
  # The vector returned by #size is computed once and reused for every
  # non-grouping column, so all columns of the result carry the same values.
  # NOTE(review): because the figures come from #size, missing values look
  # like they are included in the counts - confirm against #size.
  per_column = Array.new(columns.size, size)
  Daru::DataFrame.new(per_column, order: columns)
end
@@ -91,6 +166,21 @@ def min
91166 end
92167
93168 # Returns one of the selected groups as a DataFrame.
169+ # @param group [Array] The group that is to be selected from those grouped.
170+ #
171+ # @example Getting a group
172+ #
173+ # df = Daru::DataFrame.new({
174+ # a: %w{foo bar foo bar foo bar foo foo},
175+ # b: %w{one one two three two two one three},
176+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
177+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
178+ # })
179+ # df.group_by([:a, :b]).get_group ['bar','two']
180+ # # =>
181+ # # #<Daru::DataFrame:83258980 @name = 687ee3f6-8874-4899-97fa-9b31d84fa1d5 @size = 1>
182+ # # a b c d
183+ # # 5 bar two 6 66
94184 def get_group group
95185 indexes = @groups [ group ]
96186 elements = [ ]
0 commit comments