Skip to content

Commit 87cc4ba

Browse files
author
Francisco Santiago
committed
-- Added setup structure
1 parent fefcbf7 commit 87cc4ba

File tree

7 files changed

+250
-0
lines changed

7 files changed

+250
-0
lines changed

elasticparser/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# -*- coding: utf-8 -*-

# NOTE(review): the module added in this commit is ``parsers.py``; the
# original ``from .parser import agg_to_df`` pointed at a non-existent
# ``parser`` module and would raise ImportError on package import.
from .parsers import agg_to_df

elasticparser/elasticparser.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
class ElasticParser:
    """Parser for Elasticsearch aggregation responses.

    All methods are currently unimplemented stubs (they return ``None``).

    Bug fixed: the original methods were declared without ``self``, so any
    call through an instance — e.g. ``ElasticParser().flatten(agg)`` as done
    in ``parsers.agg_to_df`` — raised ``TypeError`` (the instance was bound
    to the ``agg`` parameter, leaving no slot for the real argument).
    """

    def cardinality(self, agg):
        """Parse a ``cardinality`` aggregation. Not yet implemented."""
        pass

    def max(self, agg):
        """Parse a ``max`` aggregation. Not yet implemented.

        NOTE(review): the name shadows the builtin ``max`` inside the class
        namespace; kept unchanged because it is part of the public interface.
        """
        pass

    def date_histogram(self, agg):
        """Parse a ``date_histogram`` aggregation. Not yet implemented."""
        pass

    def flatten(self, agg):
        """Flatten a nested aggregation response. Not yet implemented."""
        pass

elasticparser/parsers.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .elasticparser import ElasticParser
2+
3+
def agg_to_df(agg):
    """Flatten a nested Elasticsearch response object.

    Delegates to :meth:`ElasticParser.flatten` on a fresh parser instance.

    :param agg: nested Elasticsearch aggregation response (dict).
    :return: whatever ``ElasticParser.flatten`` produces for ``agg``
        (documented upstream as a flat list of dictionaries).
    """
    parser = ElasticParser()
    return parser.flatten(agg)

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[bdist_wheel]

setup.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

# NOTE(review): the distribution name/description say "capmodel"/"Capacity
# Modelling Tools" while the package added in this commit is
# ``elasticparser`` — confirm which name is intended before publishing.
setup(
    name='capmodel',
    version='1.0.0',
    description='Capacity Modelling Tools',
    long_description=long_description,
    url='https://github.com/LibertyGlobal/ComponentsCapacityPlanning',
    author='Liberty Global',
    author_email='',
    license='MIT',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        # Fixed: "Topic :: Software Development :: Capacity Modelling Tools"
        # is not a registered trove classifier; PyPI rejects/ignores unknown
        # classifiers, so use the closest registered one.
        'Topic :: Software Development :: Libraries :: Python Modules',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.5',
    ],
    keywords='capacity modelling',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    install_requires=[
        'elasticsearch>=5.0.0,<6.0.0',
        'suds-jurko==0.6',
        'arrow==0.10.0',
        'pandas==0.20.1',
        'scikit-learn==0.18.1',
    ],
    python_requires='>=3.5',
    extras_require={
        'dev': [],
        'test': ['pytest==3.0.*'],
    },
)

tests/__init__.py

Whitespace-only changes.

tests/test_elasticsearch.py

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import unittest

import pandas as pd

import elasticparser
3+
4+
def test_agg_to_df_date_hist_max(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - date_histogram
    - max

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly (the bare ``self`` parameter was treated
    as an unknown fixture); the original also built fixtures but never
    exercised the code under test nor asserted anything — the call and the
    assertion were added.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `result`.
    query = {"aggs":{"daily":{"date_histogram":{"field":"timestamp","interval":"day"},"aggs":{"temperature":{"max":{"field":"temperature"}},"rain":{"max":{"field":"rain"}},"wind_speed":{"max":{"field":"wind_speed"}}}}},"query":{"bool":{"filter":{"range":{"timestamp":{"gte":1496002400,"lte":1496302400}}}}},"size":0}

    result = {
        "took": 7,
        "timed_out": "false",
        "_shards": {
            "total": 15,
            "successful": 15,
            "failed": 0
        },
        "hits": {
            "total": 5194,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "my_agg": {
                "buckets": [{
                    "key_as_string": "1495929600",
                    "key": 1495929600000,
                    "doc_count": 450,
                    "observed_tp_replaytv": { "value": 32317308 },
                    "observed_tp_vod": { "value": 37417283 },
                    "observed_tp_livetv": { "value": 77495254 }
                },
                {
                    "key_as_string": "1496016000",
                    "key": 1496016000000,
                    "doc_count": 325,
                    "observed_tp_replaytv": { "value": None },
                    "observed_tp_vod": { "value": 418968 },
                    "observed_tp_livetv": { "value": 3986292 }
                },
                {
                    "key_as_string": "1496102400",
                    "key": 1496102400000,
                    "doc_count": 2621,
                    "observed_tp_replaytv": { "value": 2966713 },
                    "observed_tp_vod": { "value": 3328655 },
                    "observed_tp_livetv": { "value": 6485277 }
                }
                ]
            }
        }
    }

    # NOTE(review): the expected values are strings while the fixture holds
    # ints, and the metric keys don't match `query`'s aggs — confirm the
    # intended flattening contract before implementing ElasticParser.
    expected = pd.DataFrame.from_records([
        {
            "key": 1495929600000,
            "doc_count": 450,
            "observed_tp_replaytv": "32317308",
            "observed_tp_vod": "37417283",
            "observed_tp_livetv": "77495254",
        },
        {
            "key": 1496016000000,
            "doc_count": 325,
            "observed_tp_replaytv": None,
            "observed_tp_vod": "418968",
            "observed_tp_livetv": "3986292",
        },
        {
            "key": 1496102400000,
            "doc_count": 2621,
            "observed_tp_replaytv": "2966713",
            "observed_tp_vod": "3328655",
            "observed_tp_livetv": "6485277",
        },
    ])

    df = elasticparser.agg_to_df(result)
    # assert_frame_equal handles element-wise DataFrame comparison; a plain
    # equality check on DataFrames raises "truth value is ambiguous".
    pd.testing.assert_frame_equal(expected, df)
81+
def test_agg_to_df_date_hist_cardinality(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - date_histogram
    - cardinality

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly; the leftover debug ``print(expected)``
    was replaced by the call to the code under test and a real assertion,
    which the original lacked entirely.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `agg_result`.
    query = {"query":{"bool":{"must":[{"query_string":{"analyze_wildcard":"true","query":"message:atANDmessage:horizon4ANDfields.environment:mapng_at"}}]}},"sort":[{"timestamp_mili":{"order":"asc"}}],"aggs":{"time_buckets":{"date_histogram":{"field":"timestamp_mili","interval":"day","time_zone":"Europe/Berlin","min_doc_count":1},"aggs":{"sub_agg":{"cardinality":{"field":"cpe_id","precision_threshold":100000}}}}},"size":0}

    agg_result = {
        "took": 182,
        "timed_out": "false",
        "_shards": {
            "total": 6,
            "successful": 6,
            "failed": 0
        },
        "hits": {
            "total": 112908,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "time_buckets": {
                "buckets": [
                    {
                        "key_as_string": "1493848800000",
                        "key": 1493848800000,
                        "doc_count": 50806,
                        "sub_agg": {
                            "value": 12386
                        }
                    },
                    {
                        "key_as_string": "1493935200000",
                        "key": 1493935200000,
                        "doc_count": 62102,
                        "sub_agg": {
                            "value": 9911
                        }
                    }
                ]
            }
        }
    }

    expected = pd.DataFrame.from_records([
        {
            "key": 1493848800000,
            "doc_count": 50806,
            "sub_agg": 12386
        },
        {
            "key": 1493935200000,
            "doc_count": 62102,
            "sub_agg": 9911
        },
    ])

    df = elasticparser.agg_to_df(agg_result)
    # assert_frame_equal handles element-wise DataFrame comparison; a plain
    # equality check on DataFrames raises "truth value is ambiguous".
    pd.testing.assert_frame_equal(expected, df)
142+
def test_agg_to_df_cardinality(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - cardinality

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly; ``elasticsearch.agg_to_df`` referenced
    an undefined module (only ``elasticparser`` is imported here), and
    ``self.assertEquals`` on DataFrames raises "truth value is ambiguous"
    (and ``assertEquals`` is deprecated) — replaced by
    ``pd.testing.assert_frame_equal``.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `agg_result`.
    query = {"size":0,"aggs":{"event":{"filter":{"term":{"api":"event"}},"aggs":{"number_stbs":{"cardinality":{"field":"stbid","precision_threshold":10}}}}}}

    agg_result = {
        "took": 51090,
        "timed_out": "false",
        "_shards": {
            "total": 1,
            "successful": 1,
            "failed": 0
        },
        "hits": {
            "total": 325362803,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "event": {
                "doc_count": 1492155,
                "number_stbs": {
                    "value": 146753
                }
            }
        }
    }

    # NOTE(review): expected "number_stbs" is a string while the fixture
    # holds the int 146753 — confirm the intended flattening contract.
    expected = pd.DataFrame.from_records([
        {
            "doc_count": 1492155,
            "number_stbs": "146753",
        },
    ])

    df = elasticparser.agg_to_df(agg_result)
    pd.testing.assert_frame_equal(expected, df)

0 commit comments

Comments
 (0)