Skip to content

Commit 87cc4ba

Browse files
author
Francisco Santiago
committed
-- Added setup structure
1 parent fefcbf7 commit 87cc4ba

File tree

7 files changed

+250
-0
lines changed

7 files changed

+250
-0
lines changed

elasticparser/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# -*- coding: utf-8 -*-

# NOTE(review): the module added in this commit is ``parsers.py``; the
# original ``from .parser import agg_to_df`` pointed at a non-existent
# ``parser`` module and would raise ImportError on package import.
from .parsers import agg_to_df

elasticparser/elasticparser.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
class ElasticParser:
    """Parser for Elasticsearch aggregation responses.

    All methods are currently unimplemented stubs (they return ``None``).

    Bug fixed: the original methods were declared without ``self``, so any
    call through an instance — e.g. ``ElasticParser().flatten(agg)`` as done
    in ``parsers.agg_to_df`` — raised ``TypeError`` (the instance was bound
    to the ``agg`` parameter, leaving no slot for the real argument).
    """

    def cardinality(self, agg):
        """Parse a ``cardinality`` aggregation. Not yet implemented."""
        pass

    def max(self, agg):
        """Parse a ``max`` aggregation. Not yet implemented.

        NOTE(review): the name shadows the builtin ``max`` inside the class
        namespace; kept unchanged because it is part of the public interface.
        """
        pass

    def date_histogram(self, agg):
        """Parse a ``date_histogram`` aggregation. Not yet implemented."""
        pass

    def flatten(self, agg):
        """Flatten a nested aggregation response. Not yet implemented."""
        pass

elasticparser/parsers.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .elasticparser import ElasticParser
2+
3+
def agg_to_df(agg):
    """Flatten a nested Elasticsearch response object.

    Delegates to :meth:`ElasticParser.flatten` on a fresh parser instance.

    :param agg: nested Elasticsearch aggregation response (dict).
    :return: whatever ``ElasticParser.flatten`` produces for ``agg``
        (documented upstream as a flat list of dictionaries).
    """
    parser = ElasticParser()
    return parser.flatten(agg)

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[bdist_wheel]

setup.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

# NOTE(review): the distribution name/description say "capmodel"/"Capacity
# Modelling Tools" while the package added in this commit is
# ``elasticparser`` — confirm which name is intended before publishing.
setup(
    name='capmodel',
    version='1.0.0',
    description='Capacity Modelling Tools',
    long_description=long_description,
    url='https://github.com/LibertyGlobal/ComponentsCapacityPlanning',
    author='Liberty Global',
    author_email='',
    license='MIT',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        # Fixed: "Topic :: Software Development :: Capacity Modelling Tools"
        # is not a registered trove classifier; PyPI rejects/ignores unknown
        # classifiers, so use the closest registered one.
        'Topic :: Software Development :: Libraries :: Python Modules',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.5',
    ],
    keywords='capacity modelling',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    install_requires=[
        'elasticsearch>=5.0.0,<6.0.0',
        'suds-jurko==0.6',
        'arrow==0.10.0',
        'pandas==0.20.1',
        'scikit-learn==0.18.1',
    ],
    python_requires='>=3.5',
    extras_require={
        'dev': [],
        'test': ['pytest==3.0.*'],
    },
)

tests/__init__.py

Whitespace-only changes.

tests/test_elasticsearch.py

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import unittest

import pandas as pd

import elasticparser
3+
4+
def test_agg_to_df_date_hist_max(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - date_histogram
    - max

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly (the bare ``self`` parameter was treated
    as an unknown fixture); the original also built fixtures but never
    exercised the code under test nor asserted anything — the call and the
    assertion were added.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `result`.
    query = {"aggs":{"daily":{"date_histogram":{"field":"timestamp","interval":"day"},"aggs":{"temperature":{"max":{"field":"temperature"}},"rain":{"max":{"field":"rain"}},"wind_speed":{"max":{"field":"wind_speed"}}}}},"query":{"bool":{"filter":{"range":{"timestamp":{"gte":1496002400,"lte":1496302400}}}}},"size":0}

    result = {
        "took": 7,
        "timed_out": "false",
        "_shards": {
            "total": 15,
            "successful": 15,
            "failed": 0
        },
        "hits": {
            "total": 5194,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "my_agg": {
                "buckets": [{
                    "key_as_string": "1495929600",
                    "key": 1495929600000,
                    "doc_count": 450,
                    "observed_tp_replaytv": { "value": 32317308 },
                    "observed_tp_vod": { "value": 37417283 },
                    "observed_tp_livetv": { "value": 77495254 }
                },
                {
                    "key_as_string": "1496016000",
                    "key": 1496016000000,
                    "doc_count": 325,
                    "observed_tp_replaytv": { "value": None },
                    "observed_tp_vod": { "value": 418968 },
                    "observed_tp_livetv": { "value": 3986292 }
                },
                {
                    "key_as_string": "1496102400",
                    "key": 1496102400000,
                    "doc_count": 2621,
                    "observed_tp_replaytv": { "value": 2966713 },
                    "observed_tp_vod": { "value": 3328655 },
                    "observed_tp_livetv": { "value": 6485277 }
                }
                ]
            }
        }
    }

    # NOTE(review): the expected values are strings while the fixture holds
    # ints, and the metric keys don't match `query`'s aggs — confirm the
    # intended flattening contract before implementing ElasticParser.
    expected = pd.DataFrame.from_records([
        {
            "key": 1495929600000,
            "doc_count": 450,
            "observed_tp_replaytv": "32317308",
            "observed_tp_vod": "37417283",
            "observed_tp_livetv": "77495254",
        },
        {
            "key": 1496016000000,
            "doc_count": 325,
            "observed_tp_replaytv": None,
            "observed_tp_vod": "418968",
            "observed_tp_livetv": "3986292",
        },
        {
            "key": 1496102400000,
            "doc_count": 2621,
            "observed_tp_replaytv": "2966713",
            "observed_tp_vod": "3328655",
            "observed_tp_livetv": "6485277",
        },
    ])

    df = elasticparser.agg_to_df(result)
    # assert_frame_equal handles element-wise DataFrame comparison; a plain
    # equality check on DataFrames raises "truth value is ambiguous".
    pd.testing.assert_frame_equal(expected, df)
81+
def test_agg_to_df_date_hist_cardinality(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - date_histogram
    - cardinality

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly; the leftover debug ``print(expected)``
    was replaced by the call to the code under test and a real assertion,
    which the original lacked entirely.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `agg_result`.
    query = {"query":{"bool":{"must":[{"query_string":{"analyze_wildcard":"true","query":"message:atANDmessage:horizon4ANDfields.environment:mapng_at"}}]}},"sort":[{"timestamp_mili":{"order":"asc"}}],"aggs":{"time_buckets":{"date_histogram":{"field":"timestamp_mili","interval":"day","time_zone":"Europe/Berlin","min_doc_count":1},"aggs":{"sub_agg":{"cardinality":{"field":"cpe_id","precision_threshold":100000}}}}},"size":0}

    agg_result = {
        "took": 182,
        "timed_out": "false",
        "_shards": {
            "total": 6,
            "successful": 6,
            "failed": 0
        },
        "hits": {
            "total": 112908,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "time_buckets": {
                "buckets": [
                    {
                        "key_as_string": "1493848800000",
                        "key": 1493848800000,
                        "doc_count": 50806,
                        "sub_agg": {
                            "value": 12386
                        }
                    },
                    {
                        "key_as_string": "1493935200000",
                        "key": 1493935200000,
                        "doc_count": 62102,
                        "sub_agg": {
                            "value": 9911
                        }
                    }
                ]
            }
        }
    }

    expected = pd.DataFrame.from_records([
        {
            "key": 1493848800000,
            "doc_count": 50806,
            "sub_agg": 12386
        },
        {
            "key": 1493935200000,
            "doc_count": 62102,
            "sub_agg": 9911
        },
    ])

    df = elasticparser.agg_to_df(agg_result)
    # assert_frame_equal handles element-wise DataFrame comparison; a plain
    # equality check on DataFrames raises "truth value is ambiguous".
    pd.testing.assert_frame_equal(expected, df)
142+
def test_agg_to_df_cardinality(self=None):
    """Tests the conversion of an aggregation to data frame.

    The aggregation contains these levels:
    - cardinality

    Fixes: ``self`` now defaults to ``None`` so pytest can call this
    module-level function directly; ``elasticsearch.agg_to_df`` referenced
    an undefined module (only ``elasticparser`` is imported here), and
    ``self.assertEquals`` on DataFrames raises "truth value is ambiguous"
    (and ``assertEquals`` is deprecated) — replaced by
    ``pd.testing.assert_frame_equal``.
    """
    # NOTE(review): `query` is unused by the assertion; kept as
    # documentation of the request that produced `agg_result`.
    query = {"size":0,"aggs":{"event":{"filter":{"term":{"api":"event"}},"aggs":{"number_stbs":{"cardinality":{"field":"stbid","precision_threshold":10}}}}}}

    agg_result = {
        "took": 51090,
        "timed_out": "false",
        "_shards": {
            "total": 1,
            "successful": 1,
            "failed": 0
        },
        "hits": {
            "total": 325362803,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "event": {
                "doc_count": 1492155,
                "number_stbs": {
                    "value": 146753
                }
            }
        }
    }

    # NOTE(review): expected "number_stbs" is a string while the fixture
    # holds the int 146753 — confirm the intended flattening contract.
    expected = pd.DataFrame.from_records([
        {
            "doc_count": 1492155,
            "number_stbs": "146753",
        },
    ])

    df = elasticparser.agg_to_df(agg_result)
    pd.testing.assert_frame_equal(expected, df)

0 commit comments

Comments
 (0)