From 033f6e0da7724d4070781ab23ead9b72b8c46834 Mon Sep 17 00:00:00 2001
From: Adrien
Date: Tue, 9 Feb 2021 09:18:42 +0100
Subject: [PATCH] first commit

---
Notes (commentary only; text between the three-dash line and the first
"diff --git" line is not part of the commit):

  * .gitignore: ignore rules for temporary/binary files, editor and IDE
    folders, packaging artefacts, test and coverage output, build and docs
    output, per-project virtualenvs, and the contents of data/.
  * src/fuel_price_tracker/__init__.py: sets __version__ from the installed
    distribution via pkg_resources, falling back to "unknown" when the
    distribution is not found, then deletes the two imported names.
  * src/fuel_price_tracker/manage_data.py: script that replaces the contents
    of ../../data with an archive downloaded from a link scraped off the
    data.gouv.fr dataset page, then extracts it there.
  * src/fuel_price_tracker/parsing.py: reads
    ../../data/PrixCarburants_instantane.xml and prints the first three
    elements whose "cp" attribute starts with "86"; imports manage_data to
    fetch the data when none is available locally.

 .gitignore                            | 52 +++++++++++++++++++++++++++
 src/fuel_price_tracker/__init__.py    | 11 ++++++
 src/fuel_price_tracker/manage_data.py | 25 +++++++++++++
 src/fuel_price_tracker/parsing.py     | 27 ++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 src/fuel_price_tracker/__init__.py
 create mode 100644 src/fuel_price_tracker/manage_data.py
 create mode 100644 src/fuel_price_tracker/parsing.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4f28f76
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,52 @@
+# Temporary and binary files
+*~
+*.py[cod]
+*.so
+*.cfg
+!.isort.cfg
+!setup.cfg
+*.orig
+*.log
+*.pot
+__pycache__/*
+.cache/*
+.*.swp
+*/.ipynb_checkpoints/*
+.DS_Store
+data/*
+
+# Project files
+.ropeproject
+.project
+.pydevproject
+.settings
+.idea
+.vscode
+tags
+
+# Package files
+*.egg
+*.eggs/
+.installed.cfg
+*.egg-info
+
+# Unittest and coverage
+htmlcov/*
+.coverage
+.tox
+junit.xml
+coverage.xml
+.pytest_cache/
+
+# Build and docs folder/files
+build/*
+dist/*
+sdist/*
+docs/api/*
+docs/_rst/*
+docs/_build/*
+cover/*
+MANIFEST
+
+# Per-project virtualenvs
+.venv*/
diff --git a/src/fuel_price_tracker/__init__.py b/src/fuel_price_tracker/__init__.py
new file mode 100644
index 0000000..07d78fd
--- /dev/null
+++ b/src/fuel_price_tracker/__init__.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+from pkg_resources import DistributionNotFound, get_distribution
+
+try:
+    # Change here if project is renamed and does not equal the package name
+    dist_name = __name__
+    __version__ = get_distribution(dist_name).version
+except DistributionNotFound:
+    __version__ = "unknown"
+finally:
+    del get_distribution, DistributionNotFound
diff --git a/src/fuel_price_tracker/manage_data.py b/src/fuel_price_tracker/manage_data.py
new file mode 100644
index 0000000..953b45e
--- /dev/null
+++ b/src/fuel_price_tracker/manage_data.py
@@ -0,0 +1,46 @@
+"""Download and unpack the fuel price archive linked from data.gouv.fr.
+
+This module is a script: all of the work below runs at import time, which is
+what ``parsing.py`` relies on when it does ``import manage_data``.
+"""
+import os
+import urllib.request
+import zipfile
+
+import requests
+from bs4 import BeautifulSoup
+
+url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
+# Relative paths: resolved against the current working directory.
+DATA_DIR = "../../data"
+ARCHIVE_PATH = f"{DATA_DIR}/price.zip"
+
+# Resolve the download link before touching local files, so that a failed
+# request or a changed page layout does not wipe the existing data.
+page = requests.get(url, timeout=30)
+if page.status_code != 200:
+    # Previously this case fell through to a NameError on ``links``.
+    raise RuntimeError(f"GET {url} returned HTTP {page.status_code}")
+soup = BeautifulSoup(page.text, "lxml")
+links = [
+    tag.attrs["href"]
+    for tag in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
+    if "href" in tag.attrs
+]
+# Index 1 (the second matching link) is what the original script downloaded.
+# NOTE(review): presumably the archive holding PrixCarburants_instantane.xml,
+# which parsing.py reads -- confirm against the live page.
+if len(links) < 2:
+    raise RuntimeError(
+        f"expected at least 2 download links on {url}, found {len(links)}"
+    )
+
+# Create the directory before listing it: os.listdir() raises
+# FileNotFoundError when the directory does not exist yet.
+os.makedirs(DATA_DIR, exist_ok=True)
+for item in os.listdir(DATA_DIR):
+    os.remove(f"{DATA_DIR}/{item}")
+
+urllib.request.urlretrieve(links[1], ARCHIVE_PATH)
+with zipfile.ZipFile(ARCHIVE_PATH, "r") as zip_ref:
+    zip_ref.extractall(DATA_DIR)
diff --git a/src/fuel_price_tracker/parsing.py b/src/fuel_price_tracker/parsing.py
new file mode 100644
index 0000000..f2e4041
--- /dev/null
+++ b/src/fuel_price_tracker/parsing.py
@@ -0,0 +1,42 @@
+"""Print elements of the price XML whose ``cp`` attribute starts with "86"."""
+import os
+import xml.etree.ElementTree as ET
+
+XML_PATH = "../../data/PrixCarburants_instantane.xml"
+
+
+def parser(item):
+    """Format ``(tag, attributes)`` pairs as readable text.
+
+    Args:
+        item: Iterable of ``(tag, attributes)`` pairs, where ``attributes``
+            is a mapping such as ``Element.attrib``.
+
+    Returns:
+        One ``tag:`` line per pair, each followed by one ``key : value`` line
+        per attribute; every line ends with a newline. Empty input gives an
+        empty string.
+    """
+    lines = []
+    for tag, attributes in item:
+        lines.append(f"{tag}:\n")
+        lines.extend(f"{key} : {value}\n" for key, value in attributes.items())
+    # Join once instead of growing a string with += in nested loops.
+    return "".join(lines)
+
+
+if not os.path.exists(XML_PATH):
+    # Importing manage_data runs its download-and-extract script.
+    import manage_data  # noqa: F401
+
+# Parse unconditionally: previously a run that had to download the data
+# stopped right after the download without printing anything.
+tree = ET.parse(XML_PATH)
+root = tree.getroot()
+# NOTE(review): "cp" looks like a postal code, making "86" a department
+# prefix -- confirm against the feed's schema.
+interesting_pomp = [
+    item for item in root if item.attrib.get("cp", "").startswith("86")
+]
+val = [[(child.tag, child.attrib) for child in item] for item in interesting_pomp]
+for item in val[:3]:
+    print(parser(item))