From e0319eb9fb51c9fede87bbd35232ef70fb1a9fbe Mon Sep 17 00:00:00 2001
From: Adrien
Date: Thu, 11 Feb 2021 09:17:49 +0100
Subject: [PATCH] cleaning code

---
 src/fuel_price_tracker/manage_data.py | 15 +++---
 src/fuel_price_tracker/parsing.py     | 70 ++++++++++++++++++---------
 2 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/src/fuel_price_tracker/manage_data.py b/src/fuel_price_tracker/manage_data.py
index 953b45e..1db0dd9 100644
--- a/src/fuel_price_tracker/manage_data.py
+++ b/src/fuel_price_tracker/manage_data.py
@@ -5,9 +5,6 @@ import os
 
 url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
 
-if os.listdir("../../data/"):
-    for item in os.listdir("../../data/"):
-        os.remove(f"../../data/{item}")
 page = requests.get(url)
 if page.status_code == 200:
     soup = BeautifulSoup(page.text, "lxml")
@@ -16,10 +13,12 @@ if page.status_code == 200:
         item.attrs for item in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
     ]
     links = [link["href"] for link in links if "href" in link.keys()]
-os.makedirs("../../data", exist_ok=True)
-urllib.request.urlretrieve(links[1], "../../data/price.zip")
+    os.makedirs("../../data", exist_ok=True)
+    urllib.request.urlretrieve(links[1], "../../data/price.zip")
 
-import zipfile
+    import zipfile
 
-with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
-    zip_ref.extractall("../../data/")
+    with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
+        zip_ref.extractall("../../data/")
+else:
+    print("The remote data is unreachable")
diff --git a/src/fuel_price_tracker/parsing.py b/src/fuel_price_tracker/parsing.py
index c381857..7e2b89e 100644
--- a/src/fuel_price_tracker/parsing.py
+++ b/src/fuel_price_tracker/parsing.py
@@ -1,29 +1,53 @@
 import os
 import xml.etree.ElementTree as ET
+import argparse
+import manage_data
+import datetime
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--dept", help="french departement number to process")
+parser.add_argument("--gas-type", help="king of fuel to process")
+
+args = parser.parse_args()
 
 
-def parser(item):
-    string = ""
-    for it in item:
-        string += it[0] + ":\n"
-        for st in it[1]:
-            string += f"{st} : {it[1][st]}\n"
-    return string
+class Pomp(object):
+    def __init__(self, node) -> None:
+        super().__init__()
+        self.node = node
+        self.tag_list = [item.tag for item in node]
+        self.attrib_list = [item.attrib for item in node]
+        self.text_list = [item.text for item in node]
+        self.latitude = node.attrib["latitude"]
+        self.longitude = node.attrib["longitude"]
+        self.adress = node[0].text
+        self.city = node[1].text
+        self.cp = node.attrib["cp"]
+        self.id = node.attrib["id"]
+        # self.others = [node[][i].text for i in range(len(node[2]))]
+
+    def info(self):
+        string = ""
+        for key, value, data, index in zip(
+            self.tag_list, self.text_list, self.attrib_list, range(len(self.tag_list))
+        ):
+            if key == "services":
+                string += f"""{key} : {', '.join([self.node[index][i].text for i in range(len(self.node[index]))])}\n"""
+            elif key == "prix":
+                keys = list(data.keys())
+                keys.remove("id")
+                string += f"""{key} : {data['nom']} : {data['valeur']} € (dernière mise à jour {data['maj']})\n"""
+            elif data == {}:
+                string += f"""{key} : {value}\n"""
+            else:
+                string += f"""{key} : {value}
+                {data}\n"""
+        return string
 
 
-if os.listdir("../../data/"):
-    tree = ET.parse("../../data/PrixCarburants_instantane.xml")
-    root = tree.getroot()
-    interesting_pomp = [item for item in root if item.attrib["cp"][:2] == "86"]
-    for item in interesting_pomp:
-        print("Longitude :", item.attrib["longitude"], "\n")
-        print("Latitude :", item.attrib["latitude"], "\n")
-        print("Code Postal :", item.attrib["cp"], "\n")
-
-        for sub_item in item:
-            print(sub_item.tag, sub_item.attrib)
-        print("\n\n")
-
-
-else:
-    import manage_data
+tree = ET.parse("../../data/PrixCarburants_instantane.xml")
+root = tree.getroot()
+interesting_pomp = [
+    Pomp(item).info() for item in root if item.attrib["cp"][:2] == args.dept
+]
+print(interesting_pomp[19])