cleaning code

This commit is contained in:
Adrien 2021-02-11 09:17:49 +01:00
parent 0ca041e6c8
commit e0319eb9fb
2 changed files with 54 additions and 31 deletions

View File

@ -5,9 +5,6 @@ import os
url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
if os.listdir("../../data/"):
for item in os.listdir("../../data/"):
os.remove(f"../../data/{item}")
page = requests.get(url)
if page.status_code == 200:
soup = BeautifulSoup(page.text, "lxml")
@ -16,10 +13,12 @@ if page.status_code == 200:
item.attrs for item in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
]
links = [link["href"] for link in links if "href" in link.keys()]
os.makedirs("../../data", exist_ok=True)
urllib.request.urlretrieve(links[1], "../../data/price.zip")
os.makedirs("../../data", exist_ok=True)
urllib.request.urlretrieve(links[1], "../../data/price.zip")
import zipfile
import zipfile
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
zip_ref.extractall("../../data/")
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
zip_ref.extractall("../../data/")
else:
print("The remote data is unreachable")

View File

@ -1,29 +1,53 @@
import os
import xml.etree.ElementTree as ET
import argparse
import manage_data
import datetime
# Command-line options for the report.  The ArgumentParser is bound to
# ``arg_parser`` so it is not confused with the ``parser`` function defined
# further down in this module, which would otherwise rebind the same name.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("--dept", help="french departement number to process")
arg_parser.add_argument("--gas-type", help="kind of fuel to process")
args = arg_parser.parse_args()
def parser(item) -> str:
    """Render a sequence of ``(label, mapping)`` pairs as plain text.

    For every entry ``it`` of *item*, one header line ``"<it[0]>:"`` is
    emitted, followed by one ``"<key> : <value>"`` line for every key
    obtained by iterating ``it[1]`` (the value is ``it[1][key]``).

    Args:
        item: iterable of indexable pairs; ``it[0]`` must be a string and
            ``it[1]`` must support iteration and ``[]`` lookup with the
            iterated keys (a dict, typically).

    Returns:
        The concatenated text, every line terminated by ``"\\n"``.  An empty
        *item* yields an empty string.
    """
    chunks = []
    for entry in item:
        label, fields = entry[0], entry[1]
        chunks.append(label + ":\n")
        chunks.extend(f"{key} : {fields[key]}\n" for key in fields)
    # Single join instead of repeated ``+=`` on a growing string.
    return "".join(chunks)
class Pomp(object):
    """Wrap one station XML element and render its children as text.

    *node* is used like an ``xml.etree.ElementTree.Element``: it is iterated
    for its children, indexed by position, and its ``attrib`` mapping must
    contain ``"latitude"``, ``"longitude"``, ``"cp"`` and ``"id"``.
    """

    # Attributes a ``prix`` child needs for the formatted price line.
    _PRICE_ATTRIBUTES = ("nom", "valeur", "maj")

    def __init__(self, node) -> None:
        """Cache the children's tags, attributes and texts plus station data.

        Raises:
            KeyError: if *node* lacks one of the required attributes.
            IndexError: if *node* has fewer than two children.
        """
        super().__init__()
        self.node = node
        # Parallel lists, one entry per direct child of ``node``.
        self.tag_list = [child.tag for child in node]
        self.attrib_list = [child.attrib for child in node]
        self.text_list = [child.text for child in node]
        self.latitude = node.attrib["latitude"]
        self.longitude = node.attrib["longitude"]
        # NOTE(review): the first and second children are taken to be the
        # address and the city purely by position -- confirm against the
        # XML schema.
        self.adress = node[0].text
        self.city = node[1].text
        self.cp = node.attrib["cp"]
        self.id = node.attrib["id"]

    def info(self) -> str:
        """Return a multi-line description, one entry per child element.

        * ``services``: the texts of its own children, comma-separated.
        * ``prix`` carrying ``nom``, ``valeur`` and ``maj``: a price line.
        * any child without attributes: ``"<tag> : <text>"``.
        * anything else: ``"<tag> : <text>"`` followed by the attribute
          mapping on the next line.

        A ``prix`` child lacking one of the three attributes is rendered by
        the two generic rules instead of raising ``KeyError``.
        """
        lines = []
        rows = zip(self.tag_list, self.text_list, self.attrib_list)
        for index, (tag, text, attrib) in enumerate(rows):
            if tag == "services":
                names = ", ".join(service.text for service in self.node[index])
                lines.append(f"{tag} : {names}\n")
            elif tag == "prix" and all(
                name in attrib for name in self._PRICE_ATTRIBUTES
            ):
                lines.append(
                    f"{tag} : {attrib['nom']} : {attrib['valeur']} € "
                    f"(dernière mise à jour {attrib['maj']})\n"
                )
            elif not attrib:
                lines.append(f"{tag} : {text}\n")
            else:
                lines.append(f"{tag} : {text}\n{attrib}\n")
        return "".join(lines)
if os.listdir("../../data/"):
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
root = tree.getroot()
interesting_pomp = [item for item in root if item.attrib["cp"][:2] == "86"]
for item in interesting_pomp:
print("Longitude :", item.attrib["longitude"], "\n")
print("Latitude :", item.attrib["latitude"], "\n")
print("Code Postal :", item.attrib["cp"], "\n")
for sub_item in item:
print(sub_item.tag, sub_item.attrib)
print("\n\n")
else:
import manage_data
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
root = tree.getroot()
interesting_pomp = [
Pomp(item).info() for item in root if item.attrib["cp"][:2] == args.dept
]
print(interesting_pomp[19])