cleaning code
This commit is contained in:
parent
0ca041e6c8
commit
e0319eb9fb
|
@ -5,9 +5,6 @@ import os
|
|||
|
||||
url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
|
||||
|
||||
if os.listdir("../../data/"):
|
||||
for item in os.listdir("../../data/"):
|
||||
os.remove(f"../../data/{item}")
|
||||
page = requests.get(url)
|
||||
if page.status_code == 200:
|
||||
soup = BeautifulSoup(page.text, "lxml")
|
||||
|
@ -16,10 +13,12 @@ if page.status_code == 200:
|
|||
item.attrs for item in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
|
||||
]
|
||||
links = [link["href"] for link in links if "href" in link.keys()]
|
||||
os.makedirs("../../data", exist_ok=True)
|
||||
urllib.request.urlretrieve(links[1], "../../data/price.zip")
|
||||
os.makedirs("../../data", exist_ok=True)
|
||||
urllib.request.urlretrieve(links[1], "../../data/price.zip")
|
||||
|
||||
import zipfile
|
||||
import zipfile
|
||||
|
||||
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
|
||||
zip_ref.extractall("../../data/")
|
||||
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
|
||||
zip_ref.extractall("../../data/")
|
||||
else:
|
||||
print("The remote data is unreachable")
|
||||
|
|
|
@ -1,29 +1,53 @@
|
|||
import os
|
||||
import xml.etree.ElementTree as ET
|
||||
import argparse
|
||||
import manage_data
|
||||
import datetime
|
||||
|
||||
# Command-line options: which department's stations to process and which
# fuel to consider. Both are optional; an omitted option is left as None.
parser = argparse.ArgumentParser()
parser.add_argument("--dept", help="french departement number to process")
parser.add_argument("--gas-type", help="kind of fuel to process")

# Parsed eagerly at import time; argparse reads sys.argv here.
args = parser.parse_args()
|
||||
|
||||
|
||||
def parser(item):
    """Render a sequence of ``(title, fields)`` entries as plain text.

    Each entry contributes a ``"<title>:"`` header line followed by one
    ``"<key> : <value>"`` line per key of its fields container. Every line
    ends with a newline.

    Args:
        item: Iterable of indexable entries. Element 0 is the title (a
            string) and element 1 is a container that can be iterated for
            keys and indexed by them (typically a dict).

    Returns:
        The rendered text, or an empty string when ``item`` is empty.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    chunks = []
    for entry in item:
        title, fields = entry[0], entry[1]
        chunks.append(title + ":\n")
        chunks.extend(f"{key} : {fields[key]}\n" for key in fields)
    return "".join(chunks)
|
||||
class Pomp(object):
    """Wrap one station XML element and render its children as text.

    The element must carry ``latitude``, ``longitude``, ``cp`` and ``id``
    attributes and have at least two children; the text of the first child
    is stored as the address and the text of the second as the city.
    """

    # Attributes a <prix> child needs for the formatted price line.
    _PRICE_FIELDS = ("nom", "valeur", "maj")

    def __init__(self, node) -> None:
        """Store ``node`` and cache per-child and per-station values.

        Args:
            node: The station element (an ``xml.etree.ElementTree`` element
                or anything with the same ``attrib`` / iteration / indexing
                interface).

        Raises:
            KeyError: If one of the required attributes is missing.
            IndexError: If the element has fewer than two children.
        """
        super().__init__()
        self.node = node
        # Parallel lists, one entry per direct child, in document order.
        self.tag_list = [item.tag for item in node]
        self.attrib_list = [item.attrib for item in node]
        self.text_list = [item.text for item in node]
        self.latitude = node.attrib["latitude"]
        self.longitude = node.attrib["longitude"]
        # Positional access: child 0 is taken as the address, child 1 as the city.
        self.adress = node[0].text
        self.city = node[1].text
        self.cp = node.attrib["cp"]
        self.id = node.attrib["id"]

    def info(self) -> str:
        """Return a multi-line description with one entry per child element.

        * ``services``: the non-empty texts of its children, comma-separated.
        * ``prix`` with ``nom``, ``valeur`` and ``maj`` attributes: a
          formatted price line.
        * any other child (including a ``prix`` lacking those attributes):
          ``"<tag> : <text>"``, followed by the attribute dict on its own
          line when the child has attributes.
        """
        lines = []
        for child in self.node:
            key, value, data = child.tag, child.text, child.attrib
            if key == "services":
                # Skip children without text; joining None would raise TypeError.
                names = ", ".join(
                    service.text for service in child if service.text
                )
                lines.append(f"{key} : {names}\n")
            elif key == "prix" and all(field in data for field in self._PRICE_FIELDS):
                lines.append(
                    f"{key} : {data['nom']} : {data['valeur']} € "
                    f"(dernière mise à jour {data['maj']})\n"
                )
            elif not data:
                lines.append(f"{key} : {value}\n")
            else:
                lines.append(f"{key} : {value}\n{data}\n")
        return "".join(lines)
|
||||
|
||||
|
||||
if os.listdir("../../data/"):
|
||||
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
|
||||
root = tree.getroot()
|
||||
interesting_pomp = [item for item in root if item.attrib["cp"][:2] == "86"]
|
||||
for item in interesting_pomp:
|
||||
print("Longitude :", item.attrib["longitude"], "\n")
|
||||
print("Latitude :", item.attrib["latitude"], "\n")
|
||||
print("Code Postal :", item.attrib["cp"], "\n")
|
||||
|
||||
for sub_item in item:
|
||||
print(sub_item.tag, sub_item.attrib)
|
||||
print("\n\n")
|
||||
|
||||
|
||||
else:
|
||||
import manage_data
|
||||
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
|
||||
root = tree.getroot()
|
||||
interesting_pomp = [
|
||||
Pomp(item).info() for item in root if item.attrib["cp"][:2] == args.dept
|
||||
]
|
||||
print(interesting_pomp[19])
|
||||
|
|
Loading…
Reference in New Issue