cleaning code
This commit is contained in:
parent
0ca041e6c8
commit
e0319eb9fb
|
@ -5,9 +5,6 @@ import os
|
||||||
|
|
||||||
url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
|
url = "https://www.data.gouv.fr/fr/datasets/prix-des-carburants-en-france/"
|
||||||
|
|
||||||
if os.listdir("../../data/"):
|
|
||||||
for item in os.listdir("../../data/"):
|
|
||||||
os.remove(f"../../data/{item}")
|
|
||||||
page = requests.get(url)
|
page = requests.get(url)
|
||||||
if page.status_code == 200:
|
if page.status_code == 200:
|
||||||
soup = BeautifulSoup(page.text, "lxml")
|
soup = BeautifulSoup(page.text, "lxml")
|
||||||
|
@ -16,10 +13,12 @@ if page.status_code == 200:
|
||||||
item.attrs for item in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
|
item.attrs for item in soup.find_all(attrs={"class": "btn btn-sm btn-primary"})
|
||||||
]
|
]
|
||||||
links = [link["href"] for link in links if "href" in link.keys()]
|
links = [link["href"] for link in links if "href" in link.keys()]
|
||||||
os.makedirs("../../data", exist_ok=True)
|
os.makedirs("../../data", exist_ok=True)
|
||||||
urllib.request.urlretrieve(links[1], "../../data/price.zip")
|
urllib.request.urlretrieve(links[1], "../../data/price.zip")
|
||||||
|
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
|
with zipfile.ZipFile("../../data/price.zip", "r") as zip_ref:
|
||||||
zip_ref.extractall("../../data/")
|
zip_ref.extractall("../../data/")
|
||||||
|
else:
|
||||||
|
print("The remote data is unreachable")
|
||||||
|
|
|
@ -1,29 +1,53 @@
|
||||||
import os
|
import os
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
import argparse
|
||||||
|
import manage_data
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--dept", help="french departement number to process")
|
||||||
|
parser.add_argument("--gas-type", help="king of fuel to process")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def parser(item):
|
class Pomp(object):
|
||||||
string = ""
|
def __init__(self, node) -> None:
|
||||||
for it in item:
|
super().__init__()
|
||||||
string += it[0] + ":\n"
|
self.node = node
|
||||||
for st in it[1]:
|
self.tag_list = [item.tag for item in node]
|
||||||
string += f"{st} : {it[1][st]}\n"
|
self.attrib_list = [item.attrib for item in node]
|
||||||
return string
|
self.text_list = [item.text for item in node]
|
||||||
|
self.latitude = node.attrib["latitude"]
|
||||||
|
self.longitude = node.attrib["longitude"]
|
||||||
|
self.adress = node[0].text
|
||||||
|
self.city = node[1].text
|
||||||
|
self.cp = node.attrib["cp"]
|
||||||
|
self.id = node.attrib["id"]
|
||||||
|
# self.others = [node[][i].text for i in range(len(node[2]))]
|
||||||
|
|
||||||
|
def info(self):
|
||||||
|
string = ""
|
||||||
|
for key, value, data, index in zip(
|
||||||
|
self.tag_list, self.text_list, self.attrib_list, range(len(self.tag_list))
|
||||||
|
):
|
||||||
|
if key == "services":
|
||||||
|
string += f"""{key} : {', '.join([self.node[index][i].text for i in range(len(self.node[index]))])}\n"""
|
||||||
|
elif key == "prix":
|
||||||
|
keys = list(data.keys())
|
||||||
|
keys.remove("id")
|
||||||
|
string += f"""{key} : {data['nom']} : {data['valeur']} € (dernière mise à jour {data['maj']})\n"""
|
||||||
|
elif data == {}:
|
||||||
|
string += f"""{key} : {value}\n"""
|
||||||
|
else:
|
||||||
|
string += f"""{key} : {value}
|
||||||
|
{data}\n"""
|
||||||
|
return string
|
||||||
|
|
||||||
|
|
||||||
if os.listdir("../../data/"):
|
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
|
||||||
tree = ET.parse("../../data/PrixCarburants_instantane.xml")
|
root = tree.getroot()
|
||||||
root = tree.getroot()
|
interesting_pomp = [
|
||||||
interesting_pomp = [item for item in root if item.attrib["cp"][:2] == "86"]
|
Pomp(item).info() for item in root if item.attrib["cp"][:2] == args.dept
|
||||||
for item in interesting_pomp:
|
]
|
||||||
print("Longitude :", item.attrib["longitude"], "\n")
|
print(interesting_pomp[19])
|
||||||
print("Latitude :", item.attrib["latitude"], "\n")
|
|
||||||
print("Code Postal :", item.attrib["cp"], "\n")
|
|
||||||
|
|
||||||
for sub_item in item:
|
|
||||||
print(sub_item.tag, sub_item.attrib)
|
|
||||||
print("\n\n")
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
|
||||||
import manage_data
|
|
||||||
|
|
Loading…
Reference in New Issue