From 93d1da4906785a452539a039d83b490272a4c5e0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 23 Feb 2021 13:10:38 +0100 Subject: [PATCH] [mod] update wikidata_units.json and fetch_wikidata_units.py The fetch_wikidata_units.py result won't change randomly. See comments in the script. --- searx/data/wikidata_units.json | 190 +++++++++++++++++++++++++-------- utils/fetch_wikidata_units.py | 43 +++++--- 2 files changed, 170 insertions(+), 63 deletions(-) diff --git a/searx/data/wikidata_units.json b/searx/data/wikidata_units.json index 966e5e812..2012896fc 100644 --- a/searx/data/wikidata_units.json +++ b/searx/data/wikidata_units.json @@ -1,7 +1,67 @@ { "Q199": "1", + "Q100036106": "int nmi", "Q100149279": "°We", "Q100995": "lb", + "Q101194838": "GHz/V", + "Q101463141": "ym²", + "Q101463237": "zm²", + "Q101463321": "am²", + "Q101463409": "fm²", + "Q101463496": "pm²", + "Q101463679": "hm²", + "Q101464050": "Mm²", + "Q101464215": "Gm²", + "Q101464369": "Tm²", + "Q101464499": "Pm²", + "Q101464624": "Em²", + "Q101464753": "Zm²", + "Q101464875": "Ym²", + "Q101515060": "g/J", + "Q101875087": "cd/cm²", + "Q101877596": "g/ml", + "Q101879174": "dm/s", + "Q102068844": "cm⁻³", + "Q102129339": "min⁻¹", + "Q102129428": "h⁻¹", + "Q102129592": "d⁻¹", + "Q102130673": "ym/s", + "Q102130674": "zm/s", + "Q102130677": "am/s", + "Q102130679": "fm/s", + "Q102130681": "pm/s", + "Q102130684": "nm/s", + "Q102130686": "μm/s", + "Q102130688": "mm/s", + "Q102130690": "dam/s", + "Q102130692": "hm/s", + "Q102130694": "Mm/s", + "Q102130696": "Gm/s", + "Q102130698": "Tm/s", + "Q102130700": "Pm/s", + "Q102130702": "Em/s", + "Q102130704": "Zm/s", + "Q102130706": "Ym/s", + "Q102130743": "ym/s²", + "Q102130745": "zm/s²", + "Q102130747": "am/s²", + "Q102130748": "fm/s²", + "Q102130751": "pm/s²", + "Q102130753": "nm/s²", + "Q102130755": "μm/s²", + "Q102130756": "mm/s²", + "Q102130758": "dm/s²", + "Q102130759": "dam/s²", + "Q102130761": "hm/s²", + "Q102130762": "km/s²", + "Q102130765": 
"Mm/s²", + "Q102130767": "Gm/s²", + "Q102130769": "Tm/s²", + "Q102130771": "Pm/s²", + "Q102130773": "Em/s²", + "Q102130775": "Zm/s²", + "Q102130777": "Ym/s²", + "Q102178883": "dm³/h", "Q1022113": "cm³", "Q102573": "Bq", "Q103246": "Sv", @@ -9,12 +69,16 @@ "Q10380431": "TJ", "Q1040401": "das", "Q1040427": "hs", - "Q1042866": "Zibit", + "Q104117265": "Bi", + "Q1042866": "Zib", + "Q104907398": "μN m", + "Q104907399": "mN m", "Q1050958": "inHg", "Q1051665": "m/s²", "Q1052397": "rad", "Q1054140": "Mm", "Q10543042": "Ym", + "Q105519288": "B SPL", "Q1057069": "hg", "Q1063756": "rad/s", "Q1063786": "in²", @@ -22,33 +86,34 @@ "Q1066138": "Ps", "Q1067722": "Fg", "Q1069725": "p.", + "Q1072404": "K", "Q1084321": "Tb/s", "Q1086691": "fg", "Q1091257": "tex", "Q1092296": "a", - "Q1104069": "CAD$", + "Q1104069": "$", "Q11061003": "μm²", "Q11061005": "nm²", "Q1131660": "st", "Q1137675": "cr", - "Q1140444": "Zbit", - "Q1140577": "Ybit", - "Q1152074": "Pbit", - "Q1152323": "Tbit", + "Q1140444": "Zb", + "Q1140577": "Yb", + "Q1152074": "Pb", + "Q1152323": "Tb", "Q1165799": "mil", "Q11776930": "Mg", "Q11830636": "psf", "Q11929860": "kpc", "Q1194225": "lbf", - "Q1194580": "Mibit", - "Q1195111": "Ebit", + "Q1194580": "Mib", + "Q1195111": "Eb", "Q1196837": "ω_P", "Q1197459": "Ms", "Q11982285": "Em³", "Q11982288": "Zm³", "Q11982289": "Tm³", "Q12011178": "Zs", - "Q1204894": "Gibit", + "Q1204894": "Gib", "Q12257695": "Eb/s", "Q12257696": "EB/s", "Q12261466": "kB/s", @@ -59,7 +124,7 @@ "Q12269308": "Zb/s", "Q12269309": "ZB/s", "Q1247300": "cm H₂O", - "Q12714022": "sh cwt", + "Q12714022": "cwt", "Q12789864": "GeV", "Q12874593": "W h", "Q128822": "kn", @@ -71,9 +136,9 @@ "Q1323615": "oz t", "Q132643": "kr", "Q13400897": "g", - "Q13479685": "mm wg", - "Q1351253": "Eibit", - "Q1351334": "Pibit", + "Q13479685": "mm H2O", + "Q1351253": "Eib", + "Q1351334": "Pib", "Q13542672": "Ry", "Q13548586": "THz", "Q13582667": "kgf/cm²", @@ -88,13 +153,15 @@ "Q14158377": "A_P", "Q14623803": "MDa", "Q14623804": 
"kDa", - "Q1472674": "Sv", + "Q1472674": "S", "Q14754979": "Zg", "Q14786969": "MJ", + "Q14850704": "℧", "Q14913554": "Ys", "Q14914907": "th", "Q14916719": "Gpc", "Q14923662": "Pm³", + "Q1501273": "HU", "Q1511773": "LSd", "Q15120301": "l atm", "Q1542309": "xu", @@ -110,7 +177,7 @@ "Q163354": "H", "Q1640501": "hyl", "Q1645498": "μg", - "Q16859309": "lb·ft", + "Q16859309": "lb ft", "Q169893": "S", "Q170804": "Wb", "Q17093295": "m/h", @@ -140,7 +207,7 @@ "Q182429": "m/s", "Q1826195": "dl", "Q18413919": "cm/s", - "Q184172": "FF", + "Q184172": "F", "Q185078": "a", "Q185153": "erg", "Q185648": "Torr", @@ -171,11 +238,10 @@ "Q2029519": "hl", "Q203567": "₦", "Q2042279": "m H₂O", - "Q204737": "៛", "Q2051195": "GWh", "Q2055118": "ppb", "Q2064166": "fc", - "Q206600": "ރ", + "Q206600": "MRF", "Q20706220": "cmm", "Q20706221": "dmm", "Q2080811": "vol%", @@ -196,9 +262,11 @@ "Q21075844": "ml/l", "Q21077820": "mg/m³", "Q21091747": "mg/kg", - "Q211256": "mph", + "Q211256": "mi/h", + "Q21154419": "PD", "Q211580": "BTU (th)", "Q212120": "A h", + "Q213005": "G$", "Q2140397": "in³", "Q214377": "ell", "Q2143992": "kHz", @@ -211,7 +279,7 @@ "Q215571": "N m", "Q21604951": "g/m³", "Q2165290": "yd³", - "Q216880": "kp", + "Q216880": "kgf", "Q217208": "a", "Q2175964": "dm³", "Q218593": "in", @@ -229,11 +297,14 @@ "Q229354": "Ci", "Q232291": "mi²", "Q2332346": "ml", + "Q235729": "y (365 days)", + "Q23808021": "oz (ap.)", "Q23823681": "TW", "Q23925410": "gal (UK)", "Q23925413": "gal (US)", "Q23931040": "dam²", "Q23931103": "nmi²", + "Q240468": "syr£", "Q2414435": "$b.", "Q242988": "Lib$", "Q2438073": "ag", @@ -252,7 +323,7 @@ "Q25511288": "mb", "Q2553708": "MV", "Q2554092": "kV", - "Q259502": "AU$", + "Q259502": "A$", "Q260126": "rem", "Q2612219": "Pg", "Q261247": "ct", @@ -306,8 +377,11 @@ "Q30001831": "aV", "Q30001832": "aW", "Q30001833": "aWb", - "Q3013059": "kyr", - "Q3194304": "kbit", + "Q3013059": "ka", + "Q304479": "tr", + "Q305896": "DPI", + "Q31889818": "ppq", + "Q3194304": "kb", 
"Q3207456": "mW", "Q321017": "R", "Q3221356": "ym", @@ -330,10 +404,10 @@ "Q3312063": "fL", "Q3320608": "kW", "Q3331719": "dm²", - "Q3332689": "ToR", - "Q3332814": "Mbit", + "Q3332689": "RT", + "Q3332814": "Mb", "Q3396758": "daa", - "Q3414243": "rps", + "Q3414243": "qps", "Q3421309": "R_J", "Q3495543": "mbar", "Q355198": "px", @@ -343,11 +417,11 @@ "Q376660": "nat", "Q37732658": "°R", "Q3773454": "Mpc", - "Q3815076": "Kibit", + "Q3815076": "Kib", "Q3833309": "£", "Q3858002": "mA h", "Q3867152": "ft/s²", - "Q389062": "Tibit", + "Q389062": "Tib", "Q3902688": "pl", "Q3902709": "ps", "Q39360235": "US lea", @@ -359,7 +433,7 @@ "Q39462789": "µin²", "Q39467934": "kgf/m²", "Q39469927": "N/m²", - "Q39617688": "cwt long", + "Q39617688": "cwt", "Q39617818": "t lb", "Q39628023": "y", "Q39699418": "cm/s²", @@ -367,14 +441,14 @@ "Q39709980": "bd", "Q39710113": "bhp EDR", "Q3972226": "kL", - "Q4041686": "iwg", + "Q4041686": "in H20", "Q4068266": "Ʒ", "Q4176683": "aC", - "Q420266": "oz. fl.", + "Q420266": "fl oz", "Q42319606": "people/m²", "Q4243638": "km³", "Q4456994": "mF", - "Q469356": "tn. 
sh.", + "Q469356": "T", "Q476572": "Ha", "Q482798": "yd", "Q483261": "Da", @@ -390,15 +464,18 @@ "Q514845": "pz", "Q5195628": "hm³", "Q5198770": "dam³", - "Q524410": "byr", + "Q524410": "Ga", + "Q5299480": "DPCm", "Q53393488": "PHz", "Q53393490": "EHz", "Q53393494": "ZHz", "Q53393498": "YHz", "Q53393659": "ML", "Q53393664": "GL", + "Q53393669": "El", "Q53393674": "ZL", "Q53393678": "YL", + "Q53393768": "zl", "Q53393771": "yL", "Q53393868": "GJ", "Q53393886": "PJ", @@ -492,7 +569,7 @@ "Q54083813": "Zkat", "Q5409016": "MVA", "Q5465723": "ft-pdl", - "Q549389": "bit/s", + "Q549389": "b/s", "Q550341": "V A", "Q552299": "ch", "Q55442349": "U/L", @@ -523,6 +600,8 @@ "Q6170164": "yg", "Q6171168": "zg", "Q61756607": "yd", + "Q61771602": "ft", + "Q61771670": "in", "Q61793198": "rd", "Q61794766": "ch (US survey)", "Q61994988": "Wth", @@ -534,13 +613,12 @@ "Q6414556": "kip", "Q648908": "bya", "Q64996135": "gal (US)/min", - "Q65028392": "mm/yr", + "Q65028392": "mm/a", "Q651336": "M_J", "Q6517513": "dag", "Q667419": "UK t", "Q681996": "M⊕", "Q685662": "p_P", - "Q6859652": "mm Hg", "Q686163": "$", "Q68725821": "°Rø", "Q68726230": "°De", @@ -582,20 +660,23 @@ "Q70444514": "Ymol", "Q70444609": "Pmol", "Q712226": "km²", + "Q717310": "Mg", "Q72081071": "MeV", "Q723733": "ms", "Q730251": "ft·lbf", "Q732707": "MHz", "Q73408": "K", "Q7350781": "Mb/s", + "Q7398951": "PPI", "Q743895": "bpm", "Q748716": "ft/s", "Q750178": "‱", + "Q752079": "RT", "Q752197": "kJ/mol", "Q7672057": "TU", "Q777017": "dBm", "Q78754556": "rot", - "Q78756901": "rev", + "Q78756901": "r", "Q78757683": "windings", "Q79726": "kB", "Q79735": "MB", @@ -637,14 +718,16 @@ "Q848856": "dam", "Q851872": "o", "Q854546": "Gm", - "Q855161": "Yibit", + "Q855161": "Yib", "Q856240": "ft³/min", "Q857027": "ft²", "Q85854198": "MN", - "Q864818": "abA", "Q87262709": "kΩ", "Q87416053": "MΩ", "Q88296091": "tsp", + "Q89473028": "bu (UK)", + "Q89662131": "pt (UK)", + "Q901492": "ph", "Q9026416": "MWth", "Q9048643": "nl", "Q905912": "L", 
@@ -653,7 +736,9 @@ "Q911730": "nx", "Q914151": "P_P", "Q915169": "F_P", - "Q93318": "nmi", + "Q93318": "M", + "Q93678895": "gill (US)", + "Q93679498": "gill (UK)", "Q940052": "q", "Q94076025": "dalm", "Q94076717": "dakat", @@ -664,6 +749,7 @@ "Q94415255": "GC", "Q94415438": "Yrad", "Q94415526": "YC", + "Q94415561": "krad", "Q94415782": "Mrad", "Q94416260": "GN", "Q94416535": "cN", @@ -943,6 +1029,7 @@ "Q96106385": "h°C", "Q96106393": "M°C", "Q96236286": "G°C", + "Q96312779": "μas", "Q97059641": "p°C", "Q97059652": "T°C", "Q97143826": "P°C", @@ -953,9 +1040,21 @@ "Q97143843": "z°C", "Q97143849": "Y°C", "Q97143851": "a°C", + "Q98492214": "den", "Q98538634": "eV/m²", "Q98635536": "eV/m", "Q98642859": "eV m²/kg", + "Q98793302": "qt (UK)", + "Q98793408": "liq qt (US)", + "Q98793687": "dry qt (US)", + "Q99476928": "gf", + "Q99487704": "ppt", + "Q99490009": "BTU (IT)", + "Q99490479": "BTU (39 °F)", + "Q99490986": "BTU (59 °F)", + "Q99491193": "BTU (60 °F)", + "Q99491447": "BTU (mean)", + "Q99492167": "m Hg", "Q11229": "%", "Q11570": "kg", "Q11573": "m", @@ -965,8 +1064,7 @@ "Q12129": "pc", "Q12438": "N", "Q16068": "DM", - "Q1811": "ua", - "Q20764": "Myr", + "Q20764": "Ma", "Q2101": "e", "Q25235": "h", "Q25236": "W", @@ -979,25 +1077,25 @@ "Q25517": "m³", "Q33680": "rad", "Q35852": "ha", - "Q36384": "equiv", + "Q36384": "Eq", "Q3710": "ft", "Q39274": "Sv", "Q39369": "Hz", "Q41509": "mol", "Q41803": "g", "Q42289": "°F", - "Q4406": "TV$", + "Q4406": "$T", "Q44395": "Pa", - "Q4587": "Le", "Q4588": "WS$", "Q4592": "F$", "Q4596": "Rs", "Q4597": "$", "Q47083": "Ω", "Q48013": "oz", + "Q4917": "US$", "Q50094": "Np", "Q50098": "B", - "Q531": "ly", + "Q531": "l.y.", "Q5329": "dB", "Q573": "d", "Q577": "a", diff --git a/utils/fetch_wikidata_units.py b/utils/fetch_wikidata_units.py index 69505968e..69ae8ab27 100644 --- a/utils/fetch_wikidata_units.py +++ b/utils/fetch_wikidata_units.py @@ -12,31 +12,40 @@ from searx import searx_dir from searx.engines.wikidata import 
send_wikidata_query +# the response may contain the same ?item several times, each with a different ?symbol +# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result +# even if an ?item has different ?symbol values of the same rank. +# A deterministic result keeps the generated file unchanged between fetches. +# see: +# * https://www.wikidata.org/wiki/Help:Ranking +# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section) +# * https://w.wiki/32BT +# see the result for https://www.wikidata.org/wiki/Q11582 : +# there are multiple symbols with the same rank SARQL_REQUEST = """ -SELECT DISTINCT ?item ?symbol ?P2370 ?P2370Unit ?P2442 ?P2442Unit +SELECT DISTINCT ?item ?symbol WHERE { -?item wdt:P31/wdt:P279 wd:Q47574. -?item wdt:P5061 ?symbol. -FILTER(LANG(?symbol) = "en"). + ?item wdt:P31/wdt:P279 wd:Q47574 . + ?item p:P5061 ?symbolP . + ?symbolP ps:P5061 ?symbol ; + wikibase:rank ?rank . + FILTER(LANG(?symbol) = "en"). } -ORDER BY ?item +ORDER BY ?item DESC(?rank) ?symbol """ def get_data(): - def get_key(unit): - return unit['item']['value'].replace('http://www.wikidata.org/entity/', '') - - def get_value(unit): - return unit['symbol']['value'] - - result = send_wikidata_query(SARQL_REQUEST) - if result is not None: - # sort the unit by entity name - # so different fetchs keep the file unchanged. - list(result['results']['bindings']).sort(key=get_key) - return collections.OrderedDict([(get_key(unit), get_value(unit)) for unit in result['results']['bindings']]) + results = collections.OrderedDict() + response = send_wikidata_query(SARQL_REQUEST) + for unit in response['results']['bindings']: + name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '') + unit = unit['symbol']['value'] + if name not in results: + # ignore duplicate: always use the first one + results[name] = unit + return results def get_wikidata_units_filename():