Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs
aus Wikipedia, der freien Enzyklopädie
< Benutzer:Mps
Dies ist die aktuelle Version dieser Seite, zuletzt bearbeitet am 3. April 2021 um 21:59 Uhr durch imported>Mps(36927) (Aktualisierung auf .NET 5).
/* Program to update the population numbers of Japanese municipalities in Wikidata and the German Wikipedia's population metadata templates
* with the numbers from the Japanese Wikipedia.
* The program throttles writing access to the Wikipedia/Wikidata servers by using the "maxlag=5" parameter and
* limiting the edits to 30 per minute.
*/
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Web;
using System.Xml;
using Microsoft.AspNetCore.WebUtilities;
namespace MetadatenEinwohnerzahlJPUpdater
{
#region Exceptions
/// <summary>Base type for errors raised by this program's own logic.</summary>
internal class BusinessException : Exception
{
    /// <param name="message">Text describing the failure.</param>
    public BusinessException(string message)
        : base(message)
    {
    }
}

/// <summary>Error reported by the MediaWiki API or detected while reading its response.</summary>
internal class MediawikiException : BusinessException
{
    /// <param name="message">Text describing the failure.</param>
    public MediawikiException(string message)
        : base(message)
    {
    }

    /// <param name="code">Value of the API error's <c>code</c> attribute.</param>
    /// <param name="info">Value of the API error's <c>info</c> attribute.</param>
    public MediawikiException(string code, string info)
        : base("Mediawiki error \"" + code + ": " + info + "\"")
    {
    }
}

/// <summary>MediaWiki error that carries a replication lag, parsed from the error's info text.</summary>
internal class MediawikiLagException : MediawikiException
{
    /// <param name="code">Value of the API error's <c>code</c> attribute.</param>
    /// <param name="info">Value of the API error's <c>info</c> attribute; the lag is parsed from it.</param>
    public MediawikiLagException(string code, string info)
        : base("Mediawiki error \"" + code + ": " + info + "\"")
    {
        LagTime = GetLagTime(info);
    }

    /// <summary>Lag extracted from the info text, see <see cref="GetLagTime"/>.</summary>
    public TimeSpan LagTime { get; }

    /// <summary>
    /// Extracts the lag from a text of the form "Waiting for HOST: N seconds lagged".
    /// </summary>
    /// <param name="info">Info text of the API error.</param>
    /// <returns>
    /// The reported lag; -1 second when the text does not have the expected form;
    /// zero when the number cannot be parsed or exceeds the <see cref="int"/> range.
    /// </returns>
    public static TimeSpan GetLagTime(string info)
    {
        var match = Regex.Match(info, "Waiting for [^ ]*: ([0-9.-]+) seconds lagged");
        if (!match.Success)
        {
            return TimeSpan.FromSeconds(-1);
        }

        // The lag may be fractional ("5.5"); integer parsing rejected such values and silently yielded 0.
        // Always parse with the invariant culture so that '.' is the decimal separator.
        var parsed = double.TryParse(match.Groups[1].Value, NumberStyles.Float, CultureInfo.InvariantCulture,
            out var seconds);
        if (!parsed || Math.Abs(seconds) > int.MaxValue)
        {
            seconds = 0;
        }
        return TimeSpan.FromSeconds(seconds);
    }
}
#endregion
public class Program
{
// Upper bound for write requests per minute.
private const int MaxEditsPerMinute = 30;
// File in which the per-entry timestamps and claim ids are persisted between runs.
private const string TimestampFilename = "jaWP_timestamps.json";
// Handler shared by every HttpClient created in CreateHttpClient.
private static readonly HttpClientHandler httpClientHandler = new() { AutomaticDecompression = DecompressionMethods.All };
// Guards console cursor positioning and writes.
private static readonly object consoleLock = new();
// Serializes writes of the settings file.
private static readonly SemaphoreSlim settingsLock = new(1);
/// <summary>
/// Sends a population claim (property P1082) for one Wikidata item through the <c>wbsetclaim</c> API action,
/// including a "point in time" qualifier and a reference built from the entry's source data.
/// The request is repeated while the API answers with a <c>maxlag</c> error.
/// </summary>
/// <param name="entry">Item id, population and source data to write.</param>
/// <param name="claimId">Claim id to send; when <c>null</c> a new id is built from the item id and a GUID.</param>
/// <param name="editToken">Token sent in the <c>token</c> POST field.</param>
/// <returns>The claim id that was sent.</returns>
/// <exception cref="MediawikiException">The API answered with an error other than <c>maxlag</c>.</exception>
private static async Task<string> SetWikidataClaimAsync(WikidataEntry entry, string claimId, string editToken)
{
    // Wait used when the maxlag error text yields no positive lag; Task.Delay throws for -1 s,
    // and a zero delay would retry immediately in a tight loop.
    var fallbackLagDelay = TimeSpan.FromSeconds(5);
    claimId ??= entry.Id.ToLowerInvariant() + "$" + Guid.NewGuid();
    #region Build claim object
    var claim = new
    {
        id = claimId,
        mainsnak = new
        {
            snaktype = "value",
            property = "P1082", // property "population"
            datavalue = new
            {
                value = new
                {
                    amount = "+" + entry.Population,
                    unit = "1"
                },
                type = "quantity"
            }
        },
        qualifiers = new Dictionary<string, List<object>>
        {
            {
                "P585", new List<object> // property "point in time"
                {
                    new
                    {
                        snaktype = "value",
                        property = "P585",
                        datavalue = new
                        {
                            value = new
                            {
                                // Invariant culture: ':' and the calendar in custom formats are culture-specific.
                                time = entry.Source.AsOf.ToString("+yyyy-MM-ddT00:00:00Z", CultureInfo.InvariantCulture),
                                timezone = 0,
                                before = 0,
                                after = 0,
                                precision = 11,
                                calendarmodel = "http://www.wikidata.org/entity/Q1985727"
                            },
                            type = "time"
                        }
                    }
                }
            }
        },
        type = "statement",
        rank = "preferred",
        references = new List<dynamic>
        {
            new
            {
                snaks = new Dictionary<string, List<object>>
                {
                    {
                        "P143", new List<object> // property "imported from"
                        {
                            new
                            {
                                snaktype = "value",
                                property = "P143",
                                datavalue = new
                                {
                                    value = new { entity_type = "item", numeric_id = 177837 },
                                    // Q177837 = "Japanese Wikipedia"
                                    type = "wikibase-entityid"
                                }
                            }
                        }
                    },
                    {
                        "P854", new List<object> // property "URL"
                        {
                            new
                            {
                                snaktype = "value",
                                property = "P854",
                                datavalue = new { value = entry.Source.SourceUri, type = "string" }
                            }
                        }
                    },
                    {
                        "P364", new List<object> // property "original language"
                        {
                            new
                            {
                                snaktype = "value",
                                property = "P364",
                                datavalue = new
                                {
                                    value = new { entity_type = "item", numeric_id = 5287 }, // Q5287 = "Japanese"
                                    type = "wikibase-entityid"
                                }
                            }
                        }
                    },
                    {
                        "P813", new List<object> // property "retrieved"
                        {
                            new
                            {
                                snaktype = "value",
                                property = "P813",
                                datavalue = new
                                {
                                    value = new
                                    {
                                        time = entry.Source.AccessDate.ToString("+yyyy-MM-ddT00:00:00Z", CultureInfo.InvariantCulture),
                                        timezone = 0,
                                        before = 0,
                                        after = 0,
                                        precision = 11,
                                        calendarmodel = "http://www.wikidata.org/entity/Q1985727"
                                    },
                                    type = "time"
                                }
                            }
                        }
                    }
                }
            }
        }
    };
    if (!string.IsNullOrWhiteSpace(entry.Source.SourceTitle))
    {
        // The list elements are dynamic, so the snak dictionary is reached through dynamic binding.
        var reference = claim.references[0].snaks;
        reference.Add("P1476", new List<object> // property "title"
        {
            new
            {
                snaktype = "value",
                property = "P1476",
                datavalue = new
                {
                    value = new
                    {
                        language = "ja",
                        text = entry.Source.SourceTitle
                    },
                    type = "monolingualtext"
                }
            }
        });
    }
    #endregion
    var claimJson = JsonSerializer.Serialize(claim);
    // Anonymous-type members cannot contain '-', so the hyphenated JSON names are patched in afterwards.
    claimJson = claimJson.Replace("entity_type", "entity-type").Replace("numeric_id", "numeric-id");
    bool retry;
    using var client = CreateHttpClient("wikidata");
    do
    {
        retry = false;
        using var response = await client.PostAsync(GetQuery(new Dictionary<string, string>
        {
            { "assert", "user" },
            { "action", "wbsetclaim" },
            { "format", "json" },
            { "summary", "using [[de:Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs]]" },
            { "maxlag", "5" }
        }), FormUrlEncoded(new Dictionary<string, string>
        {
            { "token", editToken },
            { "claim", claimJson }
        }));
        await using var stream = await response.Content.ReadAsStreamAsync();
        Dictionary<string, object> result;
        using (var reader = new StreamReader(stream))
        {
            result = JsonSerializer.Deserialize<Dictionary<string, object>>(await reader.ReadToEndAsync());
        }
        if (result!.TryGetValue("error", out var errorObj))
        {
            var error = ((JsonElement)errorObj).EnumerateObject().ToDictionary(x => x.Name, x => x.Value.ToString());
            var code = error["code"];
            var info = error["info"];
            if (code == "maxlag")
            {
                var lagDelay = MediawikiLagException.GetLagTime(info);
                if (lagDelay <= TimeSpan.Zero)
                {
                    lagDelay = fallbackLagDelay;
                }
                await Task.Delay(lagDelay);
                retry = true;
            }
            else
            {
                throw new MediawikiException(code, info);
            }
        }
    } while (retry);
    return claimId;
}
/// <summary>
/// Logs in to Wikidata and writes a population claim for every entry of <paramref name="wikidataList"/>
/// that is newer than the access date stored in <paramref name="settings"/>.
/// Each written entry is persisted immediately; at most <see cref="MaxEditsPerMinute"/> requests are sent per minute.
/// </summary>
/// <param name="credentials">Account used for the login.</param>
/// <param name="settings">Persisted access dates and claim ids; updated in place.</param>
/// <param name="wikidataList">Entries to write.</param>
private static async Task UpdateWikidataAsync(NetworkCredential credentials, Settings settings, IEnumerable<WikidataEntry> wikidataList)
{
    int wdCursorX, wdCursorY;
    lock (consoleLock)
    {
        Console.Write("Schreibe Wikidata-Elemente: ");
        wdCursorX = Console.CursorLeft;
        wdCursorY = Console.CursorTop;
        Console.WriteLine();
    }
    await LoginAsync("wikidata", credentials);
    var editToken = await GetTokenAsync("wikidata", TokenType.Csrf);
    foreach (var item in wikidataList)
    {
        // Started before the request so that the request time counts towards the throttle interval.
        var throttleTask = Task.Delay(TimeSpan.FromMinutes(1.0 / MaxEditsPerMinute));
        var prefAndName = PrefIsoCode.First(iso => iso.Value == item.Source.Iso).Key + item.Name;
        if (settings.WikiData.AccessDates.TryGetValue(prefAndName, out var prevAccessDate) && prevAccessDate >= item.Source.AccessDate) { continue; }
        lock (consoleLock)
        {
            Console.SetCursorPosition(wdCursorX, wdCursorY);
            Console.Write($"{item.Id,11}");
        }
        settings.WikiData.Ids.TryGetValue(prefAndName, out var claimId);
        try
        {
            claimId = await SetWikidataClaimAsync(item, claimId, editToken);
        }
        catch (MediawikiException e)
        {
            lock (consoleLock)
            {
                Console.WriteLine();
                // claimId is null for entries without a stored claim, so fall back to the item id.
                Console.Error.WriteLine($"{claimId ?? item.Id}: {e.Message}");
            }
            // Do not record a failed edit as done: the entry must be retried on the next run.
            await throttleTask;
            continue;
        }
        settings.WikiData.AccessDates.AddOrUpdate(prefAndName, item.Source.AccessDate, (_, _) => item.Source.AccessDate);
        settings.WikiData.Ids.AddOrUpdate(prefAndName, claimId, (_, _) => claimId);
        await SaveSettingsAsync(settings);
        await throttleTask;
    }
    lock (consoleLock)
    {
        Console.SetCursorPosition(wdCursorX, wdCursorY);
        Console.WriteLine(" fertig." + new string(' ', 16));
    }
}
/// <summary>
/// Logs in to the German Wikipedia and writes the generated template text of every prefecture in
/// <paramref name="prefDataList"/> that is newer than the access date stored in <paramref name="settings"/>.
/// Edits are limited to <see cref="MaxEditsPerMinute"/> per minute, like the Wikidata edits.
/// </summary>
/// <param name="credentials">Account used for the login.</param>
/// <param name="settings">Persisted access dates; updated in place.</param>
/// <param name="prefDataList">Prefecture data to write.</param>
/// <exception cref="MediawikiException">The API response contained an error element.</exception>
private static async Task UpdateDeWikiAsync(NetworkCredential credentials, Settings settings, IEnumerable<PrefTemplateData> prefDataList)
{
    int deCursorX, deCursorY;
    lock (consoleLock)
    {
        Console.Write("Schreibe in deutsche Wikipedia: ");
        deCursorX = Console.CursorLeft;
        deCursorY = Console.CursorTop;
        Console.WriteLine();
    }
    await LoginAsync("de", credentials);
    var editToken = await GetTokenAsync("de", TokenType.Csrf);
    using var client = CreateHttpClient("de");
    foreach (var prefData in prefDataList)
    {
        if (settings.De.AccessDates.TryGetValue(prefData.Kanji, out var prevAccessDate) && prevAccessDate >= prefData.AccessDate) { continue; }
        // Started before the request so that the request time counts towards the throttle interval.
        var throttleTask = Task.Delay(TimeSpan.FromMinutes(1.0 / MaxEditsPerMinute));
        lock (consoleLock)
        {
            Console.SetCursorPosition(deCursorX, deCursorY);
            Console.Write(prefData.Iso);
        }
        var pageTitleDe = "Vorlage:Metadaten Einwohnerzahl " + prefData.Iso;
        // Hash of the UTF-8 text (without BOM), sent along in the "md5" field.
        var md5Hash = CalcMd5Hash(new UTF8Encoding(false).GetBytes(prefData.ContentDe));
        using var response = await client.PostAsync(GetQuery(new Dictionary<string, string>
        {
            { "assert", "user" },
            { "action", "edit" },
            { "title", pageTitleDe },
            { "summary", "Metadaten-Aktualisierung per [[Benutzer:Mps/MetadatenEinwohnerzahlJPUpdater.cs]]" },
            { "nocreate", "" }
        }), FormUrlEncoded(new Dictionary<string, string>
        {
            { "token", editToken },
            { "text", prefData.ContentDe },
            { "md5", md5Hash }
        }));
        await using var stream = await response.Content.ReadAsStreamAsync();
        using var reader = XmlReader.Create(stream, new XmlReaderSettings { Async = true });
        while (await reader.ReadAsync())
        {
            CheckForError(reader);
        }
        settings.De.AccessDates.AddOrUpdate(prefData.Kanji, prefData.AccessDate, (_, _) => prefData.AccessDate);
        await SaveSettingsAsync(settings);
        await throttleTask;
    }
    lock (consoleLock)
    {
        Console.SetCursorPosition(deCursorX, deCursorY);
        Console.WriteLine(" fertig.");
    }
}
/// <summary>
/// Program entry point: reads the Japanese source data and then runs conversion, the German Wikipedia
/// update and the Wikidata update side by side.
/// </summary>
/// <param name="args">Optional user name and password.</param>
/// <returns>0 on success, 1 when an exception was caught.</returns>
private static async Task<int> Main(string[] args)
{
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    try
    {
        var credentials = GetCredentials(args);
        var sources = await GetJapaneseSourceDataAsync();
        var settings = await LoadSettingsAsync();
        var stopwatch = Stopwatch.StartNew();

        // Queues filled by the converter and drained by the two writers.
        var templateQueue = new BlockingCollection<PrefTemplateData>();
        var wikidataQueue = new BlockingCollection<WikidataEntry>();
        var unmatched = new ConcurrentBag<string>();

        var pending = new[]
        {
            ConvertSourceDataAsync(sources, settings, templateQueue, wikidataQueue, unmatched),
            UpdateDeWikiAsync(credentials, settings, templateQueue.GetConsumingEnumerable()),
            UpdateWikidataAsync(credentials, settings, wikidataQueue.GetConsumingEnumerable())
        };
        await Task.WhenAll(pending);
        stopwatch.Stop();

        if (!unmatched.IsEmpty)
        {
            Console.WriteLine($"Folgende Einträge konnten nicht gematcht werden: {string.Join("\n", unmatched)}");
        }
        Console.WriteLine();
        Console.WriteLine($"Fertig ({stopwatch.Elapsed:m\\:ss}).");
        return 0;
    }
    catch (Exception ex)
    {
        // Report the innermost cause when several failures were aggregated.
        var reported = ex is AggregateException aggregate ? aggregate.GetBaseException() : ex;
        Console.Error.WriteLine(reported);
        return 1;
    }
    finally
    {
        httpClientHandler.Dispose();
    }
}
/// <summary>
/// Writes <paramref name="settings"/> as indented JSON to <see cref="TimestampFilename"/>.
/// Concurrent callers are serialized through <see cref="settingsLock"/>.
/// </summary>
/// <param name="settings">Settings to persist.</param>
private static async Task SaveSettingsAsync(Settings settings)
{
    var jsonOptions = new JsonSerializerOptions
    {
        WriteIndented = true,
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping
    };

    await settingsLock.WaitAsync();
    try
    {
        await using var output = File.Create(TimestampFilename);
        await JsonSerializer.SerializeAsync(output, settings, jsonOptions);
    }
    finally
    {
        settingsLock.Release();
    }
}
/// <summary>
/// Reads the Japanese template of every prefecture (at most four at a time), queues the converted
/// template data and matches its entries against the Wikidata ids of the pages embedding the template.
/// Both queues are marked complete when all prefectures have been processed, also on failure.
/// </summary>
/// <param name="sources">Source URL per prefecture name.</param>
/// <param name="settings">Passed through to <see cref="ReadTemplateDataAsync"/>.</param>
/// <param name="prefDataList">Receives the converted template data.</param>
/// <param name="wikidataList">Receives the matched Wikidata entries.</param>
/// <param name="unmatchedItems">Receives messages for entries that could not be matched.</param>
[SuppressMessage("ReSharper", "AccessToDisposedClosure")]
private static async Task ConvertSourceDataAsync(IDictionary<string, string> sources, Settings settings,
    BlockingCollection<PrefTemplateData> prefDataList, BlockingCollection<WikidataEntry> wikidataList,
    ConcurrentBag<string> unmatchedItems)
{
    using var semaphore = new SemaphoreSlim(4);
    using var client = CreateHttpClient("ja");
    try
    {
        var tasks = PrefIsoCode.Keys.Select(prefKanji => Task.Factory.StartNew(async () =>
        {
            // Acquire outside the try block: a failed wait must not be followed by a Release.
            await semaphore.WaitAsync();
            try
            {
                var templateData = await ReadTemplateDataAsync(client, prefKanji, sources, settings);
                if (templateData != null)
                {
                    prefDataList.Add(templateData);
                    var wikidataIds = await ReadWikidataIdsAsync(client, prefKanji);
                    // Match the template entries against article names and Wikidata ids
                    templateData.Match(wikidataIds, wikidataList, unmatchedItems);
                }
            }
            finally
            {
                semaphore.Release();
            }
        }, TaskCreationOptions.LongRunning).Unwrap());
        await Task.WhenAll(tasks);
    }
    finally
    {
        // Task.WhenAll only completes after every worker has finished, so nothing is added after this
        // point. Without completing the queues on failure, the consumers would block forever.
        prefDataList.CompleteAdding();
        wikidataList.CompleteAdding();
    }
}
/// <summary>
/// Reads the Japanese population template of one prefecture and converts it.
/// </summary>
/// <param name="client">Client used for the request.</param>
/// <param name="prefKanji">Prefecture name, used as template sub-page and as key into <see cref="PrefIsoCode"/>.</param>
/// <param name="sources">Source URL per prefecture name.</param>
/// <param name="settings">Not used by this method.</param>
/// <returns>The converted data of the last <c>rev</c> element in the response, or <c>null</c> if there was none.</returns>
private static async Task<PrefTemplateData> ReadTemplateDataAsync(HttpClient client, string prefKanji, IDictionary<string, string> sources, Settings settings)
{
    var query = new Dictionary<string, string>
    {
        ["action"] = "query",
        ["prop"] = "revisions",
        ["titles"] = "Template:自治体人口/" + prefKanji,
        ["rvprop"] = "content|timestamp"
    };
    using var response = await client.GetAsync(GetQuery(query));
    var prefIso = PrefIsoCode[prefKanji];

    PrefTemplateData converted = null;
    TraverseHttpResponse(response, reader =>
    {
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "rev")
        {
            return;
        }
        var timestampText = reader.GetAttribute("timestamp") ?? string.Empty;
        var revisionTime = DateTime.Parse(timestampText, null, DateTimeStyles.AdjustToUniversal);
        converted = new PrefTemplateData(reader.ReadString(), sources, prefKanji, prefIso, revisionTime);
    });
    return converted;
}
/// <summary>
/// Determines the main-namespace pages that embed a prefecture's population template
/// together with their <c>wikibase_item</c> page property.
/// </summary>
/// <param name="client">Client used for the request.</param>
/// <param name="prefKanji">Prefecture name, used as template sub-page.</param>
/// <returns>One entry (page title and Wikidata id) per page that has a <c>pageprops</c> element.</returns>
private static async Task<IList<WikidataEntry>> ReadWikidataIdsAsync(HttpClient client, string prefKanji)
{
    var query = new Dictionary<string, string>
    {
        ["action"] = "query",
        ["generator"] = "embeddedin",
        ["geititle"] = "Template:自治体人口/" + prefKanji,
        ["geinamespace"] = "0",
        ["geilimit"] = "500",
        ["prop"] = "pageprops"
    };
    using var response = await client.GetAsync(GetQuery(query));

    var found = new List<WikidataEntry>();
    TraverseHttpResponse(response, reader =>
    {
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "page")
        {
            return;
        }
        // The title must be read before the reader moves on to the child element.
        var pageTitle = reader.GetAttribute("title");
        if (!reader.ReadToDescendant("pageprops", ""))
        {
            return;
        }
        found.Add(new WikidataEntry { Name = pageTitle, Id = reader.GetAttribute("wikibase_item") });
    });
    return found;
}
/// <summary>
/// Loads the persisted settings from <see cref="TimestampFilename"/>.
/// </summary>
/// <returns>The deserialized settings, or a fresh instance when the file does not exist.</returns>
private static async Task<Settings> LoadSettingsAsync()
{
    if (!File.Exists(TimestampFilename))
    {
        return new Settings();
    }

    await using var input = File.OpenRead(TimestampFilename);
    return await JsonSerializer.DeserializeAsync<Settings>(input);
}
/// <summary>
/// Reads the documentation page of the Japanese population template and extracts the source URL
/// listed for each prefecture.
/// </summary>
/// <returns>Source URL per prefecture name; contains exactly the keys of <see cref="PrefIsoCode"/>.</returns>
/// <exception cref="BusinessException">
/// The documentation page has no content, no sources were found, or sources are missing for some prefectures.
/// </exception>
private static async Task<IDictionary<string, string>> GetJapaneseSourceDataAsync()
{
    Console.WriteLine("Quellen ermitteln.");
    var urlByPrefecture = new Dictionary<string, string>();
    using var client = CreateHttpClient("ja");
    var query = new Dictionary<string, string>
    {
        ["action"] = "query",
        ["prop"] = "revisions",
        ["titles"] = "Template:自治体人口/doc",
        ["rvprop"] = "content"
    };
    using var response = await client.GetAsync(GetQuery(query));

    string docText = null;
    TraverseHttpResponse(response, reader =>
    {
        var isRevision = reader.NodeType == XmlNodeType.Element && reader.LocalName == "rev";
        if (isRevision)
        {
            docText = reader.ReadString();
        }
    });
    if (docText == null)
    {
        throw new BusinessException("Japanische Dokumentation nicht gefunden");
    }

    // Table rows of the form "|[[Template:自治体人口/X|X]] || [URL ...]".
    foreach (Match row in Regex.Matches(docText, @"\|\[\[Template:自治体人口/(?<pref>.+?)\|\k<pref>\]\]\s*\|\|\s*\[(?<url>.+?)\]"))
    {
        var prefecture = row.Groups["pref"].Value;
        if (!PrefIsoCode.ContainsKey(prefecture))
        {
            Debug.WriteLine($"Unbekannter Eintrag \"{row.Groups["pref"].Value}\"");
            continue;
        }
        urlByPrefecture.Add(prefecture, row.Groups["url"].Value);
    }

    if (urlByPrefecture.Count == 0)
    {
        throw new BusinessException("Japanische Quellen nicht gefunden");
    }
    if (urlByPrefecture.Count != PrefIsoCode.Count)
    {
        throw new BusinessException("Für einige Präfekturen wurden keine Quellen gefunden");
    }
    return urlByPrefecture;
}
/// <summary>
/// Obtains the login credentials: from the first two command-line arguments when present,
/// otherwise by prompting on the console. The typed password is echoed as asterisks.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the user, <c>args[1]</c> the password.</param>
/// <returns>The user name and password.</returns>
private static NetworkCredential GetCredentials(string[] args)
{
    if (args.Length >= 2)
    {
        return new NetworkCredential(args[0], args[1]);
    }
    Console.Write("Benutzer: ");
    var user = Console.ReadLine();
    Console.Write("Passwort: ");
    var password = string.Empty;
    // Show asterisks instead of the typed password
    while (true)
    {
        var keyInfo = Console.ReadKey(true);
        if (keyInfo.Key == ConsoleKey.Enter)
        {
            Console.WriteLine();
            break;
        }
        if (keyInfo.Key == ConsoleKey.Backspace)
        {
            if (password.Length > 0)
            {
                password = password.Remove(password.Length - 1);
                // Move the cursor back, overwrite the asterisk with a blank, move back again.
                // A literal '\b' is used because the key's KeyChar is not '\b' on every terminal.
                Console.Write("\b \b");
            }
        }
        else if (!char.IsControl(keyInfo.KeyChar))
        {
            // Keys without a printable character (arrows, Esc, function keys) deliver a control
            // character such as '\0' and must not become part of the password.
            password += keyInfo.KeyChar;
            Console.Write("*");
        }
    }
    Console.WriteLine();
    return new NetworkCredential(user, password);
}
/// <summary>A Wikidata item id together with a name, a population figure and the data it came from.</summary>
public class WikidataEntry
{
    /// <summary>Wikidata item id.</summary>
    public string Id { get; set; }

    /// <summary>Page title or template key this entry refers to.</summary>
    public string Name { get; set; }

    /// <summary>Population figure.</summary>
    public int Population { get; set; }

    /// <summary>Template data the population was taken from.</summary>
    public PrefTemplateData Source { get; set; }

    /// <summary>Returns <see cref="Name"/>.</summary>
    public override string ToString()
    {
        return Name;
    }
}
public class PrefTemplateData
{
// Matches the "date = <year>年<month>月<day>日" line (groups 1-3) followed by the "source =" line,
// and captures in group 4 the lines between the source line and the "| #default" entry.
private static readonly Regex convertToGermanRegex =
new Regex(@"\s*\|\s*date\s*=\s*(\d{4})年(\d{1,2})月(\d{1,2})日.*\s*\|\s*source\s*=.*?\n(.+\n)\s*\|\s*#default",
RegexOptions.Singleline);
// Matches one "| <name> = <digits>" entry: group 1 is the name, group 2 the leading digits of the value.
private static readonly Regex extractPopulationRegex = new Regex(@"\|\s*([^\|]+?)\s*=\s*(\d+)");
/// <summary>
/// Creates the data of one prefecture: downloads the source page to determine its title (best effort),
/// then builds the German template text and extracts the population figures from the Japanese template.
/// </summary>
/// <param name="contentJa">Text of the Japanese template.</param>
/// <param name="sources">Source URL per prefecture name; must contain <paramref name="prefKanji"/>.</param>
/// <param name="prefKanji">Prefecture name.</param>
/// <param name="prefIso">ISO code of the prefecture.</param>
/// <param name="accessDate">Date stored as <see cref="AccessDate"/>.</param>
/// <exception cref="BusinessException">The Japanese template text could not be interpreted.</exception>
public PrefTemplateData(string contentJa, IDictionary<string, string> sources, string prefKanji, string prefIso,
    DateTime accessDate)
{
    Iso = prefIso;
    Kanji = prefKanji;
    SourceUri = sources[prefKanji];
    AccessDate = accessDate;
    SourceTitle = ReadSourceTitle(SourceUri);
    ConvertToGerman(contentJa, prefIso);
    ExtractPopulations(contentJa);
}

// Downloads the page at sourceUri and returns the text of its <title> element without control characters;
// returns null when the download fails with a WebException or no title element is found.
private static string ReadSourceTitle(string sourceUri)
{
    try
    {
        byte[] pageBytes;
        using (var downloader = new WebClient())
        {
            pageBytes = downloader.DownloadData(sourceUri);
        }

        // Decode as UTF-8 first, then re-decode if the page declares a different charset.
        var html = Encoding.UTF8.GetString(pageBytes);
        var charset = Regex.Match(html, "charset\\s*=\\s*\"?(.+?)[\">]", RegexOptions.IgnoreCase);
        if (charset.Success && !charset.Groups[1].Value.Equals("utf-8", StringComparison.InvariantCultureIgnoreCase))
        {
            try
            {
                html = Encoding.GetEncoding(charset.Groups[1].Value).GetString(pageBytes);
            }
            catch (ArgumentException)
            {
                // Unknown charset name: keep the UTF-8 decoding.
            }
        }

        var titleElement = Regex.Match(html, "<title>\\s*(.+?)\\s*</title>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase);
        if (!titleElement.Success)
        {
            return null;
        }
        var decodedTitle = HttpUtility.HtmlDecode(titleElement.Groups[1].Value);
        return new string(decodedTitle.Where(ch => ch >= ' ').ToArray());
    }
    catch (WebException)
    {
        // The title is optional; continue without it.
        return null;
    }
}
// ISO code of the prefecture, as passed to the constructor.
public string Iso { get; }
// Prefecture name, as passed to the constructor.
public string Kanji { get; }
// German template text built by ConvertToGerman.
public string ContentDe { get; private set; }
// Population per template key, filled by ExtractPopulations.
public Dictionary<string, int> Populations { get; private set; }
// Date parsed from the "date" line of the Japanese template.
public DateTime AsOf { get; private set; }
// Source URL looked up for the prefecture.
public string SourceUri { get; }
// Title of the page at SourceUri; stays null when it could not be determined.
public string SourceTitle { get; }
// Access date, as passed to the constructor.
public DateTime AccessDate { get; }
/// <summary>
/// Builds the German template text from the Japanese template and stores it in <see cref="ContentDe"/>;
/// also sets <see cref="AsOf"/> to the date found in the template.
/// </summary>
/// <param name="contentJa">Text of the Japanese template.</param>
/// <param name="prefIso">ISO code of the prefecture; the part after the first three characters is used as sort key.</param>
/// <exception cref="BusinessException">The Japanese template text does not have the expected structure.</exception>
private void ConvertToGerman(string contentJa, string prefIso)
{
    // Build the German content
    var match = convertToGermanRegex.Match(contentJa);
    if (!match.Success)
    {
        throw new BusinessException("Japanischer Inhalt nicht auswertbar");
    }

    var year = int.Parse(match.Groups[1].Value, CultureInfo.InvariantCulture);
    var month = int.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
    var day = int.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
    AsOf = new DateTime(year, month, day);

    // The dates end up in wiki markup and must not depend on the calendar of the current culture.
    var asOfText = AsOf.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    var accessText = AccessDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

    // '|' inside the title would end the template parameter, so it is replaced by {{!}}.
    var title = string.IsNullOrWhiteSpace(SourceTitle)
        ? "ohne Titel"
        : "{{lang|ja|" + SourceTitle.Replace("|", "{{!}}") + "}}";

    ContentDe =
        @"<includeonly>{{#if: {{{2|}}} | <!-- Die Metadaten-Parameter sind hier unabhängig vom Schlüssel -->
{{#switch: {{{2}}}
| STAND=" + asOfText + @"
| QUELLE={{Internetquelle|url=" + SourceUri + "|titel=" + title + "| hrsg={{lang|ja|" + Kanji + "}}|datum=" +
        asOfText + "|sprache=ja|abruf=" + accessText + @"}}
}} |
{{#switch: {{{1}}}
" + match.Groups[4].Value +
        '\t' +
        @"|#default= <span class=""error"">Ungültige Gemeinde <code>{{{1}}}</code></span>[[Kategorie:Wikipedia:Fehler in Vorlage Metadaten Einwohnerzahl]]
}} }}</includeonly><noinclude>
Teil der {{Kategorie:Vorlage:Metadaten Einwohnerzahl JP}}
[[Kategorie:Vorlage:Metadaten Einwohnerzahl JP|" + prefIso.Substring(3) + @"]]
</noinclude>";
}
/// <summary>
/// Collects every "| name = number" entry of the Japanese template, except the "date" entry,
/// and stores the result in <see cref="Populations"/>.
/// </summary>
/// <param name="contentJa">Text of the Japanese template.</param>
private void ExtractPopulations(string contentJa)
{
    var populationByName = new Dictionary<string, int>();
    for (var entry = extractPopulationRegex.Match(contentJa); entry.Success; entry = entry.NextMatch())
    {
        var name = entry.Groups[1].Value;
        if (name == "date")
        {
            // The date line also has the "name = digits" form but is not a population.
            continue;
        }
        populationByName.Add(name, int.Parse(entry.Groups[2].Value));
    }
    Populations = populationByName;
}
/// <summary>
/// Matches every population entry against the pages embedding the template and queues a Wikidata
/// entry for each hit; entries without a matching page are reported in <paramref name="missingItems"/>.
/// </summary>
/// <param name="transcludedWikidataIds">Pages embedding the template (title and Wikidata id).</param>
/// <param name="wikidataList">Receives one entry per matched population figure.</param>
/// <param name="missingItems">Receives a message per unmatched population entry.</param>
public void Match(IList<WikidataEntry> transcludedWikidataIds, BlockingCollection<WikidataEntry> wikidataList,
    ConcurrentBag<string> missingItems)
{
    foreach (var (key, population) in Populations)
    {
        var unit = key == "23区計" ? "東京都区部" : key;
        var lemmaAndWikidataId = FindTransclusion(transcludedWikidataIds, unit);
        if (lemmaAndWikidataId?.Name == null)
        {
            missingItems.Add($"{Iso}: {key} nicht in den Einbindungen gefunden!");
        }
        else
        {
            wikidataList.Add(new WikidataEntry
            {
                Id = lemmaAndWikidataId.Id,
                Name = key,
                Population = population,
                Source = this
            });
        }
    }
}

// Finds the page belonging to a template key. Keys of the form "<city>市<ward>区" are looked up by ward name,
// preferring a page whose title also contains the city; all other keys are looked up by title prefix.
private static WikidataEntry FindTransclusion(IList<WikidataEntry> transclusions, string unit)
{
    var citySuffixPos = unit.EndsWith("区") ? unit.IndexOf('市') : -1;
    if (citySuffixPos < 0)
    {
        // No city part; for ward keys this should only apply to the special wards of Tokyo.
        return transclusions.FirstOrDefault(transclusion => transclusion.Name.StartsWith(unit));
    }

    // The city name includes its '市' suffix and starts at the first character of the key.
    var city = unit.Substring(0, citySuffixPos + 1);
    var district = unit.Substring(citySuffixPos + 1);

    // Several cities of one prefecture can have wards with the same name, so a title naming the city
    // wins; a title with the bare ward name is only the fallback. A single predicate
    // "ward && city || ward" cannot express this preference because it reduces to "ward".
    return transclusions.FirstOrDefault(transclusion =>
               transclusion.Name.StartsWith(district) && transclusion.Name.Contains(city))
           ?? transclusions.FirstOrDefault(transclusion => transclusion.Name.StartsWith(district));
}
/// <summary>Returns <see cref="Iso"/>.</summary>
public override string ToString() => Iso;
}
/// <summary>Root object of the persisted settings.</summary>
public class Settings
{
    /// <summary>State of the German Wikipedia updates.</summary>
    public DeSettings De { get; set; } = new DeSettings();

    /// <summary>State of the Wikidata updates.</summary>
    public WíkidataSettings WikiData { get; set; } = new WíkidataSettings();
}
/// <summary>Persisted state of the German Wikipedia updates.</summary>
public class DeSettings
{
    /// <summary>Access date recorded per key after a successful write.</summary>
    public ConcurrentDictionary<string, DateTime> AccessDates { get; set; } =
        new ConcurrentDictionary<string, DateTime>();
}
/// <summary>Persisted state of the Wikidata updates.</summary>
public class WíkidataSettings
{
    /// <summary>Access date recorded per key.</summary>
    public ConcurrentDictionary<string, DateTime> AccessDates { get; set; } =
        new ConcurrentDictionary<string, DateTime>();

    /// <summary>Claim id recorded per key.</summary>
    public ConcurrentDictionary<string, string> Ids { get; set; } =
        new ConcurrentDictionary<string, string>();
}
#region ISO-Codes
/// <summary>ISO code per prefecture name.</summary>
public static readonly Dictionary<string, string> PrefIsoCode = new Dictionary<string, string>
{
    ["北海道"] = "JP-01",
    ["青森県"] = "JP-02",
    ["岩手県"] = "JP-03",
    ["宮城県"] = "JP-04",
    ["秋田県"] = "JP-05",
    ["山形県"] = "JP-06",
    ["福島県"] = "JP-07",
    ["茨城県"] = "JP-08",
    ["栃木県"] = "JP-09",
    ["群馬県"] = "JP-10",
    ["埼玉県"] = "JP-11",
    ["千葉県"] = "JP-12",
    ["東京都"] = "JP-13",
    ["神奈川県"] = "JP-14",
    ["新潟県"] = "JP-15",
    ["富山県"] = "JP-16",
    ["石川県"] = "JP-17",
    ["福井県"] = "JP-18",
    ["山梨県"] = "JP-19",
    ["長野県"] = "JP-20",
    ["岐阜県"] = "JP-21",
    ["静岡県"] = "JP-22",
    ["愛知県"] = "JP-23",
    ["三重県"] = "JP-24",
    ["滋賀県"] = "JP-25",
    ["京都府"] = "JP-26",
    ["大阪府"] = "JP-27",
    ["兵庫県"] = "JP-28",
    ["奈良県"] = "JP-29",
    ["和歌山県"] = "JP-30",
    ["鳥取県"] = "JP-31",
    ["島根県"] = "JP-32",
    ["岡山県"] = "JP-33",
    ["広島県"] = "JP-34",
    ["山口県"] = "JP-35",
    ["徳島県"] = "JP-36",
    ["香川県"] = "JP-37",
    ["愛媛県"] = "JP-38",
    ["高知県"] = "JP-39",
    ["福岡県"] = "JP-40",
    ["佐賀県"] = "JP-41",
    ["長崎県"] = "JP-42",
    ["熊本県"] = "JP-43",
    ["大分県"] = "JP-44",
    ["宮崎県"] = "JP-45",
    ["鹿児島県"] = "JP-46",
    ["沖縄県"] = "JP-47"
};
#endregion
#region Netzwerk- und Mediawiki-Funktionen
/// <summary>
/// Creates a client for the API endpoint of a wiki. All clients share <see cref="httpClientHandler"/>,
/// which is not disposed together with the client.
/// </summary>
/// <param name="language">"wikidata", or the language sub-domain of a Wikipedia.</param>
/// <returns>A client whose base address is the wiki's <c>/w/api.php</c> and whose User-Agent names this program.</returns>
public static HttpClient CreateHttpClient(string language)
{
    var assemblyName = Assembly.GetExecutingAssembly().GetName();
    var host = language == "wikidata" ? "www.wikidata.org" : language + ".wikipedia.org";

    var client = new HttpClient(httpClientHandler, false)
    {
        BaseAddress = new Uri($"https://{host}/w/api.php")
    };

    // User-Agent: "<assembly name>/<major.minor> (<OS version>; .NET CLR <runtime version>)"
    var userAgent = client.DefaultRequestHeaders.UserAgent;
    userAgent.Add(new ProductInfoHeaderValue(assemblyName.Name!, assemblyName.Version!.ToString(2)));
    userAgent.Add(new ProductInfoHeaderValue($"({Environment.OSVersion.VersionString}; .NET CLR {Environment.Version})"));
    return client;
}
/// <summary>Builds a query string that starts with <c>?format=xml</c> followed by the given parameters.</summary>
/// <param name="query">Parameters to append.</param>
/// <returns>The query string, including the leading '?'.</returns>
public static string GetQuery(IDictionary<string, string> query)
{
    const string xmlFormatPrefix = "?format=xml";
    return QueryHelpers.AddQueryString(xmlFormatPrefix, query);
}
/// <summary>Encodes the given fields as an <c>application/x-www-form-urlencoded</c> request body (UTF-8 without BOM).</summary>
/// <param name="postData">Fields to encode; may be <c>null</c>.</param>
/// <returns>The request content, or <c>null</c> when <paramref name="postData"/> is <c>null</c>.</returns>
public static HttpContent FormUrlEncoded(IDictionary<string, string> postData)
{
    if (postData == null)
    {
        return null;
    }

    // Reuse the query-string encoder and drop its leading '?'.
    var encodedFields = QueryHelpers.AddQueryString(string.Empty, postData).TrimStart('?');
    return new StringContent(encodedFields, new UTF8Encoding(false), "application/x-www-form-urlencoded");
}
/// <summary>
/// Reads an XML API response node by node; every node after the <c>api</c> element is first
/// checked for errors and then passed to <paramref name="onXmlNode"/>.
/// </summary>
/// <param name="response">Response whose content is read.</param>
/// <param name="onXmlNode">Callback invoked with the reader positioned on each node.</param>
/// <exception cref="MediawikiException">The response has no <c>api</c> element or contains an error element.</exception>
public static void TraverseHttpResponse(HttpResponseMessage response, Action<XmlReader> onXmlNode)
{
    using var body = response.Content.ReadAsStream();
    using var xml = XmlReader.Create(body);

    var hasApiElement = xml.ReadToFollowing("api");
    if (!hasApiElement)
    {
        throw new MediawikiException("Malformed response");
    }

    for (var hasNode = xml.Read(); hasNode; hasNode = xml.Read())
    {
        CheckForError(xml);
        onXmlNode(xml);
    }
}
/// <summary>Computes the MD5 hash of <paramref name="data"/>.</summary>
/// <param name="data">Bytes to hash.</param>
/// <returns>The hash as 32 lower-case hexadecimal digits.</returns>
public static string CalcMd5Hash(byte[] data)
{
    using var hasher = MD5.Create();
    var hexDigits = hasher.ComputeHash(data).Select(b => b.ToString("x2"));
    return string.Concat(hexDigits);
}
/// <summary>
/// Inspects the node the reader is positioned on: a <c>warnings</c> element is written to the debug
/// output, an <c>error</c> element is turned into an exception; everything else is ignored.
/// </summary>
/// <param name="reader">Reader positioned on the node to inspect.</param>
/// <exception cref="MediawikiLagException">The error code is <c>maxlag</c>.</exception>
/// <exception cref="MediawikiException">Any other error element.</exception>
private static void CheckForError(XmlReader reader)
{
    if (reader.NodeType != XmlNodeType.Element)
    {
        return;
    }

    switch (reader.LocalName)
    {
        case "warnings":
            // ReadInnerXml stays inside the Debug call so it is only evaluated when that call is compiled in.
            Debug.WriteLine($" warning: {reader.ReadInnerXml()}");
            return;

        case "error":
            var code = reader.GetAttribute("code");
            var info = reader.GetAttribute("info");
            Debug.WriteLine($" error \"{code}\": {info}");
            throw code == "maxlag"
                ? new MediawikiLagException(code, info)
                : new MediawikiException(code, info);

        default:
            return;
    }
}
/// <summary>
/// Kinds of tokens that can be requested; the lower-cased member name is sent as the <c>type</c> parameter.
/// </summary>
public enum TokenType
{
    Csrf = 0,
    Watch = 1,
    Patrol = 2,
    Rollback = 3,
    UserRights = 4,
    Login = 5,
    CreateAccount = 6
}
/// <summary>Requests a token of the given type from a wiki's API.</summary>
/// <param name="lang">Wiki to query, see <see cref="CreateHttpClient"/>.</param>
/// <param name="tokenType">Kind of token to request.</param>
/// <returns>The token, or <c>null</c> when the response has no matching attribute.</returns>
public static async Task<string> GetTokenAsync(string lang, TokenType tokenType)
{
    var tokenName = tokenType.ToString().ToLowerInvariant();
    var attributeName = tokenName + "token";

    using var client = CreateHttpClient(lang);
    var url = GetQuery(new Dictionary<string, string>
    {
        ["action"] = "query",
        ["meta"] = "tokens",
        ["type"] = tokenName
    });
    using var response = await client.PostAsync(url, null!);

    string foundToken = null;
    TraverseHttpResponse(response, reader =>
    {
        var isTokensElement = reader.NodeType == XmlNodeType.Element && reader.LocalName == "tokens";
        if (isTokensElement)
        {
            foundToken = reader.GetAttribute(attributeName);
        }
    });
    return foundToken;
}
/// <summary>Logs in to a wiki with the given credentials, using a freshly requested login token.</summary>
/// <param name="lang">Wiki to log in to, see <see cref="CreateHttpClient"/>.</param>
/// <param name="credentials">User name and password.</param>
/// <exception cref="MediawikiException">The login result is anything other than "Success".</exception>
public static async Task LoginAsync(string lang, NetworkCredential credentials)
{
    using var client = CreateHttpClient(lang);
    var url = GetQuery(new Dictionary<string, string>
    {
        ["action"] = "login",
        ["lgname"] = credentials.UserName
    });
    var password = credentials.Password;
    var loginToken = await GetTokenAsync(lang, TokenType.Login);
    var form = FormUrlEncoded(new Dictionary<string, string>
    {
        ["lgpassword"] = password,
        ["lgtoken"] = loginToken
    });
    using var response = await client.PostAsync(url, form);

    TraverseHttpResponse(response, reader =>
    {
        if (reader.NodeType != XmlNodeType.Element || reader.LocalName != "login")
        {
            return;
        }
        var result = reader.GetAttribute("result");
        if (result == "Success")
        {
            return;
        }
        if (result == "Throttled")
        {
            // Include the wait time reported by the server in the message.
            result += $" (Please wait {reader.GetAttribute("wait")}s)";
        }
        throw new MediawikiException(result);
    });
}
//public static async Task LogoutAsync(string lang)
//{
// using var client = CreateHttpClient(lang);
// using var response = client.GetAsync(GetQuery(new Dictionary<string, string> { { "action", "logout" } }));
//}
#endregion
}
}