//Bot code by Lsj to locate images used in other language versions
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
using System.Xml;
using System.Threading;
using System.Web;
using System.Net;
using DotNetWikiBot;
class MyBot : Bot
{
/// <summary>
/// Loads page <paramref name="p"/>, retrying when a <see cref="WebException"/> is thrown.
/// </summary>
/// <param name="p">Page to load.</param>
/// <param name="iattempt">Maximum number of attempts; at least one attempt is always made.</param>
/// <returns>true as soon as a load succeeds; false once the attempts are used up.</returns>
public static bool tryload(Page p, int iattempt)
{
    for (int attempt = 1; ; attempt++)
    {
        try
        {
            p.Load();
            return true;
        }
        catch (WebException webError)
        {
            // Report the failure, then give up if this was the last permitted attempt.
            Console.Error.WriteLine(webError.Message);
            if (attempt >= iattempt)
            {
                return false;
            }
        }
    }
}
/// <summary>
/// Saves page <paramref name="p"/>, retrying when a <see cref="WebException"/> is thrown.
/// </summary>
/// <param name="p">Page to save.</param>
/// <param name="iattempt">Maximum number of attempts; at least one attempt is always made.</param>
/// <returns>true if a save went through; false if every attempt failed.</returns>
public static bool trysave(Page p, int iattempt)
{
    int failures = 0;
    bool saved = false;
    do
    {
        try
        {
            p.Save();
            saved = true;
        }
        catch (WebException webError)
        {
            // Log the error text and count the failed attempt.
            Console.Error.WriteLine(webError.Message);
            failures++;
        }
    }
    while (!saved && failures < iattempt);
    return saved;
}
//public static List<string> Interwiki(Site site, string title)
////Borrowed from http://sv.wikipedia.org/wiki/Wikipedia:Projekt_DotNetWikiBot_Framework/Innocent_bot/Addbotkopia
//{
// List<string> r = new List<string>();
// XmlDocument doc = new XmlDocument();
// string url = "action=wbgetentities&sites=svwiki&titles=" + HttpUtility.UrlEncode(title) + "&languages=sv&format=xml";
// //string tmpStr = site.PostDataAndGetResultHTM(site.site+"/w/api.php", url);
// try
// {
// string tmpStr = site.PostDataAndGetResultHTM(site.site + "/w/api.php", url);
// doc.LoadXml(tmpStr);
// for (int i = 0; i < doc.GetElementsByTagName("sitelink").Count; i++)
// {
// string s = doc.GetElementsByTagName("sitelink")[i].Attributes.GetNamedItem("site").Value;
// string t = doc.GetElementsByTagName("sitelink")[i].Attributes.GetNamedItem("title").Value;
// s = s.Replace("_", "-");
// string t2 = s.Substring(0, s.Length - 4) + ":" + t;
// //Console.WriteLine(t2);
// r.Add(t2);
// }
// }
// catch (WebException e)
// {
// string message = e.Message;
// Console.Error.WriteLine(message);
// }
// return r;
//}
/// <summary>
/// Entry point of the bot. Logs in to the target-language Wikipedia and to Commons,
/// fills a page list from a category, and for every article collects the images used
/// by the articles it links to via interlanguage links. When no interwiki images are
/// found, it falls back to the files in the Commons category named by a
/// {{commonscat|...}} template. Images already used, non-files, blacklisted images and
/// images missing on Commons are filtered out; the rest (optionally only the
/// <c>ntop</c> most widely used) are published according to <c>nchoice</c>.
/// </summary>
public static void Main()
{
    string makelang = "nl";
    string botaccount = "Lsjbot";
    Console.Write("Password: ");
    string password = Console.ReadLine();
    Site svsite = new Site("https://" + makelang + ".wikipedia.org", botaccount, password);
    Site cmsite = new Site("https://commons.wikimedia.org", botaccount, password);
    //Site wdsite = new Site("http://wikidata.org", botaccount, password);

    string editcomment = "Fixar bilder från iw";
    switch (makelang)
    {
        case "sv":
            editcomment = "Fixar bilder från iw";
            break;
        case "ceb":
            editcomment = "Galeriya sa hulagway";
            break;
        case "nl":
            editcomment = "Fotogalerij van interwiki";
            break;
        default:
            editcomment = "Image gallery from interwiki";
            break;
    }
    svsite.defaultEditComment = editcomment;
    svsite.minorEditByDefault = false;
    Console.WriteLine("apipath = " + svsite.apiPath);

    //Skip images in blacklist:
    List<string> blacklist = new List<string>();
    blacklist.Add("Gatunek niejadalny.svg");
    blacklist.Add("Foodlogo3.svg");
    List<string> blacktype = new List<string>();
    //blacktype.Add(".svg");
    //blacktype.Add(".png");

    PageList pl = new PageList(svsite);

    ////////////////////////////////////
    //Select how to get pages. Uncomment as needed.
    ////////////////////////////////////
    //Find articles from a category
    pl.FillFromCategory("Koning van Zweden");
    //pl.FillFromCategoryTree("Personer i Nederländernas historia");
    //Find articles from all the links to a template, mostly useful on very small wikis
    // pl.FillFromLinksToPage("Mall:Taxobox");
    //Set specific article:
    //Page pp = new Page(svsite, "Amsterdam");pl.Add(pp);

    //Skip all namespaces except regular articles:
    pl.RemoveNamespaces(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101 });

    ///////////////////////////////////////
    //Choose what to do with the pix that are found:
    // nchoice = 0: do nothing, except list on standard output
    // nchoice = 1: add as gallery in target article
    // nchoice = 2: add as separate pix in target article
    // nchoice = 3: add in article discussion
    // nchoice = 4: list in separate workpage "Användare:Botaccount/Gallery"
    ////////////////////////////////////////
    int nchoice = 3;

    // If ntop is non-zero, don't use all pix but only the ntop most used ones.
    int ntop = 6;

    // NOTE(review): the "Användare:" (user) namespace prefix is Swedish and is used
    // regardless of makelang; confirm it resolves on the target wiki before using nchoice 4.
    Page pwork = new Page(svsite, "Användare:" + botaccount + "/Gallery");
    if (nchoice == 4)
    {
        pwork.Load();
    }

    // Cache of logged-in Site objects, keyed by interwiki language code.
    Dictionary<string, Site> sitedict = new Dictionary<string, Site>();

    DateTime oldtime = DateTime.Now;
    int nedit = 0;
    int iremain = pl.Count();

    foreach (Page p in pl)
    {
        // Earliest moment at which this iteration may finish (throttling, see end of loop).
        DateTime nexttime = oldtime.AddSeconds(7);

        //Skip start of alphabet:
        //if (String.Compare(p.title,"Acacia tortilis") < 0 )
        //    continue;

        iremain--;
        Console.WriteLine(iremain.ToString() + " remaining.");

        if (!tryload(p, 1))
            continue;

        string origtext = p.text;

        //find images already in page:
        List<string> oldpix = p.GetImages();
        Console.WriteLine("Oldpix:");
        int npix = 0;
        foreach (string oldpic in oldpix)
        {
            Console.WriteLine(oldpic);
            npix++;
        }
        Console.WriteLine("npix = " + npix.ToString());

        //if it already has a gallery, skip it:
        if (p.text.Contains("<gallery>"))
            continue;

        //if it doesn't contain "Lsjbot", skip it:
        //if (!p.text.Contains("Lsjbot"))
        //    continue;

        // Candidate images: key = image name, value = colon-joined list of the language
        // codes that use it ("cm" for Commons), later overwritten by a "///" rejection
        // marker or "" when the image is discarded.
        Dictionary<string, string> newpix = new Dictionary<string, string>();

        //find iw:
        //string[] iw = p.GetInterWikiLinks();
        List<string> iwlist = p.GetInterLanguageLinks();
        Console.WriteLine("iwlist.Count " + iwlist.Count);
        //if (iw.Length == 0)
        //    iwlist = Interwiki(wdsite, p.title);
        //else
        //{
        //    foreach (string iws in iw)
        //        iwlist.Add(iws);
        //}

        foreach (string iws in iwlist)
        {
            // Split only at the first colon so that titles which themselves contain
            // a colon are kept intact.
            string[] ss = iws.Split(new char[] { ':' }, 2);
            if (ss.Length < 2)
            {
                Console.WriteLine("Malformed iw link: " + iws);
                continue;
            }
            string iwcode = ss[0];
            string iwtitle = ss[1];
            Console.WriteLine("iw - " + iwcode + ":" + iwtitle);

            if (iwcode == "nah")
                continue;

            if (!sitedict.ContainsKey(iwcode))
            {
                string iwurl = "https://" + iwcode + ".wikipedia.org";
                try
                {
                    sitedict.Add(iwcode, new Site(iwurl, botaccount, password));
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }
                catch (WikiBotException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }
            }

            Page piw = new Page(sitedict[iwcode], iwtitle);
            try
            {
                piw.Load();
            }
            catch (WebException e)
            {
                Console.WriteLine(e.Message);
                continue;
            }

            if (!piw.Exists())
            {
                Console.WriteLine("Not found despite iw");
                continue;
            }

            List<string> iwpix = piw.GetImages();
            foreach (string iwpicture in iwpix)
            {
                string iwpic = iwpicture;
                //Remove file prefix (everything up to the first colon):
                if (iwpic.Contains(":"))
                    iwpic = iwpic.Split(new char[] { ':' }, 2)[1];

                if (newpix.ContainsKey(iwpic))
                    newpix[iwpic] = newpix[iwpic] + ":" + iwcode;
                else
                    newpix.Add(iwpic, iwcode);
            }
        }

        // Fallback: nothing found via interwiki, so try the Commons category named in
        // a {{commonscat|...}} / {{Commonscat|...}} template.
        bool fromcommons = false;
        if (newpix.Count == 0)
        {
            if (p.text.Contains("ommonscat|"))
            {
                fromcommons = true;
                string s = "";

                string cctag = "{{commonscat|";
                int ccstart = p.text.IndexOf(cctag, StringComparison.Ordinal);
                if (ccstart < 0)
                {
                    cctag = "{{Commonscat|";
                    ccstart = p.text.IndexOf(cctag, StringComparison.Ordinal);
                }
                if (ccstart >= 0)
                {
                    int ccbody = ccstart + cctag.Length;
                    int ccend = p.text.IndexOf("}}", ccbody, StringComparison.Ordinal);
                    // An unterminated template leaves s empty, so the page is skipped below.
                    if (ccend >= 0)
                        s = p.text.Substring(ccbody, ccend - ccbody);
                }

                if (String.IsNullOrEmpty(s))
                    continue;

                // Keep only the first template parameter (the category name).
                if (s.Contains("|"))
                    s = s.Remove(s.IndexOf("|"));
                s = "Category:" + s;
                //Console.WriteLine(s);
                //Console.ReadLine();

                PageList plc = new PageList(cmsite);
                try
                {
                    plc.FillFromCategoryTree(s);
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }

                foreach (Page pc in plc)
                {
                    Console.WriteLine("pc = " + pc.title);
                    // Indexer assignment: a title listed twice must not throw.
                    newpix[pc.title] = "cm";
                }
            }
        }

        //Workaround because a Dictionary can't be modified while iterating over its keys:
        List<string> dummykeys = new List<string>();
        foreach (string dk in newpix.Keys)
            dummykeys.Add(dk);

        foreach (string newpic in dummykeys)
        {
            //Check if pix from iw is already used in target article:
            if (p.text.Contains(newpic))
                newpix[newpic] = "/// ALREADY USED";
            else if (newpic.Contains(":"))
            {
                if (p.text.Contains(newpic.Remove(0, newpic.IndexOf(':') + 1)))
                    newpix[newpic] = "/// ALREADY USED";
            }

            // Accept only names ending in a dot plus exactly three characters.
            // NOTE(review): this also rejects four-letter extensions such as ".jpeg".
            if ((!newpic.Contains(".")) || (newpic.LastIndexOf('.') != newpic.Length - 4))
            {
                newpix[newpic] = "/// NOT A FILE";
            }

            //Check if pic in blacklist:
            if (blacklist.Contains(newpic))
                newpix[newpic] = "/// BLACKLISTED IMAGE";

            foreach (string filetype in blacktype)
            {
                if (newpic.Contains(filetype))
                    newpix[newpic] = "/// BLACKLISTED FILETYPE";
            }

            if (newpix[newpic].Contains("///"))
                continue;

            //Check if pic really exists on Commons:
            if (!fromcommons)
            {
                string res = cmsite.indexPath + "?title=" +
                    HttpUtility.UrlEncode("File:" + newpic);
                //Console.WriteLine("commonsres = " + res);
                try
                {
                    // Only success or failure of the request matters; the page body is not used.
                    cmsite.GetWebPage(res); // cmsite.GetPageHTM(res);
                }
                catch (WebException e)
                {
                    newpix[newpic] = "/// NOT FOUND ON COMMONS";
                    string message = e.Message;
                    if (message.Contains(": (404) "))
                    { // Not Found
                        Console.Error.WriteLine(Bot.Msg("Page \"{0}\" doesn't exist."), newpic);
                        Console.WriteLine("Image not found " + newpic);
                    }
                    else
                    {
                        Console.Error.WriteLine(message);
                    }
                }
            }
        }

        int nnew = 0;
        foreach (string newpic in newpix.Keys)
        {
            Console.WriteLine(newpic + " ! " + newpix[newpic]);
            if (!newpix[newpic].Contains("///"))
                nnew++;
        }
        Console.WriteLine("# new pix = " + nnew.ToString());

        if (nnew == 0)
            continue;

        //OK, so we found some pix. Now what do we do with them?

        //First get rid of the ones we don't want:
        foreach (string newpic in dummykeys)
            if (newpix[newpic].Contains("///"))
                newpix[newpic] = "";

        //Then figure out which new pix have the most interwiki use:
        List<string> pixtouse = new List<string>();
        if ((ntop > 0) && (ntop < nnew))
        {
            while (pixtouse.Count < ntop)
            {
                string best = null;
                int bestcount = 0;
                foreach (string newpic in dummykeys)
                {
                    // Rank by the number of wikis using the image, not by the
                    // character length of the joined code string.
                    string codes = newpix[newpic];
                    int usecount = (codes == "") ? 0 : codes.Split(':').Length;
                    if (usecount > bestcount)
                    {
                        best = newpic;
                        bestcount = usecount;
                    }
                }
                if (best == null)
                    break;
                pixtouse.Add(best);
                // Blank the entry so it is not selected again.
                newpix[best] = "";
            }
        }
        else
        {
            foreach (string newpic in newpix.Keys)
                if (newpix[newpic] != "")
                    pixtouse.Add(newpic);
        }

        //Then actually use them, according to nchoice value:
        string gallerylabel = "Bildgalleri";
        string talkpage = "Diskussion";
        string disktext = "\n\n==Bilder från interwiki==\nBoten " + botaccount + " har identifierat följande bilder som används på andra språkversioner av den här artikeln:\n\n";
        switch (makelang)
        {
            case "sv":
                gallerylabel = "Bildgalleri";
                talkpage = "Diskussion";
                disktext = "\n\n==Bilder från interwiki==\nBoten " + botaccount + " har identifierat följande bilder som används på andra språkversioner av den här artikeln:\n\n";
                break;
            case "ceb":
                gallerylabel = "Galeriya sa hulagway";
                talkpage = "Hisgot";
                break;
            case "war":
                gallerylabel = "Image gallery";
                talkpage = "Hiruhimangraw";
                break;
            case "nl":
                gallerylabel = "Image gallery";
                talkpage = "Overleg";
                disktext = "\n\n==Foto's van interwiki==\nDe bot " + botaccount + " heeft de volgende beelden gebruikt op andere versies van dit artikel geïdentificeerd:\n\n";
                break;
            default:
                gallerylabel = "Image gallery";
                break;
        }

        string gallery = "\n\n== " + gallerylabel + " ==\n\n<gallery>\n";

        switch (nchoice)
        {
            case 1:
                foreach (string newpic in pixtouse)
                    gallery = gallery + newpic + "\n";
                gallery = gallery + "</gallery>\n\n";

                // Insert before the sources heading if present, otherwise before the
                // first category link, otherwise append at the end.
                // NOTE(review): "[[Kategori" and "== Källor ==" are Swedish markers.
                int ipos = p.text.IndexOf("[[Kategori");
                if ((ipos < 0) && (makelang == "war"))
                    ipos = p.text.IndexOf("[[Kaarangay");
                string botendtext = "== Källor ==";
                if (p.text.Contains(botendtext))
                {
                    ipos = p.text.IndexOf(botendtext);
                }
                if (ipos > 0)
                    p.text = p.text.Insert(ipos, gallery);
                else
                    p.text += gallery;
                break;
            case 2:
                foreach (string newpic in pixtouse)
                    p.text = p.text.Replace("[[Kategori", "[[Fil:" + newpic + "|thumb|right|]]\n\n" + "[[Kategori");
                break;
            case 3:
                Page pdisk = new Page(svsite, talkpage + ":" + p.title);
                if (!tryload(pdisk, 2))
                    continue;
                pdisk.text = pdisk.text + disktext;
                gallery = gallery.Replace("== " + gallerylabel + " ==", "=== " + gallerylabel + " ===");
                foreach (string newpic in pixtouse)
                    gallery = gallery + newpic + "\n";
                gallery = gallery + "</gallery>\n\n";
                pdisk.text = pdisk.text + gallery;
                //Bot.editComment = "Fixar bildförslag från iw";
                //isMinorEdit = false;
                trysave(pdisk, 2);
                //Thread.Sleep(15000);//milliseconds
                // Wait for the operator to press return before moving on.
                Console.WriteLine("<ret>");
                Console.ReadLine();
                break;
            case 4:
                pwork.text = pwork.text + "===" + p.title + "===\n";
                foreach (string newpic in pixtouse)
                    gallery = gallery + newpic + "\n";
                gallery = gallery + "</gallery>\n\n";
                pwork.text = pwork.text + gallery;
                break;
        }

        //DONE! Now save if needed.
        //Bot.editComment = editcomment;
        //isMinorEdit = false;
        if (p.text != origtext)
        {
            int ntry = 0;
            while (ntry < 3)
            {
                try
                {
                    p.Save();
                    ntry = 999;
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.Message);
                    ntry++;
                }
            }
        }

        if (nchoice == 4)
            trysave(pwork, 3);

        //Thread.Sleep(4000);//milliseconds
        Console.WriteLine("nexttime = " + nexttime.ToLongTimeString());
        Console.WriteLine("Now = " + DateTime.Now.ToLongTimeString());

        // Throttle: sleep until nexttime instead of spinning on the clock.
        TimeSpan wait = nexttime - DateTime.Now;
        if (wait > TimeSpan.Zero)
            Thread.Sleep(wait);

        oldtime = DateTime.Now;
        nedit++;
    }

    Console.WriteLine("Total #edits = " + nedit.ToString());
}
}