plum erg
This commit is contained in:
33
scripts/build_html_with_svg.py
Normal file
33
scripts/build_html_with_svg.py
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse, sys, json, time, os
|
||||
from jinja2 import Template, DictLoader, Environment, FileSystemLoader
|
||||
|
||||
ap = argparse.ArgumentParser("template + svg = interface")
|
||||
ap.add_argument("template")
|
||||
ap.add_argument("svg")
|
||||
ap.add_argument("--output", type=argparse.FileType('w'), default=sys.stdout)
|
||||
args = ap.parse_args()
|
||||
|
||||
tpath, tname = os.path.split(args.template)
|
||||
env = Environment(loader=FileSystemLoader(tpath))
|
||||
import jinjafilters
|
||||
for name, fn in jinjafilters.all.items():
|
||||
env.filters[name] = fn
|
||||
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
ET.register_namespace("","http://www.w3.org/2000/svg")
|
||||
ET.register_namespace("xlink","http://www.w3.org/1999/xlink")
|
||||
with open(args.svg) as f:
|
||||
svgt = ET.parse(f)
|
||||
# print ("svgt", svgt)
|
||||
svg_root = svgt.getroot()
|
||||
svg_root.attrib['xmlns:xlink'] = 'http://www.w3.org/1999/xlink'
|
||||
del svg_root.attrib['viewBox']
|
||||
svg = ET.tostring(svgt.getroot(), method="xml")
|
||||
|
||||
tvars = {'svg': svg}
|
||||
template = env.get_template(tname)
|
||||
print (template.render(**tvars).encode("utf-8"), file=args.output)
|
||||
105
scripts/buildsitemap.py
Normal file
105
scripts/buildsitemap.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from mwclient import Site, APIError
from mwclient.page import Page
import argparse, json, sys

ap = argparse.ArgumentParser("")
ap.add_argument("--wikiprotocol", default="http")
ap.add_argument("--wikihost", default="localhost")
ap.add_argument("--wikipath", default="/mw/")
# type=int: the value is compared against the integer page counter below;
# without it argparse delivers a str and `count >= args.limit` raises TypeError
ap.add_argument("--limit", type=int, default=None)
ap.add_argument("--output", default=sys.stdout, type=argparse.FileType('w'))
# ap.add_argument("--user", default=None)
# ap.add_argument("--password", default=None)
args = ap.parse_args()

# Connect to the wiki described by the CLI options
site = Site((args.wikiprotocol, args.wikihost), path=args.wikipath)
|
||||
|
||||
|
||||
def category_subcats (site, cattitle, objects=True):
    """Return the subcategories of the category *cattitle*.

    Fetches members via the MediaWiki API in batches of 50, following the
    'continue' token until the listing is exhausted.

    Args:
        site: mwclient Site (any object with a compatible .api method works).
        cattitle: full category title, e.g. "Catégorie:Équipes".
        objects: when True, return mwclient Page objects (site.pages[title]);
            when False, return the raw title strings.
    """
    titles = []
    # Shared request parameters; cmcontinue is added once the API hands us a token
    params = dict(list="categorymembers", cmtitle=cattitle,
                  cmtype="subcat", cmlimit=50)
    while True:
        resp = site.api("query", **params)
        titles.extend(m['title'] for m in resp['query']['categorymembers'])
        if 'continue' not in resp:
            break
        # Resume the listing where the previous batch stopped
        params['cmcontinue'] = resp['continue']['cmcontinue']
    if objects:
        # print "converting to page objects ({0})".format(len(titles))
        titles = [site.pages[t] for t in titles]
    return titles
|
||||
|
||||
# Sample values seen in practice:
#   >>> c.page_title
#   'Équipes'
#   >>> c.name
#   'Catégorie:Équipes'
#
# Output shape:
#   graph = {
#     nodes: [ {name: "Bienvenue"} ]
#     links: [ {source: "name", target: "name2"} ]
#   }

redirects = {}     # redirecting page name -> target page name
pages = []         # node dicts collected for the output graph
count = 0          # pages emitted so far (checked against --limit)
all_links = set()  # undirected links stored once as sorted (a, b) name tuples
page_exists = {}   # names of non-redirect pages


def resolve (x):
    """Follow the redirect chain from page name *x* to its final target.

    Guards against redirect cycles (e.g. A -> B -> A), which previously made
    this loop forever; on a cycle the current name is returned as-is.
    """
    seen = set()
    while x in redirects and x not in seen:
        seen.add(x)
        x = redirects[x]
    return x
|
||||
|
||||
print("Pass 1")
# First pass: record every redirect, and which non-redirect pages exist so
# dangling links can be dropped later.
for p in site.allpages():
    r = p.redirects_to()
    if (r):
        redirects[p.name] = r.name
    else:
        page_exists[p.name] = True

print("Pass 2")
# pages_by_title = {}
# Second pass: build a node dict per real page and collect its outgoing links.
for p in site.allpages():
    if (p.name in redirects):
        continue
    pd = {}
    pd['title'] = p.page_title
    pd['ns'] = 0
    print("Page {0}".format(p.name), file=sys.stderr)

    # categories
    cats = [c.page_title for c in p.categories()]
    pd['cats'] = cats

    # links: keep main-namespace targets, resolve redirects, drop dangling ones
    links = [x for x in p.links() if x.namespace == 0]
    links = [resolve(x.name) for x in links]
    links = [x for x in links if x in page_exists]
    for l in links:
        # store undirected links once, endpoints in sorted order
        if p.name < l:
            link = (p.name, l)
        else:
            link = (l, p.name)
        all_links.add(link)

    pages.append(pd)
    count += 1
    # int() coercion: argparse may deliver --limit as a string, and comparing
    # int >= str raises TypeError under Python 3
    if args.limit and count >= int(args.limit):
        break

graph = {}
graph['nodes'] = pages
graph['links'] = [{'source': a, 'target': b} for a, b in all_links]
graph['redirects'] = redirects
print(json.dumps(graph, indent=2), file=args.output)
|
||||
91
scripts/categories.py
Normal file
91
scripts/categories.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from mwclient import Site, APIError
from mwclient.page import Page
import argparse, json, sys

import unidecode
# unaccented_string = unidecode.unidecode(accented_string)

ap = argparse.ArgumentParser("")
ap.add_argument("--wikiprotocol", default="http")
ap.add_argument("--wikihost", default="localhost")
ap.add_argument("--wikipath", default="/mw/")
# type=int for consistency with the sibling scripts: a --limit value is meant
# to be compared against an integer counter, not kept as a string
ap.add_argument("--limit", type=int, default=None)
ap.add_argument("--output", default=sys.stdout, type=argparse.FileType('w'))
# ap.add_argument("--user", default=None)
# ap.add_argument("--password", default=None)
args = ap.parse_args()

# Connect to the wiki described by the CLI options
site = Site((args.wikiprotocol, args.wikihost), path=args.wikipath)
|
||||
|
||||
def category_subcats (site, cattitle, objects=True):
    """Return the subcategories of the category *cattitle*.

    Fetches members via the MediaWiki API in batches of 50, following the
    'continue' token until the listing is exhausted.

    Args:
        site: mwclient Site (any object with a compatible .api method works).
        cattitle: full category title, e.g. "Catégorie:Équipes".
        objects: when True, return mwclient Page objects (site.pages[title]);
            when False, return the raw title strings.
    """
    titles = []
    # Shared request parameters; cmcontinue is added once the API hands us a token
    params = dict(list="categorymembers", cmtitle=cattitle,
                  cmtype="subcat", cmlimit=50)
    while True:
        resp = site.api("query", **params)
        titles.extend(m['title'] for m in resp['query']['categorymembers'])
        if 'continue' not in resp:
            break
        # Resume the listing where the previous batch stopped
        params['cmcontinue'] = resp['continue']['cmcontinue']
    if objects:
        # print "converting to page objects ({0})".format(len(titles))
        titles = [site.pages[t] for t in titles]
    return titles
|
||||
|
||||
def strip_namespace (x):
    """Drop a leading "Namespace:" prefix from a page title, if present."""
    _ns, sep, rest = x.partition(":")
    return rest if sep else x
|
||||
|
||||
# All wiki categories, sorted accent-insensitively by their full name
cats = list(site.allcategories())
cats.sort(key=lambda x: unidecode.unidecode(x.name))
# cats_by_name = {}
cats = [{'title': cat.page_title, 'name': cat.name} for cat in cats]
# Index by bare title so subcategory titles can be resolved back to dicts
index = {c['title']: c for c in cats}

# Attach each subcategory to the first parent category that claims it
for c in cats:
    subcat_titles = category_subcats(site, c['name'], objects=False)
    for title in (strip_namespace(t) for t in subcat_titles):
        child = index[title]
        child.setdefault('parent', c)
|
||||
|
||||
def make_hierarchy (items):
    """Group flat category dicts into a tree under a synthetic root.

    An item whose 'parent' key holds a reference to another item dict is
    appended to that parent's 'children' list, and its 'parent' value is
    replaced by the parent's title string. Items without a parent become
    direct children of the returned root. The item dicts are mutated in
    place.
    """
    root = {'children': []}
    for entry in items:
        if 'parent' not in entry:
            root['children'].append(entry)
            continue
        parent = entry['parent']
        # Lazily create the parent's children list, then link the child
        parent.setdefault('children', []).append(entry)
        # Replace the dict reference with the parent's title for serialization
        entry['parent'] = parent['title']
        # del entry['parent']
    return root
|
||||
|
||||
def flatten (root, depth=0):
    """Walk the category tree depth-first, yielding one flat dict per node.

    Each yielded dict carries 'title', 'name', and 'depth', plus 'parent'
    (the parent's title) when the node has one.
    """
    for node in root['children']:
        entry = {'title': node['title'], 'name': node['name'], 'depth': depth}
        if 'parent' in node:
            entry['parent'] = node['parent']
        yield entry
        if 'children' in node:
            yield from flatten(node, depth + 1)
|
||||
|
||||
# json is already imported at the top of the file; the mid-script re-import
# that used to sit here was redundant and has been removed
root = make_hierarchy(cats)
#print (json.dumps(root, indent=2), file=args.output)

# Emit the tree as a flat, depth-annotated list
flat = list(flatten(root))
print(json.dumps(flat, indent=2), file=args.output)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user