ويكيبيديا:تقارير قاعدة البيانات/Potential biographies of living people (2)/Configuration

potenshblps2.py

عدل
#! /usr/bin/env python
# Public domain; bjweeks, MZMcBride; 2012

import datetime
import MySQLdb
import wikitools
import settings

report_title = settings.rootpage + 'Potential biographies of living people (2)'

report_template = u'''\
Articles that are in a "XXXX births" category (greater than 1899) that are \
not in [[:Category:Living people]], [[:Category:Possibly living people]], \
or a "XXXX deaths" category (limited to the first 1000 entries); \
data as of <onlyinclude>%s</onlyinclude>.

{| class="wikitable sortable plainlinks" style="width:100%%; margin:auto;"
|- style="white-space:nowrap;"
! No.
! Biography
! Birth year
|-
%s
|}
'''

excluded_categories = ['Living_people',
                       'Possibly_living_people',
                       'Disappeared_people',
                       'Missing_people',
                       'Year_of_death_unknown',
                       'Year_of_death_missing',
                       '20th-century_deaths',
                       '21st-century_deaths',
                       '1900s_deaths',
                       '2000s_deaths']

current_year = int(datetime.datetime.utcnow().strftime('%Y'))

wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)

conn = MySQLdb.connect(host=settings.host,
                       db=settings.dbname,
                       read_default_file='~/.my.cnf')
cursor = conn.cursor()

skip_page_ids = set()

for cat in excluded_categories:
    cursor.execute('''
    /* potenshblps2.py SLOW_OK */
    SELECT
      cl_from
    FROM categorylinks
    WHERE cl_to = %s;
    ''' , cat)
    results = cursor.fetchall()
    for row in results:
        cl_from = row[0]
        skip_page_ids.add(cl_from)

for year in range(1900, current_year+1):
    cursor.execute('''
    /* potenshblps2.py SLOW_OK */
    SELECT
      cl_from
    FROM categorylinks
    WHERE cl_to = '%s_deaths';
    ''' % year)
    results = cursor.fetchall()
    for row in results:
        cl_from = row[0]
        skip_page_ids.add(cl_from)

for year in range(1900, current_year+1, 10):
    cursor.execute('''
    /* potenshblps2.py SLOW_OK */
    SELECT
      cl_from
    FROM categorylinks
    WHERE cl_to = '%ss_deaths';
    ''' % year)
    results = cursor.fetchall()
    for row in results:
        cl_from = row[0]
        skip_page_ids.add(cl_from)

target_page_ids = []
for year in range(1900, current_year+1):
    cursor.execute('''
    /* potenshblps2.py SLOW_OK */
    SELECT
      cl_from
    FROM categorylinks
    WHERE cl_to = '%s_births';
    ''' % year)
    results = cursor.fetchall()
    for result in results:
        cl_from = result[0]
        if cl_from not in skip_page_ids:
            target_page_ids.append(str(cl_from))

cursor.execute('''
/* potenshblps2.py SLOW_OK */
SELECT
  page_title,
  cl_to
FROM page
JOIN categorylinks
ON cl_from = page_id
WHERE page_id IN (%s)
AND cl_to LIKE '%%_births'
AND page_namespace = 0
AND page_is_redirect = 0
ORDER BY cl_to DESC;
''' % (','.join(target_page_ids)))

i = 1
output = []
for row in cursor.fetchall():
    page_title = u'[[%s]]' % unicode(row[0], 'utf-8')
    birth_year = u'%s' % row[1].strip('_births')
    table_row = u'''| %d
| %s
| %s
|-''' % (i, page_title, birth_year)
    output.append(table_row)
    i += 1

cursor.execute('''
               SELECT
                 UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp)
               FROM recentchanges
               ORDER BY rc_timestamp DESC
               LIMIT 1;
               ''')
rep_lag = cursor.fetchone()[0]
time_diff = datetime.datetime.utcnow() - datetime.timedelta(seconds=rep_lag)
current_of = time_diff.strftime('%H:%M, %d %B %Y (UTC)')

report = wikitools.Page(wiki, report_title)
report_text = report_template % (current_of, '\n'.join(output))
report_text = report_text.encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)

cursor.close()
conn.close()

crontab

عدل
50 0 * * * python ~/scripts/database-reports/potenshblps2.py > /dev/null