diff --git a/.gitignore b/.gitignore index e03ba36..92e818f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ *.pyc +dist/* *~ #*# .#* +.DS_Store +._.* \ No newline at end of file diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 8e6d8e4..0000000 --- a/CHANGELOG +++ /dev/null @@ -1,124 +0,0 @@ -Changes since 1.2: -* Implements the new query-continue procedure in the MW API - The current querycontinue - option in APIRequest.query() is deprecated and will be removed in a future release. - A FutureWarning is issued for invocations of query() where the 'action' API parameter - == 'query' and querycontinue is True (the default setting). To silence the warning, set - querycontinue to False if unneeded or use the new queryGen() generator function. - queryGen does not attempt to stitch together the results into a single object, so it - requires slightly more effort to use, but the results should be more reliable. -* wikiFile.File.getHistory is renamed getFileHistory, so as not to conflict with the - same-named function in Page. 
getHistory() will still work as previously for now, but - issues a FutureWarning -* Prints upload errors -* Supports more auth methods -* Uses new token retrieval method in MediaWiki 1.24+ -* Fix issue with unicode normalization causing some md5 checks to fail when editing - -Changes since 1.1.1: -* Added getHistory and getHistoryGen functions to Page to get revision history/content -* Support for HTTP Auth -* Category.getAllMembersGen passes the namespace parameter correctly -* generator queries with an empty result set don't get caught in an infinite loop -* Added support for AssertEdit -* Fixed equality checks for user objects -* No longer breaks with Unicode namespace names -* Files are reset to the beginning before upload -* HTTP headers forced to be strings (Python 2.7 fix) -* User object now includes user ID number -* User.getTalkPage() added to easily get the Page object for a user's talk page -* wiki.UserBlocked exception raised if trying to edit while blocked -* Page, User, and Wiki objects now have hash functions -* File object constructor takes a pageid argument to align it with Page -* Fixed bug when trying to use pagelist to make a list of categories - -Changes since 1.1: -* Compatibility added for the new login method in MediaWiki 1.15.3 and higher. - See for more details. -* Using the logout() function sets the correct version number in the user-agent - -Changes since 1.0: -* Unicode bug in pagelist.listFromTitles fixed. -* Page objects now have an "unprefixedtitle" attribute with the title minus the namespace - prefix -* The page object sconstructor now accepts a "namespace" argument to set the namespace - based on a namespace index rather than the title -* The Page class now subclasses object -* maxlag can be ignored entirely by setting it to <0 -* New function: User.isBlocked() -* The User.blocked attribute now has three possible values - None, False, True - - corresponding to unknown, not blocked, blocked. 
Previously False could mean - either unknown or not blocked. -* Wiki.login() now has a domain argument, used for wikis that have LDAP login -* File upload support has been added for action=import and action=upload, the poster - package is required for this -* Bug in APIRequest.changeParam() fixed -* New Page.getCategories() function to get a list of categories on a page, also - added a "categories" attribute to Page -* Cookiejar files are no longer world-readable by setting the umask to 0077 before - creating the file -* If the API is disabled, an APIDisabled exception will be raised instead of repeating the - request forever -* Added File.getHistory() function and File.history attribute to get file upload history -* Added File.upload() function to upload files and UploadError exception for errors - during uploading -* Added an APIListResult class for cases where the API result is only a list such as - action=opensearch -* Added a Namespace class to wiki.py. Namespace 'constants' are now added as attributes - to Wiki objects. The attributes use the canonical name, in the same style as MediaWiki - - NS_NAME. Namespace subclasses int, so they function as integers in every way, except - the OR operator ( | ) is overridden to produce a list of namespaces for use in an API query -* Page.__getSection now uses a better, non-hacky way to determine section numbers from section - names, and now works correctly on pages with transcluded sections -* Page.edit() now accepts the new "watchlist" option -* If logged in, the default User-Agent header now includes the username - -Changes since 0.1.1: -* pagelist.listFromTitles() fixed -* automatic query-continue alogrithm improved -* performace of pagelist functions improved -* API query results now use the APIResult class, subclassing dict. 
HTTP response headers are - included in the results as .response member variable -* setNamespace() function added to Page class to allow changing the namespace of a Page object -* Title normalization improved -* API requests now print the actual exception info when retrying. If not retrying, the - exception is not caught. The ServerError exception has been removed. -* Added option to skip MD5 check when editing, as PHP's urldecoding fails in some corner cases -* Handling of sections in the Page class (particularly section 0) is improved -* Added some compatibility for read-restricted wikis. Not having read access previously caused - creating a Wiki object to fail as it tried to retrive the site info before login -* Handling of non-existent pages improved -* All modules imported when doing "import wikitools" -* Category.getAllMembers() can now be filtered by namespace by passing a list of - namespaces as the "namespaces" parameter -* File class (subclass of Page) added in wikifile.py (to avoid conflict with builtin file - objects) - includes functions to get file usage and download the file -* __str__ and __repr__ functions added for most objects to give useful string representations -* in-code documentation improved -* More functions now return some value, useful for debugging -* Broken Wiki.setUserAgent() function fixed - -List of changes since 0.1 release: -* Make page existence and setPageInfo() checks more consistent. -* Use **kwargs in Page.edit() rather than dozen keyword arguments all False by default, - this is a breaking change for anything didn't explicitly use the keyword arguments - for some reason. 
If the first param isn't a keyword, its treated as "text" for partial BC, - arguments are now all the same as the API action=edit params, "newtext" and "basetime" - still work for BC -* Fix broken User.page -* APIRequest now makes a copy() of params rather than using it directly -* reduce calls to setPageInfo() by using the title if pageid isn't available -* Make redirect following less random on calls to setPageInfo in Page - - note that this removes the followRedir param from setPageInfo, replacing it with - a followRedir member variable -* set maxlag directly in the query params for temporary raises for login/siteinfo queries - rather than using setmaxlag(), which could reset it back to 5 if the user set it to 120 - before logging in -* fix maxlag bug that changed it to 120 but didn't reset it back to 5 -* rewrite most of __longQuery and improve resultCombine -* add missing module import in api.py -* use pickle for cookie files, add an option to login() to verify the cookies are correct - with isLoggedIn(), set to True by default -* fix User.isIP check - several non IPs were treated as IP addresses -* support reblock option for user blocks -* improve namespace guessing/title normalization, include namespace aliases, - add the namespace prefix to category objects if its not already there diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..add9f04 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,4 @@ +Changes since 3.0.0: +* `wikitools3` uses Python 3 syntax, and Python 2 is no longer supported. + +For earlier versions, see the original [`wikitools`](https://pypi.org/project/wikitools/). 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2cc478c..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include README.md CHANGELOG -graft wikitools diff --git a/README.md b/README.md index 2dc2cc6..c6b5b20 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,47 @@ -wikitools -- Package for working with MediaWiki wikis - -Requirements ------------- - - * Python 2.5+. Not compatible with Python 3; not tested on older versions - * Bob Ippolito's simplejson module, if using Python < 2.6 - - * To upload files or import XML, you need Chris AtLee's poster package - - * The wiki this is used for should be running at least MediaWiki version 1.13 and have the API enabled. - -Installation ------------- - - * Run "python setup.py install" or copy the wikitools directory - to an appropriate Python module directory. - * An exe installer for Windows is also available (should be run as an - administrator to avoid errors) - * An RPM for Linux is also available. - * Arch Linux users may use AUR package for their convenience: - - -Available modules ------------------ - - * api.py - Contains the APIRequest class, for doing queries directly, - see API examples below - * wiki.py - Contains the Wiki class, used for logging in to the site, - storing cookies, and storing basic site information - * page.py - Contains the Page class for dealing with individual pages - on the wiki. Can be used to get page info and text, as well as edit and - other actions if enabled on the wiki - * category.py - Category is a subclass of Page with extra functions for - working with categories - * wikifile.py - File is a subclass of Page with extra functions for - working with files - note that there may be some issues with shared - repositories, as the pages for files on shared repos technically don't - exist on the local wiki. 
- * user.py - Contains the User class for getting information about and - blocking/unblocking users - * pagelist.py - Contains several functions for getting a list of Page - objects from lists of titles, pageids, or API query results - -Further documentation ---------------------- - * https://code.google.com/p/python-wikitools/wiki/Documentation - -Current limitations -------------------- - - * Can only do what the API can do. On a site without the edit-API enabled - (disabled by default prior to MediaWiki 1.14), you cannot edit/delete/ - protect pages, only retrieve information about them. - * May have issues with some non-ASCII characters. Most of these bugs - should be resolved, though full UTF-8 support is still a little flaky - * Usage on restricted-access (logged-out users can't read) wikis is - mostly untested - -Quick start ------------ +# `wikitools3` — Package for working with MediaWiki wikis + +## Requirements + + * Python 3. Not compatible with Python 2. If you are using Python 2, use the original [`wikitools`](https://pypi.org/project/wikitools/) instead. + * `wikitools3` uses [`poetry`](https://python-poetry.org/) for dependency management. If you are installing via `pip`, you should not need to install `poetry` separately. + * To upload files or import XML, you need Chris AtLee's [`poster3`](https://pypi.org/project/poster3/) package. This should be automatically installed by `pip` and/or `poetry` when you install `wikitools3`. + * The MediaWiki instance you are working with should be version 1.13 or later and have the API enabled. + +## Installation + + * Run `pip install wikitools3`. This is the preferred installation method. + * Alternatively, download the source repository and run `poetry install` within the `wikitools3` directory or copy the `wikitools3/wikitools3` subdirectory directly into the top-level directory of your project. 
+ +## Available modules + + * `api.py` - Contains the `APIRequest` class, for doing queries directly, see API examples below + * `wiki.py` - Contains the `Wiki` class, used for logging in to the site, storing cookies, and storing basic site information + * `page.py` - Contains the `Page` class for dealing with individual pages on the wiki. Can be used to get page info and text, as well as edit and other actions if enabled on the wiki + * `category.py` - `Category` is a subclass of `Page` with extra functions for working with categories + * `wikifile.py` - `File` is a subclass of `Page` with extra functions for working with files - note that there may be some issues with shared repositories, as the pages for files on shared repos technically don't exist on the local wiki. + * `user.py` - Contains the `User` class for getting information about and blocking/unblocking users + * `pagelist.py` - Contains several functions for getting a list of `Page` objects from lists of titles, pageids, or API query results + +## Further documentation + + * See also: the legacy `wikitools` documentation at [Google Code](https://code.google.com/p/python-wikitools/wiki/Documentation). + +## Current limitations + + * Can only do what the API can do. On a site without the edit-API enabled (disabled by default prior to MediaWiki 1.14), you cannot edit/delete/protect pages, only retrieve information about them. + * May have issues with some non-ASCII characters. Most of these bugs should be resolved, though full UTF-8 support is still a little flaky. + * Usage on restricted-access (logged-out users can't read) wikis is mostly untested. + * `wikitools3` has not been tested beyond the needs of `wikiteam`. If functionality from `wikitools` for Python 2 works for you, but the same functionality does not work for you in `wikitools3`, please submit a bug report at [github.com/elsiehupp/wikitools3/issues](https://github.com/elsiehupp/wikitools3/issues). 
+ +## Quick start To make a simple query: ```python -#!/usr/bin/python +#!/usr/bin/env python3 -from wikitools import wiki -from wikitools import api +from wikitools3 import wiki +from wikitools3 import api # create a Wiki object site = wiki.Wiki("http://my.wikisite.org/w/api.php") @@ -100,15 +75,13 @@ token = api.APIRequest(site, params).query()['query']['tokens']['csrftoken'] params = { 'action':'thank', 'rev':diff, 'token':token } ``` -For most normal usage, you may not have to do API queries yourself and can just -use the various classes. For example, to add a template to the top of all the -pages in namespace 0 in a category: +For most normal usage, you may not have to do API queries yourself and can just use the various classes. For example, to add a template to the top of all the pages in namespace `0` in a category: ```python -#!/usr/bin/python +#!/usr/bin/env python3 -from wikitools import wiki -from wikitools import category +from wikitools3 import wiki +from wikitools3 import category site = wiki.Wiki("http://my.wikisite.org/w/api.php") site.login("username", "password") @@ -120,16 +93,13 @@ for article in cat.getAllMembersGen(namespaces=[0]): article.edit(prependtext="{{template}}\n") ``` -See the MediaWiki API documentation at -for more information about using the MediaWiki API. You can get an example of -what query results will look like by doing the queries in your web browser using -the "jsonfm" format option - -Licensed under the GNU General Public License, version 3. A copy of the -license is included with this release. +See the [MediaWiki API documentation](https://www.mediawiki.org/wiki/API:Main_page) for more information about using the MediaWiki API. You can get an example of what query results will look like by doing the queries in your web browser using the `jsonfm` format option. + +Licensed under the [GNU General Public License, version 3](https://www.gnu.org/licenses/gpl-3.0.en.html). A copy of the license is included with this release. 
Authors ------- * Original source code Alex Z. (User:Mr.Z-man @ en.wikipedia) * Some code/assistance (User:Bjweeks @ en.wikipedia) +* Python 3 migration Elsie Hupp ([github.com/elsiehupp](https://github.com/elsiehupp)) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..522af4a --- /dev/null +++ b/poetry.lock @@ -0,0 +1,20 @@ +[[package]] +name = "poster3" +version = "0.8.1" +description = "Streaming HTTP uploads and multipart/form-data encoding" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +poster3 = ["buildutils", "sphinx"] + +[metadata] +lock-version = "1.1" +python-versions = "^3" +content-hash = "740bcec113e5c93070143646202dd102ef7ddf8f6c61d026719754044d544e60" + +[metadata.files] +poster3 = [ + {file = "poster3-0.8.1-py3-none-any.whl", hash = "sha256:1b27d7d63e3191e5d7238631fc828e4493590e94dcea034e386c079d853cce14"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5e94a13 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[tool.poetry] +name = "wikitools3" +version = "3.0.0" +description = "Python package for interacting with a MediaWiki wiki. It is used by WikiTeam for archiving MediaWiki wikis." 
+authors = ["Alex Zaddach ", "Elsie Hupp "] +maintainers = ["Elsie Hupp "] +readme = "README.md" +repository = "https://github.com/elsiehupp/wikitools3" +keywords = ["wikipedia", "mediawiki", "archive", "scrape", "archiveteam"] +license = "GPL-3.0-or-later" +include = ["CHANGELOG.md"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Natural Language :: English", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Wiki", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[tool.poetry.dependencies] +python = "^3" +poster3 = "^0.8.1" + +[tool.poetry.dev-dependencies] +twine = "*" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/setup.py b/setup.py deleted file mode 100644 index ae645ff..0000000 --- a/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python - -from distutils.core import setup - -setup(name='wikitools', - version='1.4', - description='Python package for interacting with a MediaWiki wiki', - long_description = """A Python package for interacting with a MediaWiki wiki using the MediaWiki API. -Designed for MediaWiki version 1.20 and higher, should work on 1.13+. -The edit-API must be enabled on the site to use editing features. 
-Please report any bugs to """, - author='Alex Zaddach (User:Mr.Z-man @ en.wikipedia)', - author_email='mrzmanwiki@gmail.com', - url='https://github.com/alexz-enwp/wikitools', - license='GPL v3', - packages=['wikitools'], - package_data={'wikitools': ['COPYING']} - ) diff --git a/wikitools/COPYING b/wikitools/COPYING deleted file mode 100644 index 94a9ed0..0000000 --- a/wikitools/COPYING +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. 
- - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. 
If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. 
This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/wikitools/api.py b/wikitools/api.py deleted file mode 100644 index 85e2132..0000000 --- a/wikitools/api.py +++ /dev/null @@ -1,403 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . - -# This module is documented at http://code.google.com/p/python-wikitools/wiki/api - -import urllib2 -import re -import time -import sys -import wiki -import base64 -import warnings -import copy -from urllib import quote_plus, _is_unicode -try: - from poster.encode import multipart_encode - canupload = True -except: - canupload = False - -try: - import json -except: - import simplejson as json -try: - import gzip - import StringIO -except: - gzip = False - -class APIError(Exception): - """Base class for errors""" - -class APIDisabled(APIError): - """API not enabled""" - -class APIRequest: - """A request to the site's API""" - def __init__(self, wiki, data, write=False, multipart=False): - """ - wiki - A Wiki object - data - API parameters in the form of a dict - write - set to True if doing a write query, so it won't try again on error - multipart - use multipart data transfer, required for file uploads, - requires the poster package - - maxlag is set by default to 5 but can be changed - format is always set to json - """ - if not canupload and multipart: - raise APIError("The poster module is required 
for multipart support") - self.sleep = 5 - self.data = data.copy() - self.data['format'] = "json" - self.iswrite = write - if wiki.assertval is not None and self.iswrite: - self.data['assert'] = wiki.assertval - if not 'maxlag' in self.data and not wiki.maxlag < 0: - self.data['maxlag'] = wiki.maxlag - self.multipart = multipart - if self.multipart: - (datagen, self.headers) = multipart_encode(self.data) - self.encodeddata = '' - for singledata in datagen: - self.encodeddata = self.encodeddata + singledata - else: - self.encodeddata = urlencode(self.data, 1) - self.headers = { - "Content-Type": "application/x-www-form-urlencoded", - "Content-Length": str(len(self.encodeddata)) - } - self.headers["User-agent"] = wiki.useragent - if gzip: - self.headers['Accept-Encoding'] = 'gzip' - self.wiki = wiki - self.response = False - if wiki.auth: - self.headers['Authorization'] = "Basic {0}".format( - base64.encodestring(wiki.auth + ":" + wiki.httppass)).replace('\n','') - if hasattr(wiki, "passman"): - self.opener = urllib2.build_opener(urllib2.HTTPDigestAuthHandler(wiki.passman), urllib2.HTTPCookieProcessor(wiki.cookies)) - else: - self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(wiki.cookies)) - self.request = urllib2.Request(self.wiki.apibase, self.encodeddata, self.headers) - - def setMultipart(self, multipart=True): - """Enable multipart data transfer, required for file uploads.""" - if not canupload and multipart: - raise APIError("The poster package is required for multipart support") - self.multipart = multipart - if multipart: - (datagen, headers) = multipart_encode(self.data) - self.headers.pop('Content-Length') - self.headers.pop('Content-Type') - self.headers.update(headers) - self.encodeddata = '' - for singledata in datagen: - self.encodeddata = self.encodeddata + singledata - else: - self.encodeddata = urlencode(self.data, 1) - self.headers['Content-Length'] = str(len(self.encodeddata)) - self.headers['Content-Type'] = 
"application/x-www-form-urlencoded" - - def changeParam(self, param, value): - """Change or add a parameter after making the request object - - Simply changing self.data won't work as it needs to update other things. - - value can either be a normal string value, or a file-like object, - which will be uploaded, if setMultipart was called previously. - - """ - if param == 'format': - raise APIError('You can not change the result format') - self.data[param] = value - if self.multipart: - (datagen, headers) = multipart_encode(self.data) - self.headers.pop('Content-Length') - self.headers.pop('Content-Type') - self.headers.update(headers) - self.encodeddata = '' - for singledata in datagen: - self.encodeddata = self.encodeddata + singledata - else: - self.encodeddata = urlencode(self.data, 1) - self.headers['Content-Length'] = str(len(self.encodeddata)) - self.headers['Content-Type'] = "application/x-www-form-urlencoded" - self.request = urllib2.Request(self.wiki.apibase, self.encodeddata, self.headers) - - def query(self, querycontinue=True): - """Actually do the query here and return usable stuff - - querycontinue - look for query-continue in the results and continue querying - until there is no more data to retrieve (DEPRECATED: use queryGen as a more - reliable and efficient alternative) - - """ - if querycontinue and self.data['action'] == 'query': - warnings.warn("""The querycontinue option is deprecated and will be removed -in a future release, use the new queryGen function instead -for queries requring multiple requests""", FutureWarning) - data = False - while not data: - rawdata = self.__getRaw() - data = self.__parseJSON(rawdata) - if not data and type(data) is APIListResult: - break - if 'error' in data: - if self.iswrite and data['error']['code'] == 'blocked': - raise wiki.UserBlocked(data['error']['info']) - raise APIError(data['error']['code'], data['error']['info']) - if 'query-continue' in data and querycontinue: - data = self.__longQuery(data) - 
return data - - def queryGen(self): - """Unlike the old query-continue method that tried to stitch results - together, which could work poorly for complex result sets and could - use a lot of memory, this yield each set returned by the API and lets - the user process the data. - Loosely based on the recommended implementation on mediawiki.org - - """ - reqcopy = copy.deepcopy(self.request) - self.changeParam('continue', '') - while True: - data = False - while not data: - rawdata = self.__getRaw() - data = self.__parseJSON(rawdata) - if not data and type(data) is APIListResult: - break - if 'error' in data: - if self.iswrite and data['error']['code'] == 'blocked': - raise wiki.UserBlocked(data['error']['info']) - raise APIError(data['error']['code'], data['error']['info']) - yield data - if 'continue' not in data: - break - else: - self.request = copy.deepcopy(reqcopy) - for param in data['continue']: - self.changeParam(param, data['continue'][param]) - - def __longQuery(self, initialdata): - """For queries that require multiple requests""" - self._continues = set() - self._generator = '' - total = initialdata - res = initialdata - params = self.data - numkeys = len(res['query-continue'].keys()) - while numkeys > 0: - key1 = '' - key2 = '' - possiblecontinues = res['query-continue'].keys() - if len(possiblecontinues) == 1: - key1 = possiblecontinues[0] - keylist = res['query-continue'][key1].keys() - if len(keylist) == 1: - key2 = keylist[0] - else: - for key in keylist: - if len(key) < 11: - key2 = key - break - else: - key2 = keylist[0] - else: - for posskey in possiblecontinues: - keylist = res['query-continue'][posskey].keys() - for key in keylist: - if len(key) < 11: - key1 = posskey - key2 = key - break - if key1: - break - else: - key1 = possiblecontinues[0] - key2 = res['query-continue'][key1].keys()[0] - if isinstance(res['query-continue'][key1][key2], int): - cont = res['query-continue'][key1][key2] - else: - cont = 
res['query-continue'][key1][key2].encode('utf-8') - if len(key2) >= 11 and key2.startswith('g'): - self._generator = key2 - for ckey in self._continues: - params.pop(ckey, None) - else: - self._continues.add(key2) - params[key2] = cont - req = APIRequest(self.wiki, params) - res = req.query(False) - for type in possiblecontinues: - total = resultCombine(type, total, res) - if 'query-continue' in res: - numkeys = len(res['query-continue'].keys()) - else: - numkeys = 0 - return total - - def __getRaw(self): - data = False - while not data: - try: - if self.sleep >= self.wiki.maxwaittime or self.iswrite: - catcherror = None - else: - catcherror = Exception - data = self.opener.open(self.request) - self.response = data.info() - if gzip: - encoding = self.response.get('Content-encoding') - if encoding in ('gzip', 'x-gzip'): - data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data.read())) - except catcherror, exc: - errname = sys.exc_info()[0].__name__ - errinfo = exc - print("%s: %s trying request again in %d seconds" % (errname, errinfo, self.sleep)) - time.sleep(self.sleep+0.5) - self.sleep+=5 - return data - - def __parseJSON(self, data): - maxlag = True - while maxlag: - try: - maxlag = False - parsed = json.loads(data.read()) - content = None - if isinstance(parsed, dict): - content = APIResult(parsed) - content.response = self.response.items() - elif isinstance(parsed, list): - content = APIListResult(parsed) - content.response = self.response.items() - else: - content = parsed - if 'error' in content: - error = content['error']['code'] - if error == "maxlag": - lagtime = int(re.search("(\d+) seconds", content['error']['info']).group(1)) - if lagtime > self.wiki.maxwaittime: - lagtime = self.wiki.maxwaittime - print("Server lag, sleeping for "+str(lagtime)+" seconds") - maxlag = True - time.sleep(int(lagtime)+0.5) - return False - except: # Something's wrong with the data... - data.seek(0) - if "MediaWiki API is not enabled for this site. 
Add the following line to your LocalSettings.php
$wgEnableAPI=true;
" in data.read(): - raise APIDisabled("The API is not enabled on this site") - print "Invalid JSON, trying request again" - # FIXME: Would be nice if this didn't just go forever if its never going to work - return False - return content - -class APIResult(dict): - response = [] - -class APIListResult(list): - response = [] - -def resultCombine(type, old, new): - """Experimental-ish result-combiner thing - - If the result isn't something from action=query, - this will just explode, but that shouldn't happen hopefully? - - """ - ret = old - if type in new['query']: # Basic list, easy - ret['query'][type].extend(new['query'][type]) - else: # Else its some sort of prop=thing and/or a generator query - for key in new['query']['pages'].keys(): # Go through each page - if not key in old['query']['pages']: # if it only exists in the new one - ret['query']['pages'][key] = new['query']['pages'][key] # add it to the list - else: - if not type in new['query']['pages'][key]: - continue - elif type in new['query']['pages'][key] and not type in ret['query']['pages'][key]: # if only the new one does, just add it to the return - ret['query']['pages'][key][type] = new['query']['pages'][key][type] - continue - else: # Need to check for possible duplicates for some, this is faster than just iterating over new and checking for dups in ret - retset = set([tuple(entry.items()) for entry in ret['query']['pages'][key][type]]) - newset = set([tuple(entry.items()) for entry in new['query']['pages'][key][type]]) - retset.update(newset) - ret['query']['pages'][key][type] = [dict(entry) for entry in retset] - return ret - -def urlencode(query,doseq=0): - """ - Hack of urllib's urlencode function, which can handle - utf-8, but for unknown reasons, chooses not to by - trying to encode everything as ascii - """ - if hasattr(query,"items"): - # mapping objects - query = query.items() - else: - # it's a bother at times that strings and string-like objects are - # sequences... 
- try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # zero-length sequences of all types will get here and succeed, - # but that's a minor nit - since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty,va,tb = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", tb - - l = [] - if not doseq: - # preserve old behavior - for k, v in query: - k = quote_plus(str(k)) - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - for k, v in query: - k = quote_plus(str(k)) - if isinstance(v, str): - v = quote_plus(v) - l.append(k + '=' + v) - elif _is_unicode(v): - # is there a reasonable way to convert to ASCII? - # encode generates a string, but "replace" or "ignore" - # lose information and "strict" can raise UnicodeError - v = quote_plus(v.encode("utf8","replace")) - l.append(k + '=' + v) - else: - try: - # is this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v)) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - l.append(k + '=' + quote_plus(str(elt))) - return '&'.join(l) - diff --git a/wikitools/category.py b/wikitools/category.py deleted file mode 100644 index f46a987..0000000 --- a/wikitools/category.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
- -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . - -import wiki -import page -import api - -class Category(page.Page): - """A category on the wiki""" - def __init__(self, site, title=False, check=True, followRedir=False, section=False, sectionnumber=False, pageid=False): - """ - wiki - A wiki object - title - The page title, as a string or unicode object - check - Checks for existence, normalizes title, required for most things - followRedir - follow redirects (check must be true) - section - the section name - sectionnumber - the section number - pageid - pageid, can be in place of title - """ - page.Page.__init__(self, site=site, title=title, check=check, followRedir=followRedir, section=section, sectionnumber=sectionnumber, pageid=pageid) - self.members = [] - if self.namespace != 14: - self.setNamespace(14, check) - - def getAllMembers(self, titleonly=False, reload=False, namespaces=False): - """Gets a list of pages in the category - - titleonly - set to True to only create a list of strings, - else it will be a list of Page objects - reload - reload the list even if it was generated before - namespaces - List of namespaces to restrict to (queries with this option will not be cached) - - """ - if self.members and not reload: - if titleonly: - if namespaces is not False: - return [p.title for p in self.members if p.namespace in namespaces] - else: - return [p.title for p in self.members] - if namespaces is False: - return self.members - else: - return [p for p in self.members if p.namespace in namespaces] - else: - ret = [] - members = [] - for member in self.__getMembersInternal(namespaces): - members.append(member) - if titleonly: - ret.append(member.title) - 
if titleonly: - return ret - if namespaces is False: - self.members = members - return members - - def getAllMembersGen(self, titleonly=False, reload=False, namespaces=False): - """Generator function for pages in the category - - titleonly - set to True to return strings, - else it will return Page objects - reload - reload the list even if it was generated before - namespaces - List of namespaces to restrict to (queries with this option will not be cached) - - """ - if self.members and not reload: - for member in self.members: - if namespaces is False or member.namespace in namespaces: - if titleonly: - yield member.title - else: - yield member - else: - if namespaces is False: - self.members = [] - for member in self.__getMembersInternal(namespaces): - if namespaces is False: - self.members.append(member) - if titleonly: - yield member.title - else: - yield member - - def __getMembersInternal(self, namespaces=False): - params = {'action':'query', - 'list':'categorymembers', - 'cmtitle':self.title, - 'cmlimit':self.site.limit, - 'cmprop':'title' - } - if namespaces is not False: - params['cmnamespace'] = '|'.join([str(ns) for ns in namespaces]) - while True: - req = api.APIRequest(self.site, params) - data = req.query(False) - for item in data['query']['categorymembers']: - yield page.Page(self.site, item['title'], check=False, followRedir=False) - try: - params['cmcontinue'] = data['query-continue']['categorymembers']['cmcontinue'] - except: - break diff --git a/wikitools/page.py b/wikitools/page.py deleted file mode 100644 index fc6213f..0000000 --- a/wikitools/page.py +++ /dev/null @@ -1,817 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com), bjweeks - -# This file is part of wikitools. 
-# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . - -import datetime -import wiki -import api -import urllib -import re -from hashlib import md5 -import unicodedata - - -class BadTitle(wiki.WikiError): - """Invalid title""" - -class NoPage(wiki.WikiError): - """Non-existent page""" - -class BadNamespace(wiki.WikiError): - """Invalid namespace number""" - -class EditError(wiki.WikiError): - """Problem with edit request""" - -class ProtectError(wiki.WikiError): - """Problem with protection request""" - -def namespaceDetect(title, site): - """ Detect the namespace of a given title - title - the page title - site - the wiki object the page is on - """ - bits = title.split(':', 1) - if len(bits) == 1 or bits[0] == '': - return 0 - else: - nsprefix = bits[0].lower() # wp:Foo and caTEGory:Foo are normalized by MediaWiki - for ns in site.namespaces: - if nsprefix == site.namespaces[ns]['*'].lower(): - return int(ns) - else: - if site.NSaliases: - for ns in site.NSaliases: - if nsprefix == ns.lower(): - return int(site.NSaliases[ns]) - return 0 - -class Page(object): - """ A page on the wiki""" - - def __init__(self, site, title=False, check=True, followRedir=True, section=False, sectionnumber=None, pageid=False, namespace=False): - """ - wiki - A wiki object - title - The page title, as a string or unicode object - check - Checks for existence, normalizes title, required for most things - followRedir - follow redirects (check must 
be true) - section - the section name - sectionnumber - the section number - pageid - pageid, can be in place of title - namespace - use to set the namespace prefix *if its not already in the title* - """ - # Initialize instance vars from function args - if not title and not pageid: - raise wiki.WikiError("No title or pageid given") - self.site = site - if pageid: - self.pageid = int(pageid) - else: - self.pageid = 0 - self.followRedir = followRedir - self.title = title - self.unprefixedtitle = False # will be set later - self.urltitle = '' - self.wikitext = '' - self.templates = [] - self.links = [] - self.categories = [] - self.exists = True # If we're not going to check, assume it does - self.protection = {} - self.namespace = namespace - - # Things that need to be done before anything else - if self.title: - self.title = self.title.replace('_', ' ') - if self.namespace: - if namespace not in self.site.namespaces.keys(): - raise BadNamespace(namespace) - if self.title: - self.unprefixedtitle = self.title - self.title = ':'.join((self.site.namespaces[self.namespace]['*'], self.title.decode('utf8'))) - if int(self.namespace) is 0 and self.title: - self.namespace = int(self.namespace) - self.unprefixedtitle = self.title - # Setting page info with API, should set: - # pageid, exists, title, unprefixedtitle, namespace - if check: - self.setPageInfo() - else: - if self.namespace is False and self.title: - self.namespace = namespaceDetect(self.title, self.site) - if self.namespace is not 0: - nsname = self.site.namespaces[self.namespace]['*'] - self.unprefixedtitle = self.title.split(':', 1)[1] - self.title = ':'.join((nsname, self.unprefixedtitle)) - else: - self.unprefixedtitle = self.title - - if section or sectionnumber is not None: - self.setSection(section, sectionnumber) - else: - self.section = False - if title: - if not isinstance(self.title, unicode): - self.title = unicode(self.title, 'utf-8') - if not isinstance(self.unprefixedtitle, unicode): - 
self.unprefixedtitle = unicode(self.unprefixedtitle, 'utf-8') - self.urltitle = urllib.quote(self.title.encode('utf-8')).replace('%20', '_').replace('%2F', '/') - - def setPageInfo(self): - """Sets basic page info, required for almost everything""" - followRedir = self.followRedir - params = {'action':'query'} - if self.pageid: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - if followRedir: - params['redirects'] = '' - req = api.APIRequest(self.site, params) - response = req.query(False) - self.pageid = response['query']['pages'].keys()[0] - if self.pageid > 0: - self.exists = True - if 'missing' in response['query']['pages'][str(self.pageid)]: - if not self.title: - # Pageids are never recycled, so a bad pageid with no title will never work - raise wiki.WikiError("Bad pageid given with no title") - self.exists = False - if 'invalid' in response['query']['pages'][str(self.pageid)]: - raise BadTitle(self.title) - if 'title' in response['query']['pages'][str(self.pageid)]: - self.title = response['query']['pages'][str(self.pageid)]['title'].encode('utf-8') - self.namespace = int(response['query']['pages'][str(self.pageid)]['ns']) - if self.namespace is not 0: - self.unprefixedtitle = self.title.split(':', 1)[1] - else: - self.unprefixedtitle = self.title - self.pageid = int(self.pageid) - if self.pageid < 0: - self.pageid = 0 - return self - - def setNamespace(self, newns, recheck=False): - """Change the namespace number of a page object - - Updates the title with the new prefix - newns - integer namespace number - recheck - redo pageinfo checks - - """ - if not newns in self.site.namespaces.keys(): - raise BadNamespace - if self.namespace == newns: - return self.namespace - if self.title: - if self.namespace != 0: - bits = self.title.split(':', 1) - nsprefix = bits[0].lower() - for ns in self.site.namespaces: - if nsprefix == self.site.namespaces[ns]['*'].lower(): - self.title = bits[1] - break - else: - if self.site.NSaliases: - for ns 
in self.site.NSaliases: - if nsprefix == ns.lower(): - self.title = bits[1] - break - self.namespace = newns - if self.namespace: - self.title = self.site.namespaces[self.namespace]['*']+':'+self.title - self.urltitle = urllib.quote(self.title.encode('utf-8')).replace('%20', '_').replace('%2F', '/') - else: - self.namespace = newns - if recheck: - self.pageid = False - self.setPageInfo() - else: - self.pageid = 0 - self.wikitext = '' - self.templates = [] - self.links = [] - return self.namespace - - def setSection(self, section=None, number=None): - """Set a section for the page - - section - the section name - number - the section number - - """ - if section is None and number is None: - self.section = False - elif number is not None: - try: - self.section = str(int(number)) - except ValueError: - raise WikiError("Section number must be an int") - else: - self.section = self.__getSection(section) - self.wikitext = '' - return self.section - - def __getSection(self, section): - if not self.title: - self.setPageInfo() - params = { - 'action': 'parse', - 'page':self.title, - 'prop':'sections' - } - number = False - req = api.APIRequest(self.site, params) - response = req.query() - for item in response['parse']['sections']: - if section == item['line'] or section == item['anchor']: - if item['index'].startswith('T'): # TODO: It would be cool if it set the page title to the template in this case - continue - number = item['index'] - break - return number - - def canHaveSubpages(self): - """Is the page in a namespace that allows subpages?""" - if not self.title: - self.setPageInfo() - return 'subpages' in self.site.namespaces[self.namespace] - - def isRedir(self): - """Is the page a redirect?""" - params = {'action':'query', - 'redirects':'' - } - if not self.exists: - raise NoPage - if self.pageid != 0 and self.exists: - params['pageids'] = self.pageid - elif self.title: - params['titles'] = self.title - else: - self.setPageInfo() - if self.pageid != 0 and 
self.exists: - params['pageids'] = self.pageid - else: - raise NoPage - req = api.APIRequest(self.site, params) - res = req.query(False) - if 'redirects' in res['query']: - return True - else: - return False - - def isTalk(self): - """Is the page a discussion page?""" - if not self.title: - self.setPageInfo() - return (self.namespace%2==1 and self.namespace >= 0) - - def toggleTalk(self, check=True, followRedir=True): - """Switch to and from the talk namespaces - - Returns a new page object that's either the talk or non-talk - version of the current page - - check and followRedir - same meaning as Page constructor - - """ - if not self.title: - self.setPageInfo() - ns = self.namespace - if ns < 0: - return False - nsname = self.site.namespaces[ns]['*'] - if self.isTalk(): - newns = self.site.namespaces[ns-1]['*'] - else: - newns = self.site.namespaces[ns+1]['*'] - try: - pagename = self.title.split(nsname+':',1)[1] - except: - pagename = self.title - if newns != '': - newname = newns+':'+pagename - else: - newname = pagename - return Page(self.site, newname, check, followRedir) - - def getWikiText(self, expandtemplates=False, force=False): - """Gets the Wikitext of the page - - expandtemplates - expand the templates to wikitext instead of transclusions - force - load the text even if we already loaded it before - - """ - - if self.wikitext and not force: - return self.wikitext - if self.pageid == 0 and not self.title: - self.setPageInfo() - if not self.exists: - raise NoPage - params = { - 'action': 'query', - 'prop': 'revisions', - 'rvprop': 'content|timestamp', - 'rvlimit': '1' - } - if self.pageid: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - if expandtemplates: - params['rvexpandtemplates'] = '1' - if self.section is not False: - params['rvsection'] = self.section - req = api.APIRequest(self.site, params) - response = req.query(False) - if self.pageid == 0: - self.pageid = int(response['query']['pages'].keys()[0]) - if self.pageid 
== -1: - self.exists == False - raise NoPage - self.wikitext = response['query']['pages'][str(self.pageid)]['revisions'][0]['*'].encode('utf-8') - self.lastedittime = response['query']['pages'][str(self.pageid)]['revisions'][0]['timestamp'] - return self.wikitext - - def getLinks(self, force=False): - """Gets a list of all the internal links *on* the page - - force - load the list even if we already loaded it before - - """ - if self.links and not force: - return self.links - if self.pageid == 0 and not self.title: - self.setPageInfo() - if not self.exists: - raise NoPage - params = { - 'action': 'query', - 'prop': 'links', - 'pllimit': self.site.limit, - } - if self.pageid > 0: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - req = api.APIRequest(self.site, params) - self.links = [] - for data in req.queryGen(): - self.links.extend(self.__extractToList(data, 'links')) - return self.links - - def getProtection(self, force=False): - """Returns the current protection status of the page""" - if self.protection and not force: - return self.protection - if self.pageid == 0 and not self.title: - self.setPageInfo() - params = { - 'action': 'query', - 'prop': 'info', - 'inprop': 'protection', - } - if not self.exists or self.pageid <= 0: - params['titles'] = self.title - else: - params['titles'] = self.title - req = api.APIRequest(self.site, params) - response = req.query(False) - for pr in response['query'].values()[0].values()[0]['protection']: - if pr['level']: - if pr['expiry'] == 'infinity': - expiry = 'infinity' - else: - expiry = datetime.datetime.strptime(pr['expiry'],'%Y-%m-%dT%H:%M:%SZ') - self.protection[pr['type']] = { - 'expiry': expiry, - 'level': pr['level'] - } - return self.protection - - def getTemplates(self, force=False): - """Gets all list of all the templates on the page - - force - load the list even if we already loaded it before - - """ - if self.templates and not force: - return self.templates - if self.pageid == 0 and 
not self.title: - self.setPageInfo() - if not self.exists: - raise NoPage - params = { - 'action': 'query', - 'prop': 'templates', - 'tllimit': self.site.limit, - } - if self.pageid: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - req = api.APIRequest(self.site, params) - self.templates = [] - for data in req.queryGen(): - self.templates.extend(self.__extractToList(data, 'templates')) - return self.templates - - def getCategories(self, force=False): - """Gets all list of all the categories on the page - - force - load the list even if we already loaded it before - - """ - if self.categories and not force: - return self.categories - if self.pageid == 0 and not self.title: - self.setPageInfo() - if not self.exists: - raise NoPage - params = { - 'action': 'query', - 'prop': 'categories', - 'cllimit': self.site.limit, - } - if self.pageid: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - req = api.APIRequest(self.site, params) - self.categories = [] - for data in req.queryGen(): - self.categories.extend(self.__extractToList(data, 'categories')) - return self.categories - - def getHistory(self, direction='older', content=True, limit='all'): - """Get the history of a page - - direction - 2 options: 'older' (default) - start with the current revision and get older ones - 'newer' - start with the oldest revision and get newer ones - content - If False, get only metadata (timestamp, edit summary, user, etc) - If True (default), also get the revision text - limit - Only retrieve a certain number of revisions. 
If 'all' (default), all revisions are returned - - The data is returned in essentially the same format as the API, a list of dicts that look like: - {u'*': u"Page content", # Only returned when content=True - u'comment': u'Edit summary', - u'contentformat': u'text/x-wiki', # Only returned when content=True - u'contentmodel': u'wikitext', # Only returned when content=True - u'parentid': 139946, # id of previous revision - u'revid': 139871, # revision id - u'sha1': u'0a5cec3ca3e084e767f00c9a5645c17ac27b2757', # sha1 hash of page content - u'size': 129, # size of page in bytes - u'timestamp': u'2002-08-05T14:11:27Z', # timestamp of edit - u'user': u'Username', - u'userid': 48 # user id - } - - Note that unlike other get* functions, the data is not cached - """ - max = limit - if limit == 'all': - max = float("inf") - if limit == 'all' or limit > self.site.limit: - limit = self.site.limit - history = [] - rvc = None - while True: - revs, rvc = self.__getHistoryInternal(direction, content, limit, rvc) - history = history+revs - if len(history) == max or rvc is None: - break - if max - len(history) < self.site.limit: - limit = max - len(history) - return history - - def getHistoryGen(self, direction='older', content=True, limit='all'): - """Generator function for page history - - The interface is the same as getHistory, but it will only retrieve 1 revision at a time. 
- This will be slower and have much higher network overhead, but does not require storing - the entire page history in memory - """ - max = limit - count = 0 - rvc = None - while True: - revs, rvc = self.__getHistoryInternal(direction, content, 1, rvc) - yield revs[0] - count += 1 - if count == max or rvc is None: - break - - def __getHistoryInternal(self, direction, content, limit, rvcontinue): - - if self.pageid == 0 and not self.title: - self.setPageInfo() - if not self.exists: - raise NoPage - if direction != 'newer' and direction != 'older': - raise wiki.WikiError("direction must be 'newer' or 'older'") - params = { - 'action':'query', - 'prop':'revisions', - 'rvdir':direction, - 'rvprop':'ids|flags|timestamp|user|userid|size|sha1|comment', - 'continue':'', - 'rvlimit':limit - } - if self.pageid: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - - if content: - params['rvprop']+='|content' - if rvcontinue: - params['continue'] = rvcontinue['continue'] - params['rvcontinue'] = rvcontinue['rvcontinue'] - req = api.APIRequest(self.site, params) - response = req.query(False) - id = response['query']['pages'].keys()[0] - if not self.pageid: - self.pageid = int(id) - revs = response['query']['pages'][id]['revisions'] - rvc = None - if 'continue' in response: - rvc = response['continue'] - return (revs, rvc) - - def __extractToList(self, json, stuff): - list = [] - if self.pageid == 0: - self.pageid = json['query']['pages'].keys()[0] - if stuff in json['query']['pages'][str(self.pageid)]: - for item in json['query']['pages'][str(self.pageid)][stuff]: - list.append(item['title']) - return list - - def edit(self, *args, **kwargs): - """Edit the page - - Arguments are a subset of the API's action=edit arguments, valid arguments - are defined in the validargs set - To skip the MD5 check, set "skipmd5" keyword argument to True - http://www.mediawiki.org/wiki/API:Edit_-_Create%26Edit_pages#Parameters - - For backwards compatibility: - 'newtext' is 
equivalent to 'text' - 'basetime' is equivalent to 'basetimestamp' - - """ - validargs = set(['text', 'summary', 'minor', 'notminor', 'bot', 'basetimestamp', 'starttimestamp', - 'recreate', 'createonly', 'nocreate', 'watch', 'unwatch', 'watchlist', 'prependtext', 'appendtext', - 'section', 'captchaword', 'captchaid']) - # For backwards compatibility - if 'newtext' in kwargs: - kwargs['text'] = kwargs['newtext'] - del kwargs['newtext'] - if 'basetime' in kwargs: - kwargs['basetimestamp'] = kwargs['basetime'] - del kwargs['basetime'] - if len(args) and 'text' not in kwargs: - kwargs['text'] = args[0] - skipmd5 = False - if 'skipmd5' in kwargs and kwargs['skipmd5']: - skipmd5 = True - invalid = set(kwargs.keys()).difference(validargs) - if invalid: - for arg in invalid: - del kwargs[arg] - if not self.title: - self.setPageInfo() - if not 'section' in kwargs and self.section is not False: - kwargs['section'] = self.section - if not 'text' in kwargs and not 'prependtext' in kwargs and not 'appendtext' in kwargs: - raise EditError("No text specified") - if 'prependtext' in kwargs and 'section' in kwargs: - raise EditError("Bad param combination") - if 'createonly' in kwargs and 'nocreate' in kwargs: - raise EditError("Bad param combination") - token = self.site.getToken('csrf') - if 'text' in kwargs: - hashtext = kwargs['text'] - elif 'prependtext' in kwargs and 'appendtext' in kwargs: - hashtext = kwargs['prependtext']+kwargs['appendtext'] - elif 'prependtext' in kwargs: - hashtext = kwargs['prependtext'] - else: - hashtext = kwargs['appendtext'] - params = { - 'action': 'edit', - 'title':self.title, - 'token':token, - } - if not skipmd5: - if not isinstance(hashtext, unicode): - hashtext = hashtext.decode('utf8') - hashtext = unicodedata.normalize('NFC', hashtext).encode('utf8') - params['md5'] = md5(hashtext).hexdigest() - params.update(kwargs) - req = api.APIRequest(self.site, params, write=True) - result = req.query() - if 'edit' in result and 
result['edit']['result'] == 'Success': - self.wikitext = '' - self.links = [] - self.templates = [] - self.exists = True - return result - - def move(self, mvto, reason=False, movetalk=False, noredirect=False, watch=False, unwatch=False): - """Move the page - - Params are the same as the API: - mvto - page title to move to, the only required param - reason - summary for the log - movetalk - move the corresponding talk page - noredirect - don't create a redirect at the previous title - watch - add the page to your watchlist - unwatch - remove the page from your watchlist - - """ - if not self.title and self.pageid == 0: - self.setPageInfo() - if not self.exists: - raise NoPage - token = self.site.getToken('csrf') - params = { - 'action': 'move', - 'to':mvto, - 'token':token, - } - if self.pageid: - params['fromid'] = self.pageid - else: - params['from'] = self.title - if reason: - params['reason'] = reason.encode('utf-8') - if movetalk: - params['movetalk'] = '1' - if noredirect: - params['noredirect'] = '1' - if watch: - params['watch'] = '1' - if unwatch: - params['unwatch'] = '1' - req = api.APIRequest(self.site, params, write=True) - result = req.query() - if 'move' in result: - self.title = result['move']['to'] - self.namespace = namespaceDetect(self.title, self.site) - if self.namespace is not 0: - self.unprefixedtitle = self.title.split(':', 1)[1] - else: - self.unprefixedtitle = self.title - if not isinstance(self.title, unicode): - self.title = unicode(self.title, 'utf-8') - self.urltitle = urllib.quote(self.title.encode('utf-8')).replace('%20', '_').replace('%2F', '/') - else: - self.urltitle = urllib.quote(self.title.encode('utf-8')).replace('%20', '_').replace('%2F', '/') - return result - - def protect(self, restrictions={}, expirations={}, reason=False, cascade=False): - """Protect a page - - Restrictions and expirations are dictionaries of - protection level/expiry settings, e.g., {'edit':'sysop'} and - {'move':'3 days'}. 
expirations can also be a string to set - all levels to the same expiration - - reason - summary for log - cascade - apply protection to all pages transcluded on the page - - """ - if not self.title: - self.setPageInfo() - if not restrictions: - raise ProtectError("No protection levels given") - if len(expirations) > len(restrictions): - raise ProtectError("More expirations than restrictions given") - token = self.site.getToken('csrf') - protections = '' - expiry = '' - if isinstance(expirations, str): - expiry = expirations - for type in restrictions: - if protections: - protections+="|" - protections+= type+"="+restrictions[type] - if isinstance(expirations, dict) and type in expirations: - if expiry: - expiry+="|" - expiry+=expirations[type] - elif isinstance(expirations, dict): - if expiry: - expiry+="|" - expiry+='indefinite' - params = {'action':'protect', - 'title':self.title, - 'token':token, - 'protections':protections - } - if expiry: - params['expiry'] = expiry - if reason: - params['reason'] = reason - if cascade: - params['cascade'] = '' - req = api.APIRequest(self.site, params, write=True) - result = req.query() - if 'protect' in result: - self.protection = {} - return result - - def delete(self, reason=False, watch=False, unwatch=False): - """Delete the page - - reason - summary for log - watch - add the page to your watchlist - unwatch - remove the page from your watchlist - - """ - if not self.title and self.pageid == 0: - self.setPageInfo() - if not self.exists: - raise NoPage - token = self.site.getToken('csrf') - params = { - 'action': 'delete', - 'token':token, - } - if self.pageid: - params['pageid'] = self.pageid - else: - params['title'] = self.title - if reason: - params['reason'] = reason.encode('utf-8') - if watch: - params['watch'] = '1' - if unwatch: - params['unwatch'] = '1' - req = api.APIRequest(self.site, params, write=True) - result = req.query() - if 'delete' in result: - self.pageid = 0 - self.exists = False - self.wikitext = '' 
- self.templates = '' - self.links = '' - self.protection = {} - self.section = False - return result - - - def __hash__(self): - return int(self.pageid) ^ hash(self.site.apibase) - - def __str__(self): - if self.title: - title = self.title - else: - title = 'pageid: '+self.pageid - return self.__class__.__name__ +' '+repr(title) + " from " + repr(self.site.domain) - - def __repr__(self): - if self.title: - title = self.title - else: - title = 'pageid: '+self.pageid - return "<"+self.__module__+'.'+self.__class__.__name__+" "+repr(title)+" using "+repr(self.site.apibase)+">" - - def __eq__(self, other): - if not isinstance(other, Page): - return False - if self.title: - if self.title == other.title and self.site == other.site: - return True - else: - if self.pageid == other.pageid and self.site == other.site: - return True - return False - - def __ne__(self, other): - if not isinstance(other, Page): - return True - if self.title: - if self.title == other.title and self.site == other.site: - return False - else: - if self.pageid == other.pageid and self.site == other.site: - return False - return True diff --git a/wikitools/pagelist.py b/wikitools/pagelist.py deleted file mode 100644 index d063283..0000000 --- a/wikitools/pagelist.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. 
If not, see . - -import api -import page -import category -import wikifile -import math - -def listFromQuery(site, queryresult): - """Generate a list of pages from an API query result - - queryresult is the list of pages from a list or generator query - e.g. - for a list=categorymembers query, use result['query']['categorymembers'] - for a generator query, use result['query']['pages'] - - """ - ret = [] - if isinstance(queryresult, list): - for item in queryresult: - pageid = False - if 'pageid' in item: - pageid = item['pageid'] - if item['ns'] == 14: - item = category.Category(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - elif item['ns'] == 6: - item = wikifile.File(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - else: - item = page.Page(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - ret.append(item) - else: - for key in queryresult.keys(): - item = queryresult[key] - pageid = False - if 'pageid' in item: - pageid = item['pageid'] - if item['ns'] == 14: - item = category.Category(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - elif item['ns'] == 6: - item = wikifile.File(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - else: - item = page.Page(site, title=item['title'], check=False, followRedir=False, pageid=pageid) - ret.append(item) - return ret - -def listFromTitles(site, titles, check=True, followRedir=False): - """Create a list of page objects from a list of titles - - check and followRedir have the same meaning as in page.Page - - """ - ret = [] - if not check: - for title in titles: - title = page.Page(site, title=title, check=False) - ret.append(title) - else: - querylist = [] - limit = int(site.limit) - if len(titles) > limit/10: - iters = int(math.ceil(float(len(titles)) / (limit/10))) - for x in range(0,iters): - lower = x*limit/10 - upper = (x+1)*limit/10 - querylist.append(titles[lower:upper]) - else: - 
querylist.append(titles) - response = False - for item in querylist: - tlist = '|'.join(item) - if not isinstance(tlist, unicode): - tlist = unicode(tlist, 'utf8') - params = {'action':'query', - 'titles':tlist, - } - if followRedir: - params['redirects'] = '' - req = api.APIRequest(site, params) - res = req.query(False) - for key in res['query']['pages']: - obj = res['query']['pages'][key] - item = makePage(key, obj, site) - ret.append(item) - return ret - -def listFromPageids(site, pageids, check=True, followRedir=False): - """Create a list of page objects from a list of pageids - - check and followRedir have the same meaning as in page.Page - - """ - ret = [] - if not check: - for id in pageids: - title = page.Page(site, pageid=id, check=False) - ret.append(title) - else: - querylist = [] - limit = int(site.limit) - if len(pageids) > limit/10: - iters = int(math.ceil(float(len(pageids)) / (limit/10))) - for x in range(0,iters): - lower = x*limit/10 - upper = (x+1)*limit/10 - querylist.append(pageids[lower:upper]) - else: - querylist.append(pageids) - response = False - for item in querylist: - ids = [str(id) for id in item] - idlist = '|'.join(ids) - params = {'action':'query', - 'pageids':idlist, - } - if followRedir: - params['redirects'] = '' - req = api.APIRequest(site, params) - res = req.query() - if not response: - response = res - else: - response = api.resultCombine('', response, res) - for key in response['query']['pages'].keys(): - res = response['query']['pages'][key] - item = makePage(key, res, site) - ret.append(item) - return ret - -def makePage(key, result, site): - title=False - if 'title' in result: - title = result['title'] - if 'ns' in result and result['ns'] == 14: - item = category.Category(site, title=title, check=False, followRedir=False, pageid=key) - elif 'ns' in result and result['ns'] == 6: - item = wikifile.File(site, title=title, check=False, followRedir=False, pageid=key) - else: - item = page.Page(site, title=title, check=False, 
followRedir=False, pageid=key) - if 'missing' in result: - item.exists = False - if 'invalid' in result: - item = False - if 'ns' in result: - item.setNamespace(int(result['ns'])) - return item diff --git a/wikitools/user.py b/wikitools/user.py deleted file mode 100644 index 1d3e9d2..0000000 --- a/wikitools/user.py +++ /dev/null @@ -1,222 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com), bjweeks - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . 
- -import wiki -import page -import api -import socket -import re - -class User: - """A user on the wiki""" - def __init__(self, site, name, check=True): - """ - wiki - A wiki object - name - The username, as a string - check - Checks for existence, normalizes name - """ - self.site = site - self.name = name.strip() - if not isinstance(self.name, unicode): - self.name = unicode(self.name, 'utf8') - self.exists = True # If we're not going to check, assume it does - self.blocked = None # So we can tell the difference between blocked/not blocked/haven't checked - self.editcount = -1 - self.groups = [] - self.id = 0 - if check: - self.setUserInfo() - self.isIP = False - self.IPcheck() - self.page = page.Page(self.site, ':'.join([self.site.namespaces[2]['*'], self.name]), check=check, followRedir=False) - - def IPcheck(self): - try: #IPv4 check - s = socket.inet_aton(self.name.replace(' ', '_')) - if socket.inet_ntoa(s) == self.name: - self.isIP = True - self.exists = False - return - except: - pass - try: - s = socket.inet_pton(socket.AF_INET6, self.name.replace(' ', '_')) - if self.IPnorm(socket.inet_ntop(socket.AF_INET6, s)) == self.IPnorm(self.name): - self.isIP = True - self.exists = False - self.name = self.IPnorm(self.name) - return - except: - pass - - def IPnorm(self, ip): - """This is basically a port of MediaWiki's IP::sanitizeIP but assuming no CIDR ranges""" - ip = ip.upper() - # Expand zero abbreviations - abbrevPos = ip.find('::') - if abbrevPos != -1: - addressEnd = len(ip) - 1 - # If the '::' is at the beginning... 
- if abbrevPos == 0: - repeat = '0:' - extra = '0' if ip == '::' else '' - pad = 9 - elif abbrevPos == addressEnd - 1: - repeat = ':0' - extra = '' - pad = 9 - else: - repeat = ':0' - extra = ':' - pad = 8 - ip = ip.replace( '::', repeat*(pad-ip.count(':'))+extra) - # Remove leading zereos from each bloc as needed - ip = re.sub('/(^|:)0+(([0-9A-Fa-f]{1,4}))/', '\1\2', ip) - return ip; - - def setUserInfo(self): - """Sets basic user info""" - params = { - 'action': 'query', - 'list': 'users', - 'ususers':self.name, - 'usprop':'blockinfo|groups|editcount' - } - req = api.APIRequest(self.site, params) - response = req.query(False) - user = response['query']['users'][0] - self.name = user['name'] - if 'missing' in user or 'invalid' in user: - self.exists = False - return - self.id = int(user['userid']) - self.editcount = int(user['editcount']) - if 'groups' in user: - self.groups = user['groups'] - if 'blockedby' in user: - self.blocked = True - else: - self.blocked = False - return self - - def getTalkPage(self, check=True, followRedir=False): - """Convenience function to get an object for the user's talk page""" - return page.Page(self.site, ':'.join([self.site.namespaces[3]['*'], self.name]), check=check, followRedir=False) - - def isBlocked(self, force=False): - """Determine if a user is blocked""" - if self.blocked is not None and not force: - return self.blocked - params = {'action':'query', - 'list':'blocks', - 'bkusers':self.name, - 'bkprop':'id' - } - req = api.APIRequest(self.site, params) - res = req.query(False) - if len(res['query']['blocks']) > 0: - self.blocked = True - else: - self.blocked = False - return self.blocked - - def block(self, reason=False, expiry=False, anononly=False, nocreate=False, autoblock=False, noemail=False, hidename=False, allowusertalk=False, reblock=False): - """Block the user - - Params are the same as the API - reason - block reason - expiry - block expiration - anononly - block anonymous users only - nocreate - disable account 
creation - autoblock - block IP addresses used by the user - noemail - block user from sending email through the site - hidename - hide the username from the log (requires hideuser right) - allowusertalk - allow the user to edit their talk page - reblock - overwrite existing block - - """ - token = self.site.getToken('csrf') - params = {'action':'block', - 'user':self.name, - 'token':token - } - if reason: - params['reason'] = reason - if expiry: - params['expiry'] = expiry - if anononly: - params['anononly'] = '' - if nocreate: - params['nocreate'] = '' - if autoblock: - params['autoblock'] = '' - if noemail: - params['noemail'] = '' - if hidename: - params['hidename'] = '' - if allowusertalk: - params['allowusertalk'] = '' - if reblock: - params['reblock'] = '' - req = api.APIRequest(self.site, params, write=False) - res = req.query() - if 'block' in res: - self.blocked = True - return res - - def unblock(self, reason=False): - """Unblock the user - - reason - reason for the log - - """ - token = self.site.getToken('csrf') - params = { - 'action': 'unblock', - 'user': self.name, - 'token': token - } - if reason: - params['reason'] = reason - req = api.APIRequest(self.site, params, write=False) - res = req.query() - if 'unblock' in res: - self.blocked = False - return res - - def __hash__(self): - return int(self.name) ^ hash(self.site.apibase) - - def __eq__(self, other): - if not isinstance(other, User): - return False - if self.name == other.name and self.site == other.site: - return True - return False - def __ne__(self, other): - if not isinstance(other, User): - return True - if self.name == other.name and self.site == other.site: - return False - return True - - def __str__(self): - return self.__class__.__name__ + ' ' + repr(self.name) + " on " + repr(self.site.domain) - - def __repr__(self): - return "<"+self.__module__+'.'+self.__class__.__name__+" "+repr(self.name)+" on "+repr(self.site.apibase)+">" - diff --git a/wikitools/wiki.py b/wikitools/wiki.py 
deleted file mode 100644 index 4267c9c..0000000 --- a/wikitools/wiki.py +++ /dev/null @@ -1,409 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . - -import cookielib -import api -import urllib -import re -import time -import os -import warnings -from urlparse import urlparse -from urllib2 import HTTPPasswordMgrWithDefaultRealm -try: - import cPickle as pickle -except: - import pickle - -class WikiError(Exception): - """Base class for errors""" - -class UserBlocked(WikiError): - """Trying to edit while blocked""" - -class Namespace(int): - """ - Class for namespace 'constants' - Names are based on canonical (non-localized) names - This functions as an integer in every way, except that the OR operator ( | ) - is overridden to produce a string namespace list for use in API queries - wikiobj.NS_MAIN|wikiobj.NS_USER|wikiobj.NS_PROJECT returns '0|2|4' - """ - def __or__(self, other): - return '|'.join([str(self), str(other)]) - - def __ror__(self, other): - return '|'.join([str(other), str(self)]) - -VERSION = '1.4' - -class Wiki: - """A Wiki site""" - - def __init__(self, url="https://en.wikipedia.org/w/api.php", httpuser=None, httppass=None, preauth=False): - """ - url - A URL to the site's API, defaults to en.wikipedia - httpuser - optional user name for HTTP Auth - httppass - password for HTTP 
Auth, leave out to enter interactively - preauth - true to send headers for HTTP Auth on the first request - instead of relying on the negotiation for them - - """ - self.apibase = url - self.cookies = WikiCookieJar() - self.username = '' - urlbits = urlparse(self.apibase) - self.domain = '://'.join([urlbits.scheme, urlbits.netloc]) - if httpuser is not None: - if httppass is None: - from getpass import getpass - self.httppass = getpass("HTTP Auth password for "+httpuser+": ") - if preauth: - self.httppass = httppass - self.auth = httpuser - else: - self.passman = HTTPPasswordMgrWithDefaultRealm() - self.passman.add_password(None, self.domain, httpuser, httppass) - else: - self.passman = None - self.auth = None - self.maxlag = 5 - self.maxwaittime = 120 - self.useragent = "python-wikitools/%s" % VERSION - self.cookiepath = '' - self.limit = 500 - self.siteinfo = {} - self.namespaces = {} - self.NSaliases = {} - self.assertval = None - self.newtoken = False - try: - self.setSiteinfo() - except api.APIError: # probably read-restricted - pass - - def setSiteinfo(self): - """Retrieves basic siteinfo - - Called when constructing, - or after login if the first call failed - - """ - params = {'action':'query', - 'meta':'siteinfo|tokens', - 'siprop':'general|namespaces|namespacealiases', - } - if self.maxlag < 120: - params['maxlag'] = 120 - req = api.APIRequest(self, params) - info = req.query(False) - sidata = info['query']['general'] - for item in sidata: - self.siteinfo[item] = sidata[item] - nsdata = info['query']['namespaces'] - for ns in nsdata: - nsinfo = nsdata[ns] - self.namespaces[nsinfo['id']] = nsinfo - if ns != "0": - try: - attr = "NS_%s" % (nsdata[ns]['canonical'].replace(' ', '_').upper()) - except KeyError: - attr = "NS_%s" % (nsdata[ns]['*'].replace(' ', '_').upper()) - else: - attr = "NS_MAIN" - setattr(self, attr.encode('utf8'), Namespace(ns.encode('utf8'))) - nsaliasdata = info['query']['namespacealiases'] - if nsaliasdata: - for ns in nsaliasdata: - 
self.NSaliases[ns['*']] = ns['id'] - if not 'writeapi' in sidata: - warnings.warn(UserWarning, "WARNING: Write-API not enabled, you will not be able to edit") - version = re.search("\d\.(\d\d)", self.siteinfo['generator']) - if not int(version.group(1)) >= 13: # Will this even work on 13? - warnings.warn(UserWarning, "WARNING: Some features may not work on older versions of MediaWiki") - if 'tokens' in info['query'].keys(): - self.newtoken = True - return self - - def login(self, username, password=False, remember=False, force=False, verify=True, domain=None): - """Login to the site - - remember - saves cookies to a file - the filename will be: - hash(username - apibase).cookies - the cookies will be saved in the current directory, change cookiepath - to use a different location - force - forces login over the API even if a cookie file exists - and overwrites an existing cookie file if remember is True - verify - Checks cookie validity with isLoggedIn() - domain - domain name, required for some auth systems like LDAP - - """ - if not force: - try: - cookiefile = self.cookiepath + str(hash(username+' - '+self.apibase))+'.cookies' - self.cookies.load(self, cookiefile, True, True) - self.username = username - if not verify or self.isLoggedIn(self.username): - return True - except: - pass - if not password: - from getpass import getpass - password = getpass("Wiki password for "+username+": ") - def loginerror(info): - try: - print info['login']['result'] - except: - print info['error']['code'] - print info['error']['info'] - return False - data = { - "action" : "login", - "lgname" : username, - "lgpassword" : password, - } - if domain is not None: - data["lgdomain"] = domain - if self.maxlag < 120: - data['maxlag'] = 120 - req = api.APIRequest(self, data) - info = req.query() - if info['login']['result'] == "Success": - self.username = username - elif info['login']['result'] == "NeedToken": - req.changeParam('lgtoken', info['login']['token']) - info = req.query() - if 
info['login']['result'] == "Success": - self.username = username - else: - return loginerror(info) - else: - return loginerror(info) - if not self.siteinfo: - self.setSiteinfo() - params = { - 'action': 'query', - 'meta': 'userinfo', - 'uiprop': 'rights', - } - if self.maxlag < 120: - params['maxlag'] = 120 - req = api.APIRequest(self, params) - info = req.query(False) - user_rights = info['query']['userinfo']['rights'] - if 'apihighlimits' in user_rights: - self.limit = 5000 - if remember: - cookiefile = self.cookiepath + str(hash(self.username+' - '+self.apibase))+'.cookies' - self.cookies.save(self, cookiefile, True, True) - if self.useragent == "python-wikitools/%s" % VERSION: - self.useragent = "python-wikitools/%s (User:%s)" % (VERSION, self.username) - return True - - def logout(self): - params = { 'action': 'logout' } - if self.maxlag < 120: - params['maxlag'] = 120 - cookiefile = self.cookiepath + str(hash(self.username+' - '+self.apibase))+'.cookies' - try: - os.remove(cookiefile) - except: - pass - req = api.APIRequest(self, params, write=True) - # action=logout returns absolutely nothing, which json.loads() treats as False - # causing APIRequest.query() to get stuck in a loop - req.opener.open(req.request) - self.cookies = WikiCookieJar() - self.username = '' - self.maxlag = 5 - self.useragent = "python-wikitools/%s" % VERSION - self.limit = 500 - return True - - def isLoggedIn(self, username = False): - """Verify that we are a logged in user - - username - specify a username to check against - - """ - - data = { - "action" : "query", - "meta" : "userinfo", - } - if self.maxlag < 120: - data['maxlag'] = 120 - req = api.APIRequest(self, data) - info = req.query(False) - if info['query']['userinfo']['id'] == 0: - return False - elif username and info['query']['userinfo']['name'] != username: - return False - else: - return True - - def setMaxlag(self, maxlag = 5): - """Set the maximum server lag to allow - - If the lag is > the maxlag value, all requests 
will wait - Setting to a negative number will disable maxlag checks - - """ - try: - int(maxlag) - except: - raise WikiError("maxlag must be an integer") - self.maxlag = int(maxlag) - return self.maxlag - - def setUserAgent(self, useragent): - """Function to set a different user-agent""" - self.useragent = str(useragent) - return self.useragent - - def setAssert(self, value): - """Set an assertion value - - This only makes a difference on sites with the AssertEdit extension - on others it will be silently ignored - This is only checked on edits, so only applied to write queries - - Set to None (the default) to not use anything - http://www.mediawiki.org/wiki/Extension:Assert_Edit - - """ - valid = ['user', 'bot', 'true', 'false', 'exists', 'test', None] - if value not in valid: - raise WikiError("Invalid assertion") - self.assertval = value - return self.assertval - - def getToken(self, type): - """Get a token - - For wikis with MW 1.24 or newer: - type (string) - csrf, deleteglobalaccount, patrol, rollback, setglobalaccountstatus, userrights, watch - - For older wiki versions, only csrf (edit, move, etc.) 
tokens are supported - - """ - if self.newtoken: - params = { - 'action':'query', - 'meta':'tokens', - 'type':type, - } - req = api.APIRequest(self, params) - response = req.query(False) - token = response['query']['tokens'][type+'token'] - else: - if type not in ['edit', 'delete', 'protect', 'move', 'block', 'unblock', 'email', 'csrf']: - raise WikiError('Token type unavailable') - params = { - 'action':'query', - 'prop':'info', - 'intoken':'edit', - 'titles':'1' - } - req = api.APIRequest(self, params) - response = req.query(False) - if response.get('data', False): - pid = response['data']['query']['pages'].keys()[0] - token = response['query']['pages'][pid]['edittoken'] - else: - pages = response['query']['pages'] - token = pages.itervalues().next()['edittoken'] - return token - - - def __hash__(self): - return hash(self.apibase) - - def __eq__(self, other): - if not isinstance(other, Wiki): - return False - if self.apibase == other.apibase: - return True - return False - def __ne__(self, other): - if not isinstance(other, Wiki): - return True - if self.apibase == other.apibase: - return False - return True - - def __str__(self): - if self.username: - user = ' - using User:'+self.username - else: - user = ' - not logged in' - return self.domain + user - - def __repr__(self): - if self.username: - user = ' User:'+self.username - else: - user = ' not logged in' - return "<"+self.__module__+'.'+self.__class__.__name__+" "+repr(self.apibase)+user+">" - - - -class CookiesExpired(WikiError): - """Cookies are expired, needs to be an exception so login() will use the API instead""" - -class WikiCookieJar(cookielib.FileCookieJar): - def save(self, site, filename=None, ignore_discard=False, ignore_expires=False): - if not filename: - filename = self.filename - old_umask = os.umask(0077) - f = open(filename, 'w') - f.write('') - content = '' - for c in self: - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired: - continue - cook = 
pickle.dumps(c, 2) - f.write(cook+'|~|') - content+=str(int(time.time()))+'|~|' # record the current time so we can test for expiration later - content+='site.limit = %d;' % (site.limit) # This eventially might have more stuff in it - f.write(content) - f.close() - os.umask(old_umask) - - def load(self, site, filename, ignore_discard, ignore_expires): - f = open(filename, 'r') - cookies = f.read().split('|~|') - saved = cookies[len(cookies)-2] - if int(time.time()) - int(saved) > 1296000: # 15 days, not sure when the cookies actually expire... - f.close() - os.remove(filename) - raise CookiesExpired - sitedata = cookies[len(cookies)-1] - del cookies[len(cookies)-2] - del cookies[len(cookies)-1] - for c in cookies: - cook = pickle.loads(c) - if not ignore_discard and cook.discard: - continue - if not ignore_expires and cook.is_expired: - continue - self.set_cookie(cook) - exec sitedata - f.close() - diff --git a/wikitools/wikifile.py b/wikitools/wikifile.py deleted file mode 100644 index 61f73a8..0000000 --- a/wikitools/wikifile.py +++ /dev/null @@ -1,257 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2009-2013 Alex Zaddach (mrzmanwiki@gmail.com) - -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . 
- -import wiki -import page -import api -import urllib2 -import warnings - -class FileDimensionError(wiki.WikiError): - """Invalid dimensions""" - -class UploadError(wiki.WikiError): - """Error during uploading""" - -class File(page.Page): - """A file on the wiki""" - def __init__(self, wiki, title, check=True, followRedir=False, section=False, sectionnumber=False, pageid=False): - """ - wiki - A wiki object - title - The page title, as a string or unicode object - check - Checks for existence, normalizes title, required for most things - followRedir - follow redirects (check must be true) - section - the section name - sectionnumber - the section number - pageid - pageid, can be in place of title - """ - page.Page.__init__(self, wiki, title, check, followRedir, section, sectionnumber, pageid) - if self.namespace != 6: - self.setNamespace(6, check) - self.usage = [] - self.filehistory = [] - - def getHistory(self, force=False): - warnings.warn("""File.getHistory has been renamed to File.getFileHistory""", FutureWarning) - return self.getFileHistory(force) - - def getFileHistory(self, force=False): - if self.filehistory and not force: - return self.filehistory - if self.pageid == 0 and not self.title: - self.setPageInfo() - params = { - 'action': 'query', - 'prop': 'imageinfo', - 'iilimit': self.site.limit, - } - if self.pageid > 0: - params['pageids'] = self.pageid - else: - params['titles'] = self.title - req = api.APIRequest(self.site, params) - self.filehistory = [] - for data in req.queryGen(): - pid = data['query']['pages'].keys()[0] - for item in data['query']['pages'][pid]['imageinfo']: - self.filehistory.append(item) - return self.filehistory - - def getUsage(self, titleonly=False, force=False, namespaces=False): - """Gets a list of pages that use the file - - titleonly - set to True to only create a list of strings, - else it will be a list of Page objects - force - reload the list even if it was generated before - namespaces - List of namespaces to 
restrict to (queries with this option will not be cached) - - """ - if self.usage and not reload: - if titleonly: - if namespaces is not False: - return [p.title for p in self.usage if p.namespace in namespaces] - else: - return [p.title for p in self.usage] - if namespaces is False: - return self.usage - else: - return [p for p in self.usage if p.namespace in namespaces] - else: - ret = [] - usage = [] - for title in self.__getUsageInternal(namespaces): - usage.append(title) - if titleonly: - ret.append(title.title) - if titleonly: - return ret - if namespaces is False: - self.usage = usage - return usage - - def getUsageGen(self, titleonly=False, force=False, namespaces=False): - """Generator function for pages that use the file - - titleonly - set to True to return strings, - else it will return Page objects - force - reload the list even if it was generated before - namespaces - List of namespaces to restrict to (queries with this option will not be cached) - - """ - if self.usage and not reload: - for title in self.usage: - if namespaces is False or title.namespace in namespaces: - if titleonly: - yield title.title - else: - yield title - else: - if namespaces is False: - self.usage = [] - for title in self.__getUsageInternal(): - if namespaces is False: - self.usage.append(title) - if titleonly: - yield title.title - else: - yield title - - def __getUsageInternal(self, namespaces=False): - params = {'action':'query', - 'list':'imageusage', - 'iutitle':self.title, - 'iulimit':self.site.limit, - } - if namespaces is not False: - params['iunamespace'] = '|'.join([str(ns) for ns in namespaces]) - while True: - req = api.APIRequest(self.site, params) - data = req.query(False) - for item in data['query']['imageusage']: - yield page.Page(self.site, item['title'], check=False, followRedir=False) - try: - params['iucontinue'] = data['query-continue']['imageusage']['iucontinue'] - except: - break - - def __extractToList(self, json, stuff): - list = [] - if stuff in 
json['query']: - for item in json['query'][stuff]: - list.append(item['title']) - return list - - def download(self, width=False, height=False, location=False): - """Download the image to a local file - - width/height - set width OR height of the downloaded image - location - set the filename to save to. If not set, the page title - minus the namespace prefix will be used and saved to the current directory - - """ - if self.pageid == 0: - self.setPageInfo() - params = {'action':'query', - 'prop':'imageinfo', - 'iiprop':'url' - } - if width and height: - raise FileDimensionError("Can't specify both width and height") - if width: - params['iiurlwidth'] = width - if height: - params['iiurlheight'] = height - if self.pageid != 0: - params['pageids'] = self.pageid - elif self.title: - params['titles'] = self.title - else: - self.setPageInfo() - if not self.exists: # Non-existant files may be on a shared repo (e.g. commons) - params['titles'] = self.title - else: - params['pageids'] = self.pageid - req = api.APIRequest(self.site, params) - res = req.query(False) - key = res['query']['pages'].keys()[0] - url = res['query']['pages'][key]['imageinfo'][0]['url'] - if not location: - location = self.title.split(':', 1)[1] - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.site.cookies)) - headers = { "User-agent": self.site.useragent } - request = urllib2.Request(url, None, headers) - data = opener.open(request) - f = open(location, 'wb', 0) - f.write(data.read()) - f.close() - return location - - def upload(self, fileobj=None, comment='', url=None, ignorewarnings=False, watch=False): - """Upload a file, requires the "poster" module - - fileobj - A file object opened for reading - comment - The log comment, used as the inital page content if the file - doesn't already exist on the wiki - url - A URL to upload the file from, if allowed on the wiki - ignorewarnings - Ignore warnings about duplicate files, etc. 
- watch - Add the page to your watchlist - - """ - if not api.canupload and fileobj: - raise UploadError("The poster module is required for file uploading") - if not fileobj and not url: - raise UploadError("Must give either a file object or a URL") - if fileobj and url: - raise UploadError("Cannot give a file and a URL") - if fileobj: - if not isinstance(fileobj, file): - raise UploadError('If uploading from a file, a file object must be passed') - if fileobj.mode not in ['r', 'rb', 'r+']: - raise UploadError('File must be readable') - fileobj.seek(0) - params = {'action':'upload', - 'comment':comment, - 'filename':self.unprefixedtitle, - 'token':self.site.getToken('csrf') - } - if url: - params['url'] = url - else: - params['file'] = fileobj - if ignorewarnings: - params['ignorewarnings'] = '' - if watch: - params['watch'] = '' - req = api.APIRequest(self.site, params, write=True, multipart=bool(fileobj)) - res = req.query() - if 'upload' in res: - if res['upload']['result'] == 'Success': - self.wikitext = '' - self.links = [] - self.templates = [] - self.exists = True - elif res['upload']['result'] == 'Warning': - for warning in res['upload']['warnings'].keys(): - if warning == 'duplicate': - print 'File is a duplicate of ' + res['upload']['warnings']['duplicate'][0] - elif warning == 'page-exists' or warning == 'exists': - print 'Page already exists: ' + res['upload']['warnings'][warning] - else: - print 'Warning: ' + warning + ' ' + res['upload']['warnings'][warning] - return res - - diff --git a/wikitools3/LICENSE.md b/wikitools3/LICENSE.md new file mode 100644 index 0000000..2fb2e74 --- /dev/null +++ b/wikitools3/LICENSE.md @@ -0,0 +1,675 @@ +### GNU GENERAL PUBLIC LICENSE + +Version 3, 29 June 2007 + +Copyright (C) 2007 Free Software Foundation, Inc. + + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ +### Preamble + +The GNU General Public License is a free, copyleft license for +software and other kinds of works. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom +to share and change all versions of a program--to make sure it remains +free software for all its users. We, the Free Software Foundation, use +the GNU General Public License for most of our software; it applies +also to any other work released this way by its authors. You can apply +it to your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you +have certain responsibilities if you distribute copies of the +software, or if you modify it: responsibilities to respect the freedom +of others. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + +Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + +Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the +manufacturer can do so. This is fundamentally incompatible with the +aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for +individuals to use, which is precisely where it is most unacceptable. +Therefore, we have designed this version of the GPL to prohibit the +practice for those products. If such problems arise substantially in +other domains, we stand ready to extend this provision to those +domains in future versions of the GPL, as needed to protect the +freedom of users. + +Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish +to avoid the special danger that patents applied to a free program +could make it effectively proprietary. To prevent this, the GPL +assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and +modification follow. + +### TERMS AND CONDITIONS + +#### 0. Definitions. + +"This License" refers to version 3 of the GNU General Public License. + +"Copyright" also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. 
The resulting work is called a "modified version" of +the earlier work or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based +on the Program. + +To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. + +An interactive user interface displays "Appropriate Legal Notices" to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +#### 1. Source Code. + +The "source code" for a work means the preferred form of the work for +making modifications to it. "Object code" means any non-source form of +a work. + +A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ +The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same +work. + +#### 2. Basic Permissions. + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. +You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes +it unnecessary. + +#### 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. + +#### 4. Conveying Verbatim Copies. 
+ +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +#### 5. Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: + +- a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. +- b) The work must carry prominent notices stating that it is + released under this License and any conditions added under + section 7. This requirement modifies the requirement in section 4 + to "keep intact all notices". +- c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. +- d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +#### 6. Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: + +- a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. +- b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the Corresponding + Source from a network server at no charge. +- c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. +- d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. +- e) Convey the object code using peer-to-peer transmission, + provided you inform other peers where the object code and + Corresponding Source of the work are being offered to the general + public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. 
For a particular product received by a particular user, +"normally used" refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. + +"Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. 
Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +#### 7. Additional Terms. + +"Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: + +- a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or +- b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or +- c) Prohibiting misrepresentation of the origin of that material, + or requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or +- d) Limiting the use for publicity purposes of names of licensors + or authors of the material; or +- e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or +- f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions + of it) with contractual assumptions of liability to the recipient, + for any liability that these contractual assumptions directly + impose on those licensors and authors. + +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. + +#### 8. Termination. + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +#### 9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run +a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +#### 10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +#### 11. Patents. 
+ +A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is "discriminatory" if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. 
You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +#### 12. No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree to +terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. + +#### 13. Use with the GNU Affero General Public License. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + +#### 14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in +detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU General Public +License "or any later version" applies to it, you have the option of +following the terms and conditions either of that numbered version or +of any later version published by the Free Software Foundation. If the +Program does not specify a version number of the GNU General Public +License, you may choose any version ever published by the Free +Software Foundation. + +If the Program specifies that a proxy can decide which future versions +of the GNU General Public License can be used, that proxy's public +statement of acceptance of a version permanently authorizes you to +choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +#### 15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +#### 16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +#### 17. Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +END OF TERMS AND CONDITIONS + +### How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively state +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper +mail. + +If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands \`show w' and \`show c' should show the +appropriate parts of the General Public License. Of course, your +program's commands might be different; for a GUI interface, you would +use an "about box". + +You should also get your employer (if you work as a programmer) or +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. For more information on this, and how to apply and follow +the GNU GPL, see <https://www.gnu.org/licenses/>. + +The GNU General Public License does not permit incorporating your +program into proprietary programs. If your program is a subroutine +library, you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use the +GNU Lesser General Public License instead of this License. But first, +please read <https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/wikitools/__init__.py b/wikitools3/__init__.py similarity index 54% rename from wikitools/__init__.py rename to wikitools3/__init__.py index 5b49c38..8801d51 100644 --- a/wikitools/__init__.py +++ b/wikitools3/__init__.py @@ -1,24 +1,25 @@ -# -*- coding: utf-8 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- # -*- coding: utf-8 -*- # Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) -# This file is part of wikitools. -# wikitools is free software: you can redistribute it and/or modify +# This file is part of wikitools3. +# wikitools3 is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. - -# wikitools is distributed in the hope that it will be useful, + +# wikitools3 is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. - + # You should have received a copy of the GNU General Public License -# along with wikitools. If not, see . +# along with wikitools3. If not, see . __all__ = ["wiki", "api", "page", "category", "user", "pagelist", "wikifile"] -from wiki import * -from api import * -from page import * -from category import * -from user import * -from wikifile import * +from wikitools3.api import * +from wikitools3.category import * +from wikitools3.page import * +from wikitools3.user import * +from wikitools3.wiki import * +from wikitools3.wikifile import * diff --git a/wikitools3/api.py b/wikitools3/api.py new file mode 100644 index 0000000..2849a9c --- /dev/null +++ b/wikitools3/api.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) + +# This file is part of wikitools3. 
+# wikitools3 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# wikitools3 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with wikitools3. If not, see . + +# This module is documented at http://code.google.com/p/python-wikitools/wiki/api + +import base64 +import copy +import re +import sys +import time +import urllib +import warnings +from urllib.parse import quote_plus + +import wikitools3.wiki as wiki +from poster3.encode import multipart_encode + +canupload = True + +import json + +try: + import gzip + from io import StringIO +except: + gzip = False + + +class APIError(Exception): + """Base class for errors""" + + +class APIDisabled(APIError): + """API not enabled""" + + +class APIRequest: + """A request to the site's API""" + + def __init__(self, wiki, data, write=False, multipart=False): + """ + wiki - A Wiki object + data - API parameters in the form of a dict + write - set to True if doing a write query, so it won't try again on error + multipart - use multipart data transfer, required for file uploads, + requires the poster3 package + + maxlag is set by default to 5 but can be changed + format is always set to json + """ + if not canupload and multipart: + raise APIError("The poster3 module is required for multipart support") + self.sleep = 5 + self.data = data.copy() + self.data["format"] = "json" + self.iswrite = write + if wiki.assertval is not None and self.iswrite: + self.data["assert"] = wiki.assertval + if not "maxlag" in self.data and not wiki.maxlag < 0: + self.data["maxlag"] = wiki.maxlag + 
self.multipart = multipart + if self.multipart: + (datagen, self.headers) = multipart_encode(self.data) + self.encodeddata = "" + for singledata in datagen: + self.encodeddata = self.encodeddata + singledata + else: + self.encodeddata = urlencode(self.data, 1) + self.headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Content-Length": str(len(self.encodeddata)), + } + self.headers["User-agent"] = wiki.useragent + if gzip: + self.headers["Accept-Encoding"] = "gzip" + self.wiki = wiki + self.response = False + if wiki.auth: + self.headers["Authorization"] = "Basic {0}".format( + base64.encodestring(wiki.auth + ":" + wiki.httppass) + ).replace("\n", "") + if hasattr(wiki, "passman"): + self.opener = urllib.build_opener( + urllib.HTTPDigestAuthHandler(wiki.passman), + urllib.HTTPCookieProcessor(wiki.cookies), + ) + else: + self.opener = urllib.build_opener(urllib.HTTPCookieProcessor(wiki.cookies)) + self.request = urllib.Request(self.wiki.apibase, self.encodeddata, self.headers) + + def setMultipart(self, multipart=True): + """Enable multipart data transfer, required for file uploads.""" + if not canupload and multipart: + raise APIError("The poster3 package is required for multipart support") + self.multipart = multipart + if multipart: + (datagen, headers) = multipart_encode(self.data) + self.headers.pop("Content-Length") + self.headers.pop("Content-Type") + self.headers.update(headers) + self.encodeddata = "" + for singledata in datagen: + self.encodeddata = self.encodeddata + singledata + else: + self.encodeddata = urlencode(self.data, 1) + self.headers["Content-Length"] = str(len(self.encodeddata)) + self.headers["Content-Type"] = "application/x-www-form-urlencoded" + + def changeParam(self, param, value): + """Change or add a parameter after making the request object + + Simply changing self.data won't work as it needs to update other things. 
+ + value can either be a normal string value, or a file-like object, + which will be uploaded, if setMultipart was called previously. + + """ + if param == "format": + raise APIError("You can not change the result format") + self.data[param] = value + if self.multipart: + (datagen, headers) = multipart_encode(self.data) + self.headers.pop("Content-Length") + self.headers.pop("Content-Type") + self.headers.update(headers) + self.encodeddata = "" + for singledata in datagen: + self.encodeddata = self.encodeddata + singledata + else: + self.encodeddata = urlencode(self.data, 1) + self.headers["Content-Length"] = str(len(self.encodeddata)) + self.headers["Content-Type"] = "application/x-www-form-urlencoded" + self.request = urllib.Request(self.wiki.apibase, self.encodeddata, self.headers) + + def query(self, querycontinue=True): + """Actually do the query here and return usable stuff + + querycontinue - look for query-continue in the results and continue querying + until there is no more data to retrieve (DEPRECATED: use queryGen as a more + reliable and efficient alternative) + + """ + if querycontinue and self.data["action"] == "query": + warnings.warn( + """The querycontinue option is deprecated and will be removed +in a future release, use the new queryGen function instead +for queries requring multiple requests""", + FutureWarning, + ) + data = False + while not data: + rawdata = self.__getRaw() + data = self.__parseJSON(rawdata) + if not data and type(data) is APIListResult: + break + if "error" in data: + if self.iswrite and data["error"]["code"] == "blocked": + raise wiki.UserBlocked(data["error"]["info"]) + raise APIError(data["error"]["code"], data["error"]["info"]) + if "query-continue" in data and querycontinue: + data = self.__longQuery(data) + return data + + def queryGen(self): + """Unlike the old query-continue method that tried to stitch results + together, which could work poorly for complex result sets and could + use a lot of memory, this yield 
each set returned by the API and lets + the user process the data. + Loosely based on the recommended implementation on mediawiki.org + + """ + reqcopy = copy.deepcopy(self.request) + self.changeParam("continue", "") + while True: + data = False + while not data: + rawdata = self.__getRaw() + data = self.__parseJSON(rawdata) + if not data and type(data) is APIListResult: + break + if "error" in data: + if self.iswrite and data["error"]["code"] == "blocked": + raise wiki.UserBlocked(data["error"]["info"]) + raise APIError(data["error"]["code"], data["error"]["info"]) + yield data + if "continue" not in data: + break + else: + self.request = copy.deepcopy(reqcopy) + for param in data["continue"]: + self.changeParam(param, data["continue"][param]) + + def __longQuery(self, initialdata): + """For queries that require multiple requests""" + self._continues = set() + self._generator = "" + total = initialdata + res = initialdata + params = self.data + numkeys = len(res["query-continue"].keys()) + while numkeys > 0: + key1 = "" + key2 = "" + possiblecontinues = res["query-continue"].keys() + if len(possiblecontinues) == 1: + key1 = possiblecontinues[0] + keylist = res["query-continue"][key1].keys() + if len(keylist) == 1: + key2 = keylist[0] + else: + for key in keylist: + if len(key) < 11: + key2 = key + break + else: + key2 = keylist[0] + else: + for posskey in possiblecontinues: + keylist = res["query-continue"][posskey].keys() + for key in keylist: + if len(key) < 11: + key1 = posskey + key2 = key + break + if key1: + break + else: + key1 = possiblecontinues[0] + key2 = res["query-continue"][key1].keys()[0] + if isinstance(res["query-continue"][key1][key2], int): + cont = res["query-continue"][key1][key2] + else: + cont = res["query-continue"][key1][key2].encode("utf-8") + if len(key2) >= 11 and key2.startswith("g"): + self._generator = key2 + for ckey in self._continues: + params.pop(ckey, None) + else: + self._continues.add(key2) + params[key2] = cont + req = 
def __getRaw(self):
    """Send the prepared request and return a readable response object.

    The request is retried with a growing delay (``self.sleep`` increases by
    5 seconds after every failure) for as long as it is a read request and
    ``self.sleep`` is still below ``self.wiki.maxwaittime``.  Write requests,
    and reads that have reached that limit, are not retried: the exception
    raised while opening or reading the response propagates to the caller.

    Side effect: the response headers are stored in ``self.response``.

    Returns the object produced by ``self.opener.open`` or, when the server
    answered with a gzip ``Content-encoding``, a ``gzip.GzipFile`` wrapping
    the compressed body.
    """
    import io  # local import: the module header only pulls StringIO out of io

    data = None
    while data is None:
        # ``except ():`` matches nothing, so non-retryable failures propagate
        # unchanged.  The previous ``except None`` raised a TypeError in
        # Python 3 and hid the real error.
        if self.sleep >= self.wiki.maxwaittime or self.iswrite:
            catcherror = ()
        else:
            catcherror = Exception
        try:
            data = self.opener.open(self.request)
            self.response = data.info()
            if gzip:  # module-level name; set to False when gzip is unavailable
                encoding = self.response.get("Content-encoding")
                if encoding in ("gzip", "x-gzip"):
                    # The body is bytes, so it must be buffered in BytesIO.
                    data = gzip.GzipFile("", "rb", 9, io.BytesIO(data.read()))
        except catcherror as exc:
            # Drop any half-processed response so the request is really retried.
            data = None
            print(
                "%s: %s trying request again in %d seconds"
                % (type(exc).__name__, exc, self.sleep)
            )
            time.sleep(self.sleep + 0.5)
            self.sleep += 5
    return data
class APIResult(dict):
    """A dict-shaped API result."""

    # Header (name, value) pairs of the HTTP response; assigned per instance
    # by the JSON parser.
    response = []


class APIListResult(list):
    """A list-shaped API result."""

    # Header (name, value) pairs of the HTTP response; assigned per instance
    # by the JSON parser.
    response = []


def resultCombine(type, old, new):
    """Experimental-ish result-combiner thing

    Merges the ``type`` entries of ``new`` into ``old`` and returns ``old``
    (which is modified in place).

    type - the result key to merge (a list name such as "allpages", or a
        prop name such as "links" that lives under each page)
    old - the accumulated action=query result
    new - the result of the follow-up request

    If the result isn't something from action=query (no "query" key),
    this will just explode with a KeyError.
    """
    ret = old
    newquery = new["query"]
    if type in newquery:  # Basic list, easy
        ret["query"].setdefault(type, []).extend(newquery[type])
        return ret
    # Else its some sort of prop=thing and/or a generator query
    retpages = ret["query"].setdefault("pages", {})
    for key, newpage in newquery.get("pages", {}).items():
        if key not in retpages:  # page only exists in the new result
            retpages[key] = newpage
            continue
        if type not in newpage:
            continue
        retpage = retpages[key]
        if type not in retpage:  # only the new one has it, just take it
            retpage[type] = newpage[type]
            continue
        # Both have entries: append the new ones, skipping duplicates, and
        # keep the order the API returned them in.
        merged = []
        seen = set()
        for entry in list(retpage[type]) + list(newpage[type]):
            try:
                marker = tuple(entry.items())
                hash(marker)
            except (AttributeError, TypeError):
                # Non-dict entries or unhashable (nested) values
                marker = repr(entry)
            if marker not in seen:
                seen.add(marker)
                merged.append(entry)
        retpage[type] = merged
    return ret


def urlencode(query, doseq=0):
    """Encode a mapping or a sequence of (key, value) pairs as a query string.

    This used to be a hand-patched copy of Python 2's urllib.urlencode that
    added UTF-8 support.  In Python 3 the standard library encodes str values
    as UTF-8 and passes bytes through, so the work is delegated to it.

    query - a mapping, or a sequence of two-element tuples
    doseq - when true, a sequence value produces one key=value pair per
        element instead of being stringified as a whole

    Raises TypeError if query is neither a mapping nor a sequence of tuples.
    """
    from urllib.parse import urlencode as _stdlib_urlencode

    return _stdlib_urlencode(query, doseq=bool(doseq))
class Category(page.Page):
    """A category on the wiki"""

    def __init__(
        self,
        site,
        title=False,
        check=True,
        followRedir=False,
        section=False,
        sectionnumber=False,
        pageid=False,
    ):
        """
        site - A wiki object
        title - The page title, as a string
        check - Checks for existence, normalizes title, required for most things
        followRedir - follow redirects (check must be true)
        section - the section name
        sectionnumber - the section number (False or None means no section)
        pageid - pageid, can be in place of title
        """
        # Page uses None for "no section number".  Passing the False default
        # straight through would be read as section number 0.
        if sectionnumber is False:
            sectionnumber = None
        page.Page.__init__(
            self,
            site=site,
            title=title,
            check=check,
            followRedir=followRedir,
            section=section,
            sectionnumber=sectionnumber,
            pageid=pageid,
        )
        self.members = []
        if self.namespace != 14:
            self.setNamespace(14, check)

    def getAllMembers(self, titleonly=False, reload=False, namespaces=False):
        """Gets a list of pages in the category

        titleonly - set to True to only create a list of strings,
        else it will be a list of Page objects
        reload - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)

        """
        if self.members and not reload:
            members = self.members
            if namespaces is not False:
                members = [p for p in members if p.namespace in namespaces]
        else:
            members = list(self.__getMembersInternal(namespaces))
            if namespaces is False:
                # Cache unrestricted listings whether or not titles were asked
                # for, as getAllMembersGen does.
                self.members = members
        if titleonly:
            return [p.title for p in members]
        return members

    def getAllMembersGen(self, titleonly=False, reload=False, namespaces=False):
        """Generator function for pages in the category

        titleonly - set to True to return strings,
        else it will return Page objects
        reload - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)

        """
        if self.members and not reload:
            for member in self.members:
                if namespaces is False or member.namespace in namespaces:
                    yield member.title if titleonly else member
            return
        # Collect locally and publish the cache only once the listing has been
        # fully consumed, so an abandoned generator cannot leave a truncated
        # member list behind.
        collected = []
        for member in self.__getMembersInternal(namespaces):
            collected.append(member)
            yield member.title if titleonly else member
        if namespaces is False:
            self.members = collected

    def __getMembersInternal(self, namespaces=False):
        """Yield an unchecked Page for every member, following continuation.

        namespaces - iterable of namespace numbers to restrict to, or False
        """
        params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": self.title,
            "cmlimit": self.site.limit,
            "cmprop": "title",
        }
        if namespaces is not False:
            params["cmnamespace"] = "|".join([str(ns) for ns in namespaces])
        while True:
            req = api.APIRequest(self.site, params)
            data = req.query(False)
            for item in data["query"]["categorymembers"]:
                yield page.Page(
                    self.site, item["title"], check=False, followRedir=False
                )
            if "continue" in data:
                # Current continuation format: send every returned key back.
                params.update(data["continue"])
                continue
            try:
                # Legacy continuation format
                params["cmcontinue"] = data["query-continue"]["categorymembers"][
                    "cmcontinue"
                ]
            except KeyError:
                break
def namespaceDetect(title, site):
    """Detect the namespace of a given title

    title - the page title
    site - the wiki object the page is on; its ``namespaces`` mapping
        (number -> info dict whose "*" entry is the name) and ``NSaliases``
        mapping (alias -> number) are consulted

    Returns the namespace number, or 0 when the title has no prefix or the
    prefix is not a known namespace name or alias.
    """
    prefix, colon, _ = title.partition(":")
    if not colon:
        return 0
    # wp:Foo and caTEGory:Foo are normalized by MediaWiki; underscores are
    # equivalent to spaces in titles.
    nsprefix = prefix.replace("_", " ").strip().lower()
    if not nsprefix:
        return 0
    for ns in site.namespaces:
        if nsprefix == site.namespaces[ns]["*"].lower():
            return int(ns)
    if site.NSaliases:
        for alias in site.NSaliases:
            if nsprefix == alias.replace("_", " ").lower():
                return int(site.NSaliases[alias])
    return 0
required for most things + followRedir - follow redirects (check must be true) + section - the section name + sectionnumber - the section number + pageid - pageid, can be in place of title + namespace - use to set the namespace prefix *if its not already in the title* + """ + # Initialize instance vars from function args + if not title and not pageid: + raise wiki.WikiError("No title or pageid given") + self.site = site + if pageid: + self.pageid = int(pageid) + else: + self.pageid = 0 + self.followRedir = followRedir + self.title = title + self.unprefixedtitle = False # will be set later + self.urltitle = "" + self.wikitext = "" + self.templates = [] + self.links = [] + self.categories = [] + self.exists = True # If we're not going to check, assume it does + self.protection = {} + self.namespace = namespace + + # Things that need to be done before anything else + if self.title: + self.title = self.title.replace("_", " ") + if self.namespace: + if namespace not in self.site.namespaces.keys(): + raise BadNamespace(namespace) + if self.title: + self.unprefixedtitle = self.title + self.title = ":".join( + (self.site.namespaces[self.namespace]["*"], self.title) + ) + if int(self.namespace) == 0 and self.title: + self.namespace = int(self.namespace) + self.unprefixedtitle = self.title + # Setting page info with API, should set: + # pageid, exists, title, unprefixedtitle, namespace + if check: + self.setPageInfo() + else: + if self.namespace is False and self.title: + self.namespace = namespaceDetect(self.title, self.site) + if self.namespace != 0: + nsname = self.site.namespaces[self.namespace]["*"] + self.unprefixedtitle = self.title.split(":", 1)[1] + self.title = ":".join((nsname, self.unprefixedtitle)) + else: + self.unprefixedtitle = self.title + + if section or sectionnumber is not None: + self.setSection(section, sectionnumber) + else: + self.section = False + if title: + if not isinstance(self.title, str): + self.title = str(self.title, "utf-8") + if not 
def setPageInfo(self):
    """Sets basic page info, required for almost everything

    Queries the API by page id when one is known, otherwise by title, and
    updates pageid, exists, title, unprefixedtitle and namespace from the
    answer.  A page id the wiki does not report as positive is stored as 0.

    Returns self.
    Raises wiki.WikiError if the page is missing and no title is known, and
    BadTitle if the wiki reports the title as invalid.
    """
    params = {"action": "query"}
    if self.pageid:
        params["pageids"] = self.pageid
    else:
        params["titles"] = self.title
    if self.followRedir:
        params["redirects"] = ""
    req = api.APIRequest(self.site, params)
    response = req.query(False)
    pages = response["query"]["pages"]
    # dict views cannot be indexed in Python 3, and the keys are strings, so
    # take the first key and convert it before comparing with a number.
    pagekey = next(iter(pages))
    info = pages[pagekey]
    self.pageid = int(pagekey)
    if self.pageid > 0:
        self.exists = True
    if "missing" in info:
        if not self.title:
            # Pageids are never recycled, so a bad pageid with no title will never work
            raise wiki.WikiError("Bad pageid given with no title")
        self.exists = False
    if "invalid" in info:
        raise BadTitle(self.title)
    if "title" in info:
        # Keep the title as str: encoding it to bytes breaks the str
        # operations performed on titles here and elsewhere.
        self.title = info["title"]
        self.namespace = int(info["ns"])
        if self.namespace != 0:
            self.unprefixedtitle = self.title.split(":", 1)[1]
        else:
            self.unprefixedtitle = self.title
    if self.pageid < 0:
        self.pageid = 0
    return self
def setSection(self, section=None, number=None):
    """Set a section for the page

    section - the section name (None or False means no section)
    number - the section number; takes precedence over the name

    Clears the cached wikitext and returns the new value of self.section:
    the section number as a string, or False when no section is selected or
    the named section was not found.
    Raises wiki.WikiError if number cannot be converted to an int.
    """
    if number is not None:
        try:
            self.section = str(int(number))
        except (TypeError, ValueError) as exc:
            raise wiki.WikiError("Section number must be an int") from exc
    elif section is None or section is False:
        # False is the constructor's "no section" value; do not spend an API
        # request looking it up as a heading.
        self.section = False
    else:
        self.section = self.__getSection(section)
    self.wikitext = ""
    return self.section
def isTalk(self):
    """Is the page a discussion page?

    Talk namespaces are the odd, non-negative namespace numbers.
    """
    if not self.title:
        self.setPageInfo()
    return self.namespace % 2 == 1 and self.namespace >= 0


def toggleTalk(self, check=True, followRedir=True):
    """Switch to and from the talk namespaces

    Returns a new page object that's either the talk or non-talk
    version of the current page, or False for pages in a negative
    (virtual) namespace.

    check and followRedir - same meaning as Page constructor

    """
    if not self.title:
        self.setPageInfo()
    ns = self.namespace
    if ns < 0:
        return False
    nsname = self.site.namespaces[ns]["*"]
    if self.isTalk():
        newns = self.site.namespaces[ns - 1]["*"]
    else:
        newns = self.site.namespaces[ns + 1]["*"]
    # Strip the namespace prefix only when there is one.  The main namespace
    # has an empty name, and splitting on ":" alone would cut a title such as
    # "Foo: bar" at its own colon.
    prefix = nsname + ":"
    if nsname and self.title.startswith(prefix):
        pagename = self.title[len(prefix):]
    else:
        pagename = self.title
    if newns != "":
        newname = newns + ":" + pagename
    else:
        newname = pagename
    return Page(self.site, newname, check, followRedir)
def getProtection(self, force=False):
    """Returns the current protection status of the page

    force - query the wiki even if the protection was loaded before

    The result maps each protected action to a dict with its "level" and
    "expiry"; expiry is the string "infinity" or a datetime.datetime parsed
    from the wiki's timestamp.  The result is cached in self.protection.
    """
    if self.protection and not force:
        return self.protection
    if self.pageid == 0 and not self.title:
        self.setPageInfo()
    params = {
        "action": "query",
        "prop": "info",
        "inprop": "protection",
    }
    if not self.exists or self.pageid <= 0:
        params["titles"] = self.title
    else:
        # Both branches used to send the title, which fails for a page that
        # is only known by its id.
        params["pageids"] = self.pageid
    req = api.APIRequest(self.site, params)
    response = req.query(False)
    # Address "pages" by name instead of assuming it is the first key of the
    # query result; dict views cannot be indexed in Python 3.
    pageinfo = next(iter(response["query"]["pages"].values()))
    # Build a fresh mapping so a forced reload drops protections that were
    # lifted since the last call.
    protection = {}
    for pr in pageinfo.get("protection", []):
        if pr["level"]:
            if pr["expiry"] == "infinity":
                expiry = "infinity"
            else:
                expiry = datetime.datetime.strptime(
                    pr["expiry"], "%Y-%m-%dT%H:%M:%SZ"
                )
            protection[pr["type"]] = {"expiry": expiry, "level": pr["level"]}
    self.protection = protection
    return self.protection
if we already loaded it before + + """ + if self.templates and not force: + return self.templates + if self.pageid == 0 and not self.title: + self.setPageInfo() + if not self.exists: + raise NoPage + params = { + "action": "query", + "prop": "templates", + "tllimit": self.site.limit, + } + if self.pageid: + params["pageids"] = self.pageid + else: + params["titles"] = self.title + req = api.APIRequest(self.site, params) + self.templates = [] + for data in req.queryGen(): + self.templates.extend(self.__extractToList(data, "templates")) + return self.templates + + def getCategories(self, force=False): + """Gets all list of all the categories on the page + + force - load the list even if we already loaded it before + + """ + if self.categories and not force: + return self.categories + if self.pageid == 0 and not self.title: + self.setPageInfo() + if not self.exists: + raise NoPage + params = { + "action": "query", + "prop": "categories", + "cllimit": self.site.limit, + } + if self.pageid: + params["pageids"] = self.pageid + else: + params["titles"] = self.title + req = api.APIRequest(self.site, params) + self.categories = [] + for data in req.queryGen(): + self.categories.extend(self.__extractToList(data, "categories")) + return self.categories + + def getHistory(self, direction="older", content=True, limit="all"): + """Get the history of a page + + direction - 2 options: 'older' (default) - start with the current revision and get older ones + 'newer' - start with the oldest revision and get newer ones + content - If False, get only metadata (timestamp, edit summary, user, etc) + If True (default), also get the revision text + limit - Only retrieve a certain number of revisions. 
If 'all' (default), all revisions are returned + + The data is returned in essentially the same format as the API, a list of dicts that look like: + {u'*': u"Page content", # Only returned when content=True + u'comment': u'Edit summary', + u'contentformat': u'text/x-wiki', # Only returned when content=True + u'contentmodel': u'wikitext', # Only returned when content=True + u'parentid': 139946, # id of previous revision + u'revid': 139871, # revision id + u'sha1': u'0a5cec3ca3e084e767f00c9a5645c17ac27b2757', # sha1 hash of page content + u'size': 129, # size of page in bytes + u'timestamp': u'2002-08-05T14:11:27Z', # timestamp of edit + u'user': u'Username', + u'userid': 48 # user id + } + + Note that unlike other get* functions, the data is not cached + """ + max = limit + if limit == "all": + max = float("inf") + if limit == "all" or limit > self.site.limit: + limit = self.site.limit + history = [] + rvc = None + while True: + revs, rvc = self.__getHistoryInternal(direction, content, limit, rvc) + history = history + revs + if len(history) == max or rvc is None: + break + if max - len(history) < self.site.limit: + limit = max - len(history) + return history + + def getHistoryGen(self, direction="older", content=True, limit="all"): + """Generator function for page history + + The interface is the same as getHistory, but it will only retrieve 1 revision at a time. 
def __extractToList(self, json, stuff):
    """Collect the titles listed under one property of this page in an API result

    json - a decoded API response containing json["query"]["pages"]
    stuff - the property to read from the page entry, e.g. "templates"

    Returns a list of the "title" values found (empty if the property is
    absent). If self.pageid is still 0, it is filled in from the response.
    """
    pages = json["query"]["pages"]
    if self.pageid == 0:
        # dict views cannot be indexed on Python 3, so take the first key via
        # an iterator. Stored as an int, matching __getHistoryInternal.
        self.pageid = int(next(iter(pages)))
    entry = pages[str(self.pageid)]
    return [item["title"] for item in entry.get(stuff, [])]
'newtext' is equivalent to 'text' + 'basetime' is equivalent to 'basetimestamp' + + """ + validargs = set( + [ + "text", + "summary", + "minor", + "notminor", + "bot", + "basetimestamp", + "starttimestamp", + "recreate", + "createonly", + "nocreate", + "watch", + "unwatch", + "watchlist", + "prependtext", + "appendtext", + "section", + "captchaword", + "captchaid", + ] + ) + # For backwards compatibility + if "newtext" in kwargs: + kwargs["text"] = kwargs["newtext"] + del kwargs["newtext"] + if "basetime" in kwargs: + kwargs["basetimestamp"] = kwargs["basetime"] + del kwargs["basetime"] + if len(args) and "text" not in kwargs: + kwargs["text"] = args[0] + skipmd5 = False + if "skipmd5" in kwargs and kwargs["skipmd5"]: + skipmd5 = True + invalid = set(kwargs.keys()).difference(validargs) + if invalid: + for arg in invalid: + del kwargs[arg] + if not self.title: + self.setPageInfo() + if not "section" in kwargs and self.section is not False: + kwargs["section"] = self.section + if ( + not "text" in kwargs + and not "prependtext" in kwargs + and not "appendtext" in kwargs + ): + raise EditError("No text specified") + if "prependtext" in kwargs and "section" in kwargs: + raise EditError("Bad param combination") + if "createonly" in kwargs and "nocreate" in kwargs: + raise EditError("Bad param combination") + token = self.site.getToken("csrf") + if "text" in kwargs: + hashtext = kwargs["text"] + elif "prependtext" in kwargs and "appendtext" in kwargs: + hashtext = kwargs["prependtext"] + kwargs["appendtext"] + elif "prependtext" in kwargs: + hashtext = kwargs["prependtext"] + else: + hashtext = kwargs["appendtext"] + params = { + "action": "edit", + "title": self.title, + "token": token, + } + if not skipmd5: + if not isinstance(hashtext, str): + hashtext = str(hashtext) + hashtext = unicodedata.normalize("NFC", hashtext).encode("utf8") + params["md5"] = md5(hashtext).hexdigest() + params.update(kwargs) + req = api.APIRequest(self.site, params, write=True) + result = 
def protect(self, restrictions=None, expirations=None, reason=False, cascade=False):
    """Protect a page

    Restrictions and expirations are dictionaries of
    protection level/expiry settings, e.g., {'edit':'sysop'} and
    {'move':'3 days'}. expirations can also be a string to set
    all levels to the same expiration

    reason - summary for log
    cascade - apply protection to all pages transcluded on the page

    Raises ProtectError if no restrictions are given or if there are more
    per-level expirations than restrictions.
    """
    # None stands in for the old `{}` defaults; omitting either argument
    # behaves exactly as before.
    if restrictions is None:
        restrictions = {}
    if expirations is None:
        expirations = {}
    if not self.title:
        self.setPageInfo()
    if not restrictions:
        raise ProtectError("No protection levels given")
    # A single expiry string applies to every level, so comparing its
    # character count with the number of restrictions is meaningless.
    if not isinstance(expirations, str) and len(expirations) > len(restrictions):
        raise ProtectError("More expirations than restrictions given")
    token = self.site.getToken("csrf")
    protections = "|".join(
        level + "=" + restrictions[level] for level in restrictions
    )
    if isinstance(expirations, dict):
        # One expiry per restriction, in the same order; levels without an
        # explicit expiry are protected indefinitely.
        expiry = "|".join(
            expirations.get(level, "indefinite") for level in restrictions
        )
    elif isinstance(expirations, str):
        expiry = expirations
    else:
        expiry = ""
    params = {
        "action": "protect",
        "title": self.title,
        "token": token,
        "protections": protections,
    }
    if expiry:
        params["expiry"] = expiry
    if reason:
        params["reason"] = reason
    if cascade:
        params["cascade"] = ""
    req = api.APIRequest(self.site, params, write=True)
    result = req.query()
    if "protect" in result:
        # Drop the cached protection info so it is re-read next time.
        self.protection = {}
    return result
def __str__(self):
    """Return "<ClassName> '<title>' from '<site domain>'"

    When the page has no title, "pageid: <id>" is shown in its place.
    """
    if self.title:
        title = self.title
    else:
        # pageid is normally an int; concatenating it directly raised TypeError.
        title = "pageid: " + str(self.pageid)
    return (
        self.__class__.__name__
        + " "
        + repr(title)
        + " from "
        + repr(self.site.domain)
    )
def listFromTitles(site, titles, check=True, followRedir=False):
    """Create a list of page objects from a list of titles

    check and followRedir have the same meaning as in page.Page

    With check=False no request is made and plain Page objects are returned.
    Otherwise the titles are looked up through the API in batches and each
    result entry is turned into an object by makePage.
    """
    if not check:
        return [page.Page(site, title=title, check=False) for title in titles]
    titles = list(titles)
    if not titles:
        # Nothing to look up; avoid sending a query with an empty title list.
        return []
    # Titles are sent in batches of a tenth of site.limit. Floor division
    # keeps the slice bounds integers; true division yields floats on Python 3
    # and slicing with them raises TypeError.
    batch = max(1, int(site.limit) // 10)
    ret = []
    for start in range(0, len(titles), batch):
        params = {
            "action": "query",
            "titles": "|".join(titles[start : start + batch]),
        }
        if followRedir:
            params["redirects"] = ""
        res = api.APIRequest(site, params).query(False)
        pages = res["query"]["pages"]
        ret.extend(makePage(key, pages[key], site) for key in pages)
    return ret
def makePage(key, result, site):
    """Build a page object for one entry of an API "pages" result

    key - the entry's key in the "pages" dict, passed on as the pageid
    result - the entry itself
    site - the wiki object the page belongs to

    Returns a Category for namespace 14, a File for namespace 6 and a Page
    otherwise, or False if the entry is flagged "invalid".
    """
    if "invalid" in result:
        # Return straight away: previously the False placeholder fell through
        # to item.setNamespace() and crashed when "ns" was also present.
        return False
    title = result.get("title", False)
    ns = result.get("ns")
    if ns == 14:
        factory = category.Category
    elif ns == 6:
        factory = wikifile.File
    else:
        factory = page.Page
    item = factory(site, title=title, check=False, followRedir=False, pageid=key)
    if "missing" in result:
        item.exists = False
    if ns is not None:
        item.setNamespace(int(ns))
    return item
def IPnorm(self, ip):
    """This is basically a port of MediaWiki's IP::sanitizeIP but assuming no CIDR ranges

    ip - an IPv6 address as a string

    Returns the address upper-cased, with any '::' abbreviation expanded to
    explicit zero groups and leading zeros stripped from every group.
    """
    ip = ip.upper()
    # Expand zero abbreviations
    abbrevPos = ip.find("::")
    if abbrevPos != -1:
        addressEnd = len(ip) - 1
        if abbrevPos == 0:
            # '::' at the beginning ('::' alone needs one extra zero group)
            repeat = "0:"
            extra = "0" if ip == "::" else ""
            pad = 9  # 7 separators + 2 for the '::' itself
        elif abbrevPos == addressEnd - 1:
            # '::' at the end
            repeat = ":0"
            extra = ""
            pad = 9
        else:
            # '::' in the middle
            repeat = ":0"
            extra = ":"
            pad = 8  # 6 separators + 2 for the '::' itself
        ip = ip.replace("::", repeat * (pad - ip.count(":")) + extra)
    # Remove leading zeros from each group as needed. The pattern used to keep
    # PHP's '/.../' delimiters (so it never matched) and the replacement was a
    # non-raw "\1\2", i.e. two control characters rather than backreferences.
    ip = re.sub(r"(^|:)0+([0-9A-Fa-f]{1,4})", r"\1\2", ip)
    return ip
def __hash__(self):
    """Hash on the user name and the site's API URL

    These are the same two things __eq__ compares, so equal users hash
    equally.
    """
    # int(self.name) raised ValueError for every non-numeric username, which
    # made User objects unusable in sets and as dict keys.
    return hash(self.name) ^ hash(self.site.apibase)
+ + self.__class__.__name__ + + " " + + repr(self.name) + + " on " + + repr(self.site.apibase) + + ">" + ) diff --git a/wikitools3/wiki.py b/wikitools3/wiki.py new file mode 100644 index 0000000..749c3c0 --- /dev/null +++ b/wikitools3/wiki.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2008-2013 Alex Zaddach (mrzmanwiki@gmail.com) + +# This file is part of wikitools3. +# wikitools3 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# wikitools3 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with wikitools3. If not, see . 
class Namespace(int):
    """
    Integer subclass used for the namespace 'constants'
    Names are based on canonical (non-localized) names
    It behaves like a plain integer, except that the OR operator ( | )
    builds a pipe-separated namespace list for use in API queries:
    wikiobj.NS_MAIN|wikiobj.NS_USER|wikiobj.NS_PROJECT returns '0|2|4'
    """

    def __or__(self, other):
        # self | other
        return "%s|%s" % (self, other)

    def __ror__(self, other):
        # other | self; this is what lets an already-built 'a|b' string chain on
        return "%s|%s" % (other, self)
def setSiteinfo(self):
    """Retrieves basic siteinfo

    Called when constructing,
    or after login if the first call failed

    Fills self.siteinfo, self.namespaces and self.NSaliases, sets one NS_*
    attribute per namespace, and sets self.newtoken when the response's
    "query" section contains "tokens". Returns self.
    """
    params = {
        "action": "query",
        "meta": "siteinfo|tokens",
        "siprop": "general|namespaces|namespacealiases",
    }
    if self.maxlag < 120:
        params["maxlag"] = 120
    req = api.APIRequest(self, params)
    info = req.query(False)
    sidata = info["query"]["general"]
    self.siteinfo.update(sidata)
    nsdata = info["query"]["namespaces"]
    for ns, nsinfo in nsdata.items():
        self.namespaces[nsinfo["id"]] = nsinfo
        if ns != "0":
            # Prefer the canonical (non-localized) name when there is one.
            if "canonical" in nsinfo:
                name = nsinfo["canonical"]
            else:
                name = nsinfo["*"]
            attr = "NS_%s" % name.replace(" ", "_").upper()
        else:
            attr = "NS_MAIN"
        # Attribute names must be str on Python 3; encoding them to bytes
        # made setattr() raise TypeError.
        setattr(self, attr, Namespace(ns))
    nsaliasdata = info["query"]["namespacealiases"]
    if nsaliasdata:
        for alias in nsaliasdata:
            self.NSaliases[alias["*"]] = alias["id"]
    if "writeapi" not in sidata:
        # warnings.warn takes (message, category); the arguments were swapped.
        warnings.warn(
            "WARNING: Write-API not enabled, you will not be able to edit",
            UserWarning,
        )
    version = re.search(r"\d\.(\d\d)", self.siteinfo["generator"])
    # No match means the minor version could not be read as two digits; warn
    # as for an old version rather than crash on None.
    if version is None or int(version.group(1)) < 13:
        warnings.warn(
            "WARNING: Some features may not work on older versions of MediaWiki",
            UserWarning,
        )
    if "tokens" in info["query"]:
        self.newtoken = True
    return self
def setMaxlag(self, maxlag=5):
    """Set the maximum server lag to allow

    If the lag is > the maxlag value, all requests will wait
    Setting to a negative number will disable maxlag checks

    maxlag - anything int() accepts; stored as an int in self.maxlag
    Returns the stored value. Raises WikiError if it cannot be converted.
    """
    try:
        value = int(maxlag)
    except (TypeError, ValueError, OverflowError) as err:
        # Catch only conversion failures; a bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        raise WikiError("maxlag must be an integer") from err
    self.maxlag = value
    return self.maxlag
def getToken(self, type):
    """Get a token

    For wikis with MW 1.24 or newer:
    type (string) - csrf, deleteglobalaccount, patrol, rollback, setglobalaccountstatus, userrights, watch

    For older wiki versions, only csrf (edit, move, etc.) tokens are supported

    Returns the token string. Raises WikiError on the legacy path if the
    requested type is not one of the accepted names.
    """
    # NOTE: `type` shadows the builtin, but it is the public parameter name
    # and callers may pass it by keyword, so it stays.
    if self.newtoken:
        params = {
            "action": "query",
            "meta": "tokens",
            "type": type,
        }
        response = api.APIRequest(self, params).query(False)
        return response["query"]["tokens"][type + "token"]
    if type not in [
        "edit",
        "delete",
        "protect",
        "move",
        "block",
        "unblock",
        "email",
        "csrf",
    ]:
        raise WikiError("Token type unavailable")
    # Legacy path: every accepted type is served by the edit token of an
    # arbitrary title.
    params = {
        "action": "query",
        "prop": "info",
        "intoken": "edit",
        "titles": "1",
    }
    response = api.APIRequest(self, params).query(False)
    # dict views have no indexing and no itervalues()/next() on Python 3, so
    # the first key or value is taken through an iterator.
    if response.get("data", False):
        pid = next(iter(response["data"]["query"]["pages"]))
        token = response["query"]["pages"][pid]["edittoken"]
    else:
        pages = response["query"]["pages"]
        token = next(iter(pages.values()))["edittoken"]
    return token
class WikiCookieJar(http.cookiejar.FileCookieJar):
    """Cookie jar that stores pickled cookies plus a little site state

    File layout (binary): each cookie pickled with protocol 2, then the save
    time as epoch seconds, then "site.limit = <n>;", all joined by "|~|".
    """

    _SEPARATOR = b"|~|"
    _MAX_AGE = 1296000  # 15 days, not sure when the cookies actually expire...

    def save(self, site, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies and site.limit to filename (default self.filename)

        site - object whose integer `limit` attribute is stored with the cookies
        ignore_discard - also save cookies marked to be discarded
        ignore_expires - also save cookies that have already expired
        """
        if not filename:
            filename = self.filename
        chunks = []
        for c in self:
            if not ignore_discard and c.discard:
                continue
            # is_expired is a method; testing the bound method itself was
            # always true, so every cookie was skipped.
            if not ignore_expires and c.is_expired():
                continue
            chunks.append(pickle.dumps(c, 2))
        # record the current time so we can test for expiration later
        chunks.append(str(int(time.time())).encode("ascii"))
        # This eventually might have more stuff in it
        chunks.append(("site.limit = %d;" % site.limit).encode("ascii"))
        # Pickles are bytes, so the file is written in binary mode. The umask
        # keeps the file private and is restored even if writing fails.
        old_umask = os.umask(0o077)
        try:
            with open(filename, "wb") as f:
                f.write(self._SEPARATOR.join(chunks))
        finally:
            os.umask(old_umask)

    def load(self, site, filename, ignore_discard, ignore_expires):
        """Read cookies written by save() and restore site.limit

        Raises CookiesExpired (after deleting the file) if it was saved more
        than 15 days ago, and ValueError if the file is not in the expected
        layout.
        """
        with open(filename, "rb") as f:
            parts = f.read().split(self._SEPARATOR)
        # The last two fields are the timestamp and the site data; everything
        # before them is one pickled cookie each.
        *pickled, saved, sitedata = parts
        if int(time.time()) - int(saved) > self._MAX_AGE:
            os.remove(filename)
            raise CookiesExpired
        for blob in pickled:
            # NOTE(security): unpickling runs arbitrary code if the cookie
            # file has been tampered with; only load files this class wrote.
            cook = pickle.loads(blob)
            if not ignore_discard and cook.discard:
                continue
            if not ignore_expires and cook.is_expired():
                continue
            self.set_cookie(cook)
        # Parse the one setting we store instead of exec()-ing file contents.
        match = re.search(rb"site\.limit = (\d+);", sitedata)
        if match:
            site.limit = int(match.group(1))

import io
import urllib
import urllib.request
import warnings

import wikitools3.api as api
import wikitools3.page as page
import wikitools3.wiki as wiki


class FileDimensionError(wiki.WikiError):
    """Invalid dimensions"""


class UploadError(wiki.WikiError):
    """Error during uploading"""


class File(page.Page):
    """A file on the wiki"""

    def __init__(
        self,
        wiki,
        title,
        check=True,
        followRedir=False,
        section=False,
        sectionnumber=False,
        pageid=False,
    ):
        """
        wiki - A wiki object
        title - The page title, as a string
        check - Checks for existence, normalizes title, required for most things
        followRedir - follow redirects (check must be true)
        section - the section name
        sectionnumber - the section number
        pageid - pageid, can be in place of title
        """
        # Inside this method the ``wiki`` parameter hides the imported
        # ``wiki`` module; only the parameter is used here.
        page.Page.__init__(
            self, wiki, title, check, followRedir, section, sectionnumber, pageid
        )
        # Files are always placed in namespace 6
        if self.namespace != 6:
            self.setNamespace(6, check)
        self.usage = []  # cache filled by getUsage / getUsageGen
        self.filehistory = []  # cache filled by getFileHistory

    def getHistory(self, force=False):
        """Deprecated alias of getFileHistory, issues a FutureWarning"""
        warnings.warn(
            """File.getHistory has been renamed to File.getFileHistory""", FutureWarning
        )
        return self.getFileHistory(force)

    def getFileHistory(self, force=False):
        """Gets the imageinfo entries of the file

        force - reload the list even if it was generated before

        Returns the list of imageinfo items, which is also cached in
        self.filehistory.

        """
        if self.filehistory and not force:
            return self.filehistory
        if self.pageid == 0 and not self.title:
            self.setPageInfo()
        params = {
            "action": "query",
            "prop": "imageinfo",
            "iilimit": self.site.limit,
        }
        if self.pageid > 0:
            params["pageids"] = self.pageid
        else:
            params["titles"] = self.title
        req = api.APIRequest(self.site, params)
        # Build the list locally so an error part-way through does not leave
        # a truncated cache behind.
        history = []
        for data in req.queryGen():
            # The query is for a single page; dict views cannot be indexed in
            # Python 3, so take the first value through an iterator.
            pageinfo = next(iter(data["query"]["pages"].values()))
            # A page without file revisions carries no "imageinfo" key
            history.extend(pageinfo.get("imageinfo", []))
        self.filehistory = history
        return self.filehistory

    def getUsage(self, titleonly=False, force=False, namespaces=False):
        """Gets a list of pages that use the file

        titleonly - set to True to only create a list of strings,
        else it will be a list of Page objects
        force - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)

        """
        if self.usage and not force:
            pages = self.usage
            if namespaces is not False:
                pages = [p for p in pages if p.namespace in namespaces]
        else:
            pages = list(self.__getUsageInternal(namespaces))
            # Cache unrestricted results whether or not titleonly is set
            if namespaces is False:
                self.usage = pages
        if titleonly:
            return [p.title for p in pages]
        return pages

    def getUsageGen(self, titleonly=False, force=False, namespaces=False):
        """Generator function for pages that use the file

        titleonly - set to True to return strings,
        else it will return Page objects
        force - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)

        The cache is only replaced once the generator has been run to the end,
        so a partially consumed generator never leaves a truncated list behind.

        """
        if self.usage and not force:
            for title in self.usage:
                if namespaces is False or title.namespace in namespaces:
                    yield title.title if titleonly else title
            return
        fetched = []
        # The namespace restriction must be passed on to the query, otherwise
        # pages from every namespace are returned.
        for title in self.__getUsageInternal(namespaces):
            fetched.append(title)
            yield title.title if titleonly else title
        if namespaces is False:
            self.usage = fetched

    def __getUsageInternal(self, namespaces=False):
        """Yield an unchecked Page for every imageusage entry of the file

        namespaces - List of namespaces to restrict to, False for no restriction

        """
        params = {
            "action": "query",
            "list": "imageusage",
            "iutitle": self.title,
            "iulimit": self.site.limit,
        }
        if namespaces is not False:
            params["iunamespace"] = "|".join(str(ns) for ns in namespaces)
        while True:
            data = api.APIRequest(self.site, params).query(False)
            for item in data["query"]["imageusage"]:
                yield page.Page(
                    self.site, item["title"], check=False, followRedir=False
                )
            try:
                params["iucontinue"] = data["query-continue"]["imageusage"][
                    "iucontinue"
                ]
            except KeyError:
                # No continuation data in the response: this was the last batch
                break

    def __extractToList(self, json, stuff):
        """Return the titles listed under json["query"][stuff], or [] if absent"""
        if stuff not in json["query"]:
            return []
        return [item["title"] for item in json["query"][stuff]]

    def download(self, width=False, height=False, location=False):
        """Download the image to a local file

        width/height - set width OR height of the downloaded image
        location - set the filename to save to. If not set, the page title
        minus the namespace prefix will be used and saved to the current directory

        Raises FileDimensionError if both width and height are given.
        Returns the filename the image was saved to.

        """
        # Reject bad arguments before making any request
        if width and height:
            raise FileDimensionError("Can't specify both width and height")
        if self.pageid == 0:
            self.setPageInfo()
        params = {"action": "query", "prop": "imageinfo", "iiprop": "url"}
        if width:
            params["iiurlwidth"] = width
        if height:
            params["iiurlheight"] = height
        if self.pageid != 0:
            params["pageids"] = self.pageid
        elif self.title:
            params["titles"] = self.title
        else:
            self.setPageInfo()
            if (
                not self.exists
            ):  # Non-existent files may be on a shared repo (e.g. commons)
                params["titles"] = self.title
            else:
                params["pageids"] = self.pageid
        res = api.APIRequest(self.site, params).query(False)
        # dict views cannot be indexed in Python 3; take the single page entry
        pageinfo = next(iter(res["query"]["pages"].values()))
        imageinfo = pageinfo["imageinfo"][0]
        url = imageinfo["url"]
        if width or height:
            # The scaled image is reported separately as "thumburl"; "url"
            # always points at the full-size file.
            url = imageinfo.get("thumburl") or url
        if not location:
            location = self.title.split(":", 1)[1]
        # These helpers live in urllib.request in Python 3
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self.site.cookies)
        )
        headers = {"User-agent": self.site.useragent}
        request = urllib.request.Request(url, None, headers)
        # The request is opened first, so a failed request creates no file
        with opener.open(request) as data, open(location, "wb") as f:
            f.write(data.read())
        return location

    def upload(
        self, fileobj=None, comment="", url=None, ignorewarnings=False, watch=False
    ):
        """Upload a file, requires the "poster3" module

        fileobj - A file object opened for reading
        comment - The log comment, used as the initial page content if the file
        doesn't already exist on the wiki
        url - A URL to upload the file from, if allowed on the wiki
        ignorewarnings - Ignore warnings about duplicate files, etc.
        watch - Add the page to your watchlist

        Raises UploadError if the arguments are unusable.
        Returns the API response; upload warnings are printed.

        """
        if not api.canupload and fileobj:
            raise UploadError("The poster3 module is required for file uploading")
        if not fileobj and not url:
            raise UploadError("Must give either a file object or a URL")
        if fileobj and url:
            raise UploadError("Cannot give a file and a URL")
        if fileobj:
            if not isinstance(fileobj, io.IOBase):
                raise UploadError(
                    "If uploading from a file, a file object must be passed"
                )
            # readable() works for every io object, including in-memory ones
            # that have no "mode" attribute and modes such as "rb+".
            if not fileobj.readable():
                raise UploadError("File must be readable")
            fileobj.seek(0)
        params = {
            "action": "upload",
            "comment": comment,
            "filename": self.unprefixedtitle,
            "token": self.site.getToken("csrf"),
        }
        if url:
            params["url"] = url
        else:
            params["file"] = fileobj
        if ignorewarnings:
            params["ignorewarnings"] = ""
        if watch:
            params["watch"] = ""
        req = api.APIRequest(self.site, params, write=True, multipart=bool(fileobj))
        res = req.query()
        if "upload" in res:
            result = res["upload"]["result"]
            if result == "Success":
                # Reset the cached page data, the upload replaced the page
                self.wikitext = ""
                self.links = []
                self.templates = []
                self.exists = True
            elif result == "Warning":
                # Warning details are not always strings, so convert them
                # before building the message.
                for warning, detail in res["upload"]["warnings"].items():
                    if warning == "duplicate":
                        print("File is a duplicate of " + str(detail[0]))
                    elif warning == "page-exists" or warning == "exists":
                        print("Page already exists: " + str(detail))
                    else:
                        print("Warning: " + warning + " " + str(detail))
        return res