Is it possible to have multiple loops with asyncio? If the answer is yes, how can I do that? My use case is:

* I extract URLs from a list of websites asynchronously
* For each "sub-URL list", I would crawl them asynchronously

Example to extract the URLs:
    import asyncio
    import aiohttp
    from suburls import extractsuburls

    @asyncio.coroutine
    def extracturls(url):
        subtasks = []
        response = yield from aiohttp.request('GET', url)
        suburl_list = yield from response.text()
        for suburl in suburl_list:
            subtasks.append(asyncio.Task(extractsuburls(suburl)))
        # Start a second loop from inside the coroutine to crawl the sub-URLs:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.gather(*subtasks))

    if __name__ == '__main__':
        urls_list = ['http://example1.com', 'http://example2.com']
        subtasks = []
        for url in urls_list:
            subtasks.append(asyncio.Task(extracturls(url)))
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.gather(*subtasks))
        loop.close()
If I execute this code, I get an error when Python tries to launch the second loop, which says that a loop is already running.
P.S.: my module "extractsuburls" uses aiohttp to perform the web requests.
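For what it's worth, this minimal snippet (no aiohttp involved) reproduces the same error, so the problem seems to be calling run_until_complete() from inside a coroutine that the loop is already running:

    import asyncio

    @asyncio.coroutine
    def inner():
        # The loop is already running this coroutine, so starting it
        # again raises RuntimeError('Event loop is running.')
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.sleep(0))

    loop = asyncio.get_event_loop()
    loop.run_until_complete(inner())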
EDIT:
Well, I've tried this solution:
    import asyncio
    import aiohttp
    from suburls import extractsuburls

    @asyncio.coroutine
    def extracturls(url):
        subtasks = []
        response = yield from aiohttp.request('GET', url)
        suburl_list = yield from response.text()
        # Create a dedicated loop for the sub-crawls:
        jobs_loop = asyncio.new_event_loop()
        for suburl in suburl_list:
            subtasks.append(asyncio.Task(extractsuburls(suburl)))
        asyncio.set_event_loop(jobs_loop)
        jobs_loop.run_until_complete(asyncio.gather(*subtasks))
        jobs_loop.close()

    if __name__ == '__main__':
        urls_list = ['http://example1.com', 'http://example2.com']
        subtasks = []
        for url in urls_list:
            subtasks.append(asyncio.Task(extracturls(url)))
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.gather(*subtasks))
        loop.close()
But I get this error: ValueError: loop argument must agree with Future.
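From what I can tell (I may be wrong), each Task is bound to the event loop that is current when it's created, so handing it to a different loop fails. This minimal snippet reproduces the exact error with the old-style API:

    import asyncio

    @asyncio.coroutine
    def noop():
        yield from asyncio.sleep(0)

    # The Task binds to the default event loop at creation time...
    task = asyncio.Task(noop())

    # ...so driving it with a different loop raises
    # ValueError: loop argument must agree with Future
    other_loop = asyncio.new_event_loop()
    other_loop.run_until_complete(task)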
Any idea?
You don't need several event loops; just use yield from asyncio.gather(*subtasks) inside the extracturls() coroutine:
    import asyncio
    import aiohttp
    from suburls import extractsuburls

    @asyncio.coroutine
    def extracturls(url):
        subtasks = []
        response = yield from aiohttp.request('GET', url)
        suburl_list = yield from response.text()
        for suburl in suburl_list:
            subtasks.append(extractsuburls(suburl))
        # Wait for the sub-crawls right here, on the one running loop:
        yield from asyncio.gather(*subtasks)

    if __name__ == '__main__':
        urls_list = ['http://example1.com', 'http://example2.com']
        subtasks = []
        for url in urls_list:
            subtasks.append(extracturls(url))
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.gather(*subtasks))
        loop.close()
As a result, extracturls() waits for all its subtasks to finish before it returns.
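Note that the @asyncio.coroutine decorator has since been removed from Python (in 3.11), so on a modern interpreter the same single-loop pattern would look roughly like this instead. This is a sketch, assuming a current aiohttp with its ClientSession API; extractsuburls is still the asker's own helper:

    import asyncio
    import aiohttp
    from suburls import extractsuburls  # the asker's module, unchanged

    async def extracturls(session, url):
        async with session.get(url) as response:
            suburl_list = await response.text()
        # Fan out over the sub-URLs and wait for all of them on the same loop:
        await asyncio.gather(*(extractsuburls(suburl) for suburl in suburl_list))

    async def main():
        urls_list = ['http://example1.com', 'http://example2.com']
        async with aiohttp.ClientSession() as session:
            await asyncio.gather(*(extracturls(session, url) for url in urls_list))

    if __name__ == '__main__':
        asyncio.run(main())  # asyncio.run() requires Python 3.7+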