Implement workflow to merge feeds into each other #816
Description
So, now that we've come up with lists of media sources / feeds to be merged into each other (#799), let's try doing the actual merging.
Given that:
- We need to merge some individual feeds into each other, and also merge some media sources into each other (the process of which includes merging feeds), as I've mentioned in Come up with a final "what media source gets merged into what / deleted" list #799 (comment);
- Temporal workflows are a bit new to our stack, and neither of us has extensive hands-on experience with them;
- We'd generally like to avoid scope creep;
- Merging feeds into each other is useful on its own;
I'd suggest that we implement the feed merging first, run it with inputs from feed_actions, see what happens, correct course, and then move on to merging the media sources as a separate task.
As per #799 (comment), the final database of what gets merged into what is:
https://drive.google.com/file/d/1sfQLMwq5OkooDtg3ZjYOTOyNEIzMv2HZ/view?usp=sharing
and for this task we'll need just the feed_actions table.
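For what it's worth, once that table is imported somewhere queryable, driving the whole thing could be as simple as reading (source, destination) feed ID pairs out of feed_actions and kicking off one merge per pair. A minimal sketch, assuming hypothetical src_feeds_id / dst_feeds_id column names and with merge_feeds() standing in for whatever entry point we end up with (most likely a Temporal workflow start):

```python
import psycopg2


def pending_feed_merges(dsn: str):
    """Yield (src_feeds_id, dst_feeds_id) pairs from an imported feed_actions table.

    The column names here are assumptions -- adjust them to whatever the actual
    feed_actions dump uses.
    """
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute("SELECT src_feeds_id, dst_feeds_id FROM feed_actions")
        yield from cur


# Hypothetical usage: one merge (workflow run) per pair.
# for src_feeds_id, dst_feeds_id in pending_feed_merges("dbname=mediacloud"):
#     merge_feeds(src_feeds_id=src_feeds_id, dst_feeds_id=dst_feeds_id)
```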
Outline
The workflow to merge one feed (<src_feeds_id>) into another (<dst_feeds_id>) will look as follows (adapted from #799 (comment)):
- Move all rows that reference the feeds table with <src_feeds_id>:
  - Set feeds_id = <dst_feeds_id> on rows with feeds_id = <src_feeds_id> in the downloads table
  - Set feeds_id = <dst_feeds_id> on rows with feeds_id = <src_feeds_id> in the feeds_stories_map table, taking into account that there could be duplicates
  - Set feeds_id = <dst_feeds_id> on rows with feeds_id = <src_feeds_id> in the scraped_feeds table, taking into account that there could be duplicates
  - Set feeds_id = <dst_feeds_id> on rows with feeds_id = <src_feeds_id> in the feeds_from_yesterday table, taking into account that there could be duplicates
  - Set feeds_id = <dst_feeds_id> on rows with feeds_id = <src_feeds_id> in the feeds_tags_map table, taking into account that there could be duplicates
- Remove the row with feeds_id = <src_feeds_id> from the feeds table:
  - Remove rows with feeds_id = <src_feeds_id> from the downloads table - there shouldn't be any left as we've just merged them
  - Remove rows with feeds_id = <src_feeds_id> from the feeds_stories_map table - there shouldn't be any left as we've just merged them
  - <...>
  - Remove rows with feeds_id = <src_feeds_id> from the feeds_tags_map table - there shouldn't be any left as we've just merged them
  - Remove the actual row from feeds.
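To make the "taking into account that there could be duplicates" steps concrete, here's a minimal sketch (plain psycopg2, no Temporal plumbing yet) of the move + cleanup for one of the map tables, using feeds_tags_map and assuming its usual UNIQUE (feeds_id, tags_id) constraint; the same pattern should apply to feeds_stories_map, scraped_feeds and feeds_from_yesterday with their respective unique columns:

```python
import psycopg2


def move_feeds_tags_map(cur, src_feeds_id: int, dst_feeds_id: int) -> None:
    """Re-point feeds_tags_map rows from the source feed to the destination feed.

    Assumes UNIQUE (feeds_id, tags_id): rows that would collide with an existing
    destination row are skipped by the UPDATE and removed by the DELETE below.
    """
    # Move only the rows that won't become duplicates of an existing destination row.
    cur.execute("""
        UPDATE feeds_tags_map AS ftm
        SET feeds_id = %(dst)s
        WHERE ftm.feeds_id = %(src)s
          AND NOT EXISTS (
              SELECT 1
              FROM feeds_tags_map AS dup
              WHERE dup.feeds_id = %(dst)s
                AND dup.tags_id = ftm.tags_id
          )
    """, {"src": src_feeds_id, "dst": dst_feeds_id})

    # Whatever still points at the source feed already exists for the destination
    # feed, so it can simply be dropped.
    cur.execute(
        "DELETE FROM feeds_tags_map WHERE feeds_id = %(src)s",
        {"src": src_feeds_id},
    )


def merge_feed(dsn: str, src_feeds_id: int, dst_feeds_id: int) -> None:
    """One-shot, non-chunked version of the outline above (fine for small feeds)."""
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        # downloads has no uniqueness concerns, so a plain re-point is enough.
        cur.execute(
            "UPDATE downloads SET feeds_id = %(dst)s WHERE feeds_id = %(src)s",
            {"src": src_feeds_id, "dst": dst_feeds_id},
        )
        move_feeds_tags_map(cur, src_feeds_id, dst_feeds_id)
        # ... same duplicate-aware move for feeds_stories_map, scraped_feeds and
        # feeds_from_yesterday, with their respective unique columns ...

        # Finally, drop the source feed itself. If any referencing downloads rows
        # were missed, the downloads FK (no ON DELETE CASCADE) makes this DELETE
        # fail loudly, which is what we want.
        cur.execute(
            "DELETE FROM feeds WHERE feeds_id = %(src)s",
            {"src": src_feeds_id},
        )
```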
Referencing tables
Merging feeds is a bit easier than merging media sources because feed information doesn't end up in Solr (so we don't have to update its index in any way), and there aren't that many tables that reference rows in feeds (open up https://github.com/mediacloud/backend/blob/f0c523e7c10ba29f11411e6b105e65d6b17dd036/apps/postgresql-server/pgmigrate/migrations/V0001__initial_schema.sql and Command+F for feeds_id).
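As an alternative to Command+F (and as a sanity check that the workflow covers every referencing table), Postgres can also list them itself via pg_constraint; a small sketch - note that feeds_from_yesterday won't show up here since, as mentioned below, it has no foreign key on feeds_id:

```python
import psycopg2

# Every foreign key that points at feeds(feeds_id). Partitions such as
# feeds_stories_map_p_* show up individually because each one carries its own
# constraint.
FK_QUERY = """
    SELECT conrelid::regclass AS referencing_table,
           conname AS constraint_name
    FROM pg_constraint
    WHERE contype = 'f'
      AND confrelid = 'feeds'::regclass
    ORDER BY 1
"""


def tables_referencing_feeds(dsn: str):
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute(FK_QUERY)
        return cur.fetchall()
```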
Here's how it looks on production (feel free to SSH in and look around yourself):
$ ssh woodward
woodward$ docker exec -it $(docker ps | grep postgresql-server | cut -d ' ' -f1) psql
psql# \d+ feeds
Table "public.feeds"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
-------------------------------+--------------------------+-----------+----------+-----------------------------------------+----------+--------------+-------------
feeds_id | integer | | not null | nextval('feeds_feeds_id_seq'::regclass) | plain | |
media_id | integer | | not null | | plain | |
name | character varying(512) | | not null | | extended | |
url | character varying(1024) | | not null | | extended | |
last_attempted_download_time | timestamp with time zone | | | | plain | |
type | feed_type | | not null | 'syndicated'::feed_type | plain | |
last_new_story_time | timestamp with time zone | | | | plain | |
last_checksum | text | | | | extended | |
last_successful_download_time | timestamp with time zone | | | | plain | |
active | boolean | | not null | true | plain | |
Indexes:
"feeds_pkey" PRIMARY KEY, btree (feeds_id)
"feeds_last_attempted_download_time" btree (last_attempted_download_time)
"feeds_last_successful_download_time" btree (last_successful_download_time)
"feeds_media" btree (media_id)
"feeds_name" btree (name)
Foreign-key constraints:
"feeds_media_id_fkey" FOREIGN KEY (media_id) REFERENCES media(media_id) ON DELETE CASCADE
Referenced by:
TABLE "downloads" CONSTRAINT "downloads_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id)
TABLE "feeds_stories_map_p_00" CONSTRAINT "feeds_stories_map_p_00_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_01" CONSTRAINT "feeds_stories_map_p_01_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_02" CONSTRAINT "feeds_stories_map_p_02_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_03" CONSTRAINT "feeds_stories_map_p_03_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_04" CONSTRAINT "feeds_stories_map_p_04_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_05" CONSTRAINT "feeds_stories_map_p_05_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_06" CONSTRAINT "feeds_stories_map_p_06_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_07" CONSTRAINT "feeds_stories_map_p_07_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_08" CONSTRAINT "feeds_stories_map_p_08_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_09" CONSTRAINT "feeds_stories_map_p_09_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_10" CONSTRAINT "feeds_stories_map_p_10_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_11" CONSTRAINT "feeds_stories_map_p_11_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_12" CONSTRAINT "feeds_stories_map_p_12_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_13" CONSTRAINT "feeds_stories_map_p_13_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_14" CONSTRAINT "feeds_stories_map_p_14_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_15" CONSTRAINT "feeds_stories_map_p_15_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_16" CONSTRAINT "feeds_stories_map_p_16_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_17" CONSTRAINT "feeds_stories_map_p_17_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_18" CONSTRAINT "feeds_stories_map_p_18_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_19" CONSTRAINT "feeds_stories_map_p_19_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_20" CONSTRAINT "feeds_stories_map_p_20_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_stories_map_p_21" CONSTRAINT "feeds_stories_map_p_21_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) MATCH FULL ON DELETE CASCADE
TABLE "feeds_tags_map" CONSTRAINT "feeds_tags_map_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) ON DELETE CASCADE
TABLE "scraped_feeds" CONSTRAINT "scraped_feeds_feeds_id_fkey" FOREIGN KEY (feeds_id) REFERENCES feeds(feeds_id) ON DELETE CASCADE
Access method: heap
So, a bunch of non-partitioned and partitioned tables reference feeds.feeds_id. One table that's missing from this list is feeds_from_yesterday - I remember that the lack of a reference is deliberate, I just don't remember why :)
Here are the sizes of all of these tables:
psql# \d+
<...>
public | downloads | partitioned table | mediacloud | permanent | 0 bytes |
public | downloads_error | table | mediacloud | permanent | 34 GB |
public | downloads_feed_error | table | mediacloud | permanent | 5828 MB |
public | downloads_fetching | table | mediacloud | permanent | 11 MB |
public | downloads_in_past_day | view | mediacloud | permanent | 0 bytes |
public | downloads_media | view | mediacloud | permanent | 0 bytes |
public | downloads_non_media | view | mediacloud | permanent | 0 bytes |
public | downloads_pending | table | mediacloud | permanent | 11 MB |
public | downloads_success | partitioned table | mediacloud | permanent | 0 bytes |
public | downloads_success_content | partitioned table | mediacloud | permanent | 0 bytes |
public | downloads_success_content_00 | table | mediacloud | permanent | 5322 MB |
public | downloads_success_content_01 | table | mediacloud | permanent | 7182 MB |
public | downloads_success_content_02 | table | mediacloud | permanent | 11 GB |
public | downloads_success_content_03 | table | mediacloud | permanent | 12 GB |
public | downloads_success_content_04 | table | mediacloud | permanent | 10 GB |
public | downloads_success_content_05 | table | mediacloud | permanent | 10147 MB |
public | downloads_success_content_06 | table | mediacloud | permanent | 12 GB |
public | downloads_success_content_07 | table | mediacloud | permanent | 13 GB |
public | downloads_success_content_08 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_09 | table | mediacloud | permanent | 13 GB |
public | downloads_success_content_10 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_11 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_12 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_13 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_14 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_15 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_16 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_17 | table | mediacloud | permanent | 16 GB |
public | downloads_success_content_18 | table | mediacloud | permanent | 16 GB |
public | downloads_success_content_19 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_20 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_21 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_22 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_23 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_24 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_25 | table | mediacloud | permanent | 14 GB |
public | downloads_success_content_26 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_27 | table | mediacloud | permanent | 16 GB |
public | downloads_success_content_28 | table | mediacloud | permanent | 18 GB |
public | downloads_success_content_29 | table | mediacloud | permanent | 19 GB |
public | downloads_success_content_30 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_31 | table | mediacloud | permanent | 15 GB |
public | downloads_success_content_32 | table | mediacloud | permanent | 9981 MB |
public | downloads_success_content_33 | table | mediacloud | permanent | 8192 bytes |
public | downloads_success_feed | partitioned table | mediacloud | permanent | 0 bytes |
public | downloads_success_feed_00 | table | mediacloud | permanent | 8366 MB |
public | downloads_success_feed_01 | table | mediacloud | permanent | 5427 MB |
public | downloads_success_feed_02 | table | mediacloud | permanent | 7832 MB |
public | downloads_success_feed_03 | table | mediacloud | permanent | 5881 MB |
public | downloads_success_feed_04 | table | mediacloud | permanent | 6155 MB |
public | downloads_success_feed_05 | table | mediacloud | permanent | 6575 MB |
public | downloads_success_feed_06 | table | mediacloud | permanent | 5742 MB |
public | downloads_success_feed_07 | table | mediacloud | permanent | 5534 MB |
public | downloads_success_feed_08 | table | mediacloud | permanent | 4877 MB |
public | downloads_success_feed_09 | table | mediacloud | permanent | 5132 MB |
public | downloads_success_feed_10 | table | mediacloud | permanent | 5217 MB |
public | downloads_success_feed_11 | table | mediacloud | permanent | 5072 MB |
public | downloads_success_feed_12 | table | mediacloud | permanent | 5091 MB |
public | downloads_success_feed_13 | table | mediacloud | permanent | 5257 MB |
public | downloads_success_feed_14 | table | mediacloud | permanent | 5209 MB |
public | downloads_success_feed_15 | table | mediacloud | permanent | 5229 MB |
public | downloads_success_feed_16 | table | mediacloud | permanent | 3821 MB |
public | downloads_success_feed_17 | table | mediacloud | permanent | 3078 MB |
public | downloads_success_feed_18 | table | mediacloud | permanent | 3128 MB |
public | downloads_success_feed_19 | table | mediacloud | permanent | 4509 MB |
public | downloads_success_feed_20 | table | mediacloud | permanent | 5425 MB |
public | downloads_success_feed_21 | table | mediacloud | permanent | 5463 MB |
public | downloads_success_feed_22 | table | mediacloud | permanent | 5793 MB |
public | downloads_success_feed_23 | table | mediacloud | permanent | 5041 MB |
public | downloads_success_feed_24 | table | mediacloud | permanent | 5319 MB |
public | downloads_success_feed_25 | table | mediacloud | permanent | 5468 MB |
public | downloads_success_feed_26 | table | mediacloud | permanent | 5396 MB |
public | downloads_success_feed_27 | table | mediacloud | permanent | 5224 MB |
public | downloads_success_feed_28 | table | mediacloud | permanent | 5124 MB |
public | downloads_success_feed_29 | table | mediacloud | permanent | 4987 MB |
public | downloads_success_feed_30 | table | mediacloud | permanent | 5155 MB |
public | downloads_success_feed_31 | table | mediacloud | permanent | 5228 MB |
public | downloads_success_feed_32 | table | mediacloud | permanent | 3393 MB |
public | downloads_success_feed_33 | table | mediacloud | permanent | 8192 bytes |
downloads is partitioned by the state column and then further by type, so, for example, a download with state = 'success' and type = 'feed' would end up in one of the downloads_success_feed tables.
For your purposes I think you can pretty much ignore the fact that it's partitioned and just UPDATE the base downloads table directly.
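Not strictly necessary, but if you want to convince yourself that going through the parent table is enough, tableoid shows which physical partition each row actually lives in; a quick sketch:

```python
import psycopg2


def downloads_partitions_for_feed(dsn: str, feeds_id: int):
    """Report which physical downloads_* partitions hold rows for a given feed.

    Purely a sanity check -- the merge itself can just UPDATE the downloads
    parent table and let Postgres route the rows to the right partitions.
    """
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute("""
            SELECT tableoid::regclass AS partition, COUNT(*) AS row_count
            FROM downloads
            WHERE feeds_id = %(feeds_id)s
            GROUP BY 1
            ORDER BY 1
        """, {"feeds_id": feeds_id})
        return cur.fetchall()
```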
Another partitioned table is feeds_stories_map:
public | feeds_stories_map | view | mediacloud | permanent | 0 bytes |
public | feeds_stories_map_p | table | mediacloud | permanent | 0 bytes |
public | feeds_stories_map_p_00 | table | mediacloud | permanent | 3148 MB |
public | feeds_stories_map_p_01 | table | mediacloud | permanent | 4078 MB |
public | feeds_stories_map_p_02 | table | mediacloud | permanent | 4079 MB |
public | feeds_stories_map_p_03 | table | mediacloud | permanent | 4206 MB |
public | feeds_stories_map_p_04 | table | mediacloud | permanent | 4208 MB |
public | feeds_stories_map_p_05 | table | mediacloud | permanent | 4219 MB |
public | feeds_stories_map_p_06 | table | mediacloud | permanent | 4223 MB |
public | feeds_stories_map_p_07 | table | mediacloud | permanent | 4220 MB |
public | feeds_stories_map_p_08 | table | mediacloud | permanent | 4217 MB |
public | feeds_stories_map_p_09 | table | mediacloud | permanent | 4221 MB |
public | feeds_stories_map_p_10 | table | mediacloud | permanent | 4227 MB |
public | feeds_stories_map_p_11 | table | mediacloud | permanent | 4211 MB |
public | feeds_stories_map_p_12 | table | mediacloud | permanent | 4173 MB |
public | feeds_stories_map_p_13 | table | mediacloud | permanent | 4424 MB |
public | feeds_stories_map_p_14 | table | mediacloud | permanent | 4293 MB |
public | feeds_stories_map_p_15 | table | mediacloud | permanent | 4241 MB |
public | feeds_stories_map_p_16 | table | mediacloud | permanent | 4251 MB |
public | feeds_stories_map_p_17 | table | mediacloud | permanent | 4210 MB |
public | feeds_stories_map_p_18 | table | mediacloud | permanent | 4215 MB |
public | feeds_stories_map_p_19 | table | mediacloud | permanent | 4282 MB |
public | feeds_stories_map_p_20 | table | mediacloud | permanent | 3383 MB |
public | feeds_stories_map_p_21 | table | mediacloud | permanent | 0 bytes |
And then there are a few smaller tables:
public | feeds_tags_map | table | mediacloud | permanent | 696 kB |
<...>
public | scraped_feeds | table | mediacloud | permanent | 10072 kB |
<...>
public | feeds_from_yesterday | table | mediacloud | permanent | 36 MB |
Tips, tricks, notes and other things that came to mind
- Bigger tables might have thousands if not hundreds of thousands of rows that reference feeds.feeds_id, so you'll need to chunk your UPDATEs somehow. One way to do this is to get MIN(primary_key) and MAX(primary_key) from every referencing table for a specific feeds_id (make sure that an index exists that would allow you to do this in a timely manner!), and then UPDATE the referencing table in chunks based on primary_key (see the chunked UPDATE sketch after this list).
- Given the "do this, if that succeeds then do that, then ..., and make sure that it all works for thousands of inputs, and you better track the progress of all of it, oh, and external components to be updated might go down at any point, and also it's unclear whether individual steps to be executed will work with production's amount of data" nature of this task, I think this is a good chance to try out Temporal (a rough sketch of the workflow shape follows this list). You can use my podcast ingest as a reference:
  - Interface: https://github.com/mediacloud/backend/blob/f0c523e7c10ba29f11411e6b105e65d6b17dd036/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow_interface.py
  - Implementation: https://github.com/mediacloud/backend/blob/f0c523e7c10ba29f11411e6b105e65d6b17dd036/apps/podcast-transcribe-episode/src/python/podcast_transcribe_episode/workflow.py
  - Test that writes some mock data, runs the workflow and checks what came out on the other end: backend/apps/podcast-transcribe-episode/tests/python/test_workflow.py, lines 80 to 183 (at f0c523e)
  - Some docs: https://github.com/mediacloud/backend/blob/f0c523e7c10ba29f11411e6b105e65d6b17dd036/doc/workflows.markdown
- IMHO this is one of those tasks that become easier if you write yourself a good test that confirms that your code is doing exactly what you want it to do. So, make sure to write at least one good test that preloads a testing database with some mock duplicate feeds (in both feeds and the other referencing tables), runs the workflow, and makes sure that the feeds got merged and nothing got lost in the process. The test should also exercise chunked UPDATEs and other edge cases that come to mind.
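To make the first tip concrete, here's a minimal sketch of the chunked UPDATE idea, using downloads as the example (updated through the base table, as suggested above) and assuming downloads_id is the primary key; one transaction per chunk keeps each statement short and makes progress durable:

```python
import psycopg2

CHUNK_SIZE = 100_000  # rows of primary-key space per UPDATE; tune as needed


def repoint_downloads_in_chunks(dsn: str, src_feeds_id: int, dst_feeds_id: int) -> None:
    """Re-point downloads rows from the source feed to the destination feed in chunks."""
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        # Needs an index covering feeds_id for this to run in a timely manner.
        cur.execute(
            "SELECT MIN(downloads_id), MAX(downloads_id) FROM downloads WHERE feeds_id = %(src)s",
            {"src": src_feeds_id},
        )
        min_id, max_id = cur.fetchone()
        if min_id is None:
            return  # nothing to do for this feed

        chunk_start = min_id
        while chunk_start <= max_id:
            chunk_end = chunk_start + CHUNK_SIZE
            cur.execute("""
                UPDATE downloads
                SET feeds_id = %(dst)s
                WHERE feeds_id = %(src)s
                  AND downloads_id >= %(start)s
                  AND downloads_id < %(end)s
            """, {
                "src": src_feeds_id,
                "dst": dst_feeds_id,
                "start": chunk_start,
                "end": chunk_end,
            })
            conn.commit()  # one transaction per chunk so progress sticks
            chunk_start = chunk_end
```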
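And as a very rough illustration of the workflow shape for the Temporal tip: the sketch below is written against the open-source temporalio Python SDK rather than the SDK/style used in the podcast ingest code linked above, and all activity/workflow names are made up, so treat it as a shape, not a drop-in:

```python
from datetime import timedelta

from temporalio import activity, workflow


@activity.defn
async def move_rows_to_dst_feed(table: str, src_feeds_id: int, dst_feeds_id: int) -> None:
    # One referencing table per activity invocation; the chunked / duplicate-aware
    # UPDATEs sketched earlier would live here.
    raise NotImplementedError


@activity.defn
async def delete_src_feed(src_feeds_id: int) -> None:
    # Final cleanup: remove the (now unreferenced) row from the feeds table.
    raise NotImplementedError


@workflow.defn
class MergeFeedsWorkflow:
    """Merge <src_feeds_id> into <dst_feeds_id>, one referencing table at a time.

    Each step is a separate activity, so Temporal retries it independently and
    the overall progress survives worker restarts.
    """

    @workflow.run
    async def run(self, src_feeds_id: int, dst_feeds_id: int) -> None:
        for table in (
            "downloads",
            "feeds_stories_map",
            "scraped_feeds",
            "feeds_from_yesterday",
            "feeds_tags_map",
        ):
            await workflow.execute_activity(
                move_rows_to_dst_feed,
                args=[table, src_feeds_id, dst_feeds_id],
                start_to_close_timeout=timedelta(hours=2),
            )
        await workflow.execute_activity(
            delete_src_feed,
            args=[src_feeds_id],
            start_to_close_timeout=timedelta(minutes=10),
        )
```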
As always, complain loudly and early if something's unclear!