Sweep: Rename save in NotionStorage to save_to_database
Closed this issue · 1 comment
Details
Rename save in NotionStorage to save_to_database. Note that multiple other files reference this function.
Here's the PR! #100.
⚡ Sweep Free Trial: I used GPT-4 to create this ticket. You have 5 GPT-4 tickets left. For more GPT-4 tickets, visit our payment portal.
Step 1: 🔍 Code Search
I found the following snippets in your repository. I will now analyze these snippets and come up with a plan.
Some code snippets I looked at (click to expand). If some file is missing from here, you can mention the path in the ticket description.
taotie/taotie/storage/notion.py
Lines 1 to 161 in 152a955
"""Store the data in Notion. | |
""" | |
import asyncio | |
import datetime | |
import os | |
from typing import Any, Dict, List, Optional, Tuple | |
from notion_client import AsyncClient | |
from taotie.storage.base import Storage | |
class NotionStorage(Storage): | |
"""Store the data into notion knowledge base.""" | |
def __init__(
    self,
    root_page_id: str,
    verbose: bool = False,
    **kwargs,
):
    """Create the Notion client used by this storage.

    Args:
        root_page_id: Id of the Notion page under which the database is
            created when it does not exist yet.
        verbose: Forwarded to ``Storage.__init__``.
        **kwargs: Forwarded to ``Storage.__init__``.

    Raises:
        ValueError: If the ``NOTION_TOKEN`` environment variable is unset
            or empty.
    """
    Storage.__init__(self, verbose=verbose, **kwargs)
    notion_token = os.environ.get("NOTION_TOKEN")
    self.token = notion_token
    if not notion_token:
        raise ValueError("Please set the Notion token in .env.")
    self.notion = AsyncClient(auth=notion_token)
    self.root_page_id = root_page_id
    # Filled in by the first call to save().
    self.database_id: Optional[str] = None
    self.logger.info("Notion storage initialized.")
async def save(
    self, data: List[Tuple[Dict[str, Any], Dict[str, Any]]], image_urls: List[str]
):
    """Store every (raw, processed) pair as one page of the Notion database.

    The database id is resolved through ``_get_or_create_database`` the
    first time it is needed and then kept on the instance.

    Args:
        data: Pairs of (raw item, processed item).
        image_urls: Image URLs passed along with every item.
    """
    if not self.database_id:
        self.database_id = await self._get_or_create_database()
    for raw, processed in data:
        await self._add_to_database(self.database_id, raw, processed, image_urls)
    self.logger.info("Notion storage saved.")
async def _get_or_create_database(self) -> str:
    """Return the id of the first database found by the search, or of a newly created one.

    The search uses ``root_page_id`` as the query and is restricted to
    database objects. When it returns no result, a database titled
    "Taotie Knowledge Base" is created under the root page.
    """
    search_response = await self.notion.search(
        query=self.root_page_id, filter={"property": "object", "value": "database"}
    )
    matches = search_response.get("results")
    if len(matches) > 0:
        database_id = matches[0]["id"]
        self.logger.info(f"Database {database_id} already exists.")
        return database_id

    # Nothing found: create a new database below the root page.
    schema: Dict[str, Any] = {
        "Title": {"title": {}},
        "Type": {"select": {}},
        "Created Time": {"date": {}},
        "Summary": {"rich_text": {}},
        "Topics": {"multi_select": {}},
        "URL": {"url": {}},
    }
    created = await self.notion.databases.create(
        parent={"page_id": self.root_page_id},
        title=[{"type": "text", "text": {"content": "Taotie Knowledge Base"}}],
        properties=schema,
    )
    self.logger.info("Database created.")
    return created["id"]
async def _add_to_database(
    self,
    database_id: str,
    item: Dict[str, Any],
    processed_item: Dict[str, Any],
    image_files: List[str],
) -> None:
    """Create one page in the database for a raw item and its processed form.

    Args:
        database_id: Id of the database that receives the page.
        item: Raw item; ``id``, ``datetime``, ``type`` and ``uri`` are read.
        processed_item: Processed item; ``summary`` and ``tags`` are read
            when present.
        image_files: Image URLs handed to ``create_page_blocks``.

    Raises:
        ValueError: If the create-page response contains no ``id``.
    """

    def as_text(content: Any) -> List[Dict[str, Any]]:
        # Single text fragment in the shape used for Title, Summary and URL.
        return [{"type": "text", "text": {"content": content}}]

    # Pick the page icon from the URI prefix; bookmark is the fallback.
    uri = item.get("uri", "")
    icon_emoji = "🔖"
    for prefix, emoji in (
        ("https://twitter.com", "🐦"),
        ("https://github.com", "💻"),
    ):
        if uri.startswith(prefix):
            icon_emoji = emoji
            break

    page_properties = {
        "Title": as_text(str(item["id"])),
        "Created Time": {"start": item["datetime"]},
        "Type": {"name": item["type"]},
        "Summary": as_text(processed_item.get("summary", "N/A")),
        "Topics": [{"name": tag} for tag in processed_item.get("tags", [])],
        "URL": as_text(item["uri"]),
    }
    blocks = await self.create_page_blocks(item, processed_item, image_files)
    response = await self.notion.pages.create(
        parent={"type": "database_id", "database_id": database_id},
        properties=page_properties,
        icon={"type": "emoji", "emoji": icon_emoji},
        children=blocks[:100],  # Can only add 100 blocks.
    )
    if "id" not in response:
        raise ValueError(f"Failed to add page to database: {response}")
    self.logger.info("Page added to database.")
async def create_page_blocks( | |
self, | |
raw_info: Dict[str, Any], | |
processed_info: Dict[str, Any], | |
image_urls: List[str], | |
) -> List[Dict[str, Any]]: | |
"""Create the page blocks according to the information.""" | |
page_contents = [] | |
# Display the raw information as content. | |
uri = raw_info.get("uri", "") | |
reference_type = "bookmark" | |
if uri.startswith("https://twitter.com"): | |
reference_type = "embed" | |
page_contents = [ | |
{ | |
"object": "block", | |
"type": "heading_2", | |
"heading_2": { | |
"rich_text": [ | |
{ | |
"type": "text", | |
"text": {"content": "Summary Images"}, | |
} | |
] | |
}, | |
}, | |
] | |
# Upload the images if any. | |
if image_urls: | |
for image_url in image_urls: | |
if image_url: | |
page_contents.append( | |
{ | |
"object": "block", | |
"type": "image", | |
"image": { | |
"type": "external", | |
"external": {"url": image_url}, | |
}, |
taotie/taotie/storage/notion.py
Lines 197 to 253 in 152a955
) | |
# Partition and put the content into blocks. | |
content = raw_info.get("content", "") | |
content = content.split("\n") | |
for i, line in enumerate(content): | |
if i >= 20: | |
page_contents.append( | |
{ | |
"object": "block", | |
"type": "paragraph", | |
"paragraph": { | |
"rich_text": [ | |
{ | |
"type": "text", | |
"text": {"content": "Content too long. Truncated."}, | |
} | |
] | |
}, | |
} | |
) | |
break | |
page_contents.append( | |
{ | |
"object": "block", | |
"type": "paragraph", | |
"paragraph": { | |
"rich_text": [ | |
{ | |
"type": "text", | |
"text": {"content": line}, | |
} | |
] | |
}, | |
} | |
) | |
return page_contents | |
async def run():
    """Manual smoke test: save one sample record through NotionStorage."""
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    sample_raw = {
        "id": "123",
        "type": "test-type",
        "datetime": timestamp,
        "content": "This is a test long content." * 10,
        "uri": "https://github.com/taotie/taotie",
    }
    sample_processed = {"summary": "This is a summary"}
    storage = NotionStorage(
        root_page_id="987fd186553e4d2682e9a1de441a37ba", verbose=True
    )
    await storage.save(
        [(sample_raw, sample_processed)],
        image_urls=["https://i.imgur.com/XXWcoH0.png"],
    )


if __name__ == "__main__":
    asyncio.run(run())
Lines 1 to 21 in 152a955
"""Storage is used to dump the raw data or post-processed data into a persistent storage. | |
""" | |
import os | |
from abc import ABC, abstractmethod | |
from typing import Any, Dict, List, Tuple | |
from taotie.utils import Logger, load_dotenv | |
class Storage(ABC):
    """Abstract base for storages that persist raw and post-processed data."""

    def __init__(self, verbose: bool = False, **kwargs):
        """Record the verbosity, call ``load_dotenv()`` and create the logger.

        Args:
            verbose: Stored on the instance and passed to the logger.
            **kwargs: Accepted but not used here.
        """
        self.verbose = verbose
        load_dotenv()
        # The logger is named after this module's file name.
        logger_name = os.path.basename(__file__)
        self.logger = Logger(logger_name, verbose=verbose)

    @abstractmethod
    async def save(
        self, data: List[Tuple[Dict[str, Any], Dict[str, Any]]], image_urls: List[str]
    ):
        """Save the data to the storage."""
        raise NotImplementedError
Lines 1 to 81 in 152a955
""" | |
""" | |
import argparse | |
import asyncio | |
import os | |
from taotie.reporter.notion_reporter import NotionReporter | |
from taotie.utils import * | |
def parse_args(parser: argparse.ArgumentParser):
    """Register the ``report`` and ``delete`` sub-commands and parse the command line.

    Args:
        parser: Parser that receives the sub-commands.

    Returns:
        The namespace returned by ``parser.parse_args()``. The selected
        sub-command name is stored under ``command``.
    """
    subparsers = parser.add_subparsers(dest="command")

    # Options of the "report" sub-command, registered in this order.
    report_options = (
        (
            "--date-lookback",
            {
                "type": int,
                "default": 1,
                "help": "Number of days to look back for report",
            },
        ),
        (
            "--type-filters",
            {
                "type": str,
                "default": "arxiv",
                "help": "Comma-separated list of type filters (arxiv, github-repo)",
            },
        ),
        (
            "--topic-filters",
            {
                "type": str,
                "default": "",
                "help": "Comma-separated list of topic filters",
            },
        ),
        (
            "--model-type",
            {
                "type": str,
                "default": "gpt-3.5-turbo-16k-0613",
                "help": "Model type for report",
            },
        ),
        (
            "--language",
            {"type": str, "default": "Chinese", "help": "Language for report"},
        ),
    )
    report_parser = subparsers.add_parser("report", help="Generate a report")
    for flag, options in report_options:
        report_parser.add_argument(flag, **options)

    # The "delete" sub-command takes a single positional key.
    delete_parser = subparsers.add_parser("delete", help="Delete a key")
    delete_parser.add_argument("key", type=str, help="Key to delete")

    return parser.parse_args()
async def run_notion_reporter(args: argparse.Namespace):
    """Run the script to generate the notion report.

    Args:
        args: Parsed arguments of the ``report`` sub-command. Reads
            ``type_filters``, ``topic_filters``, ``date_lookback``,
            ``model_type`` and ``language``.

    Raises:
        ValueError: If ``NOTION_DATABASE_ID`` is not set in the environment.
    """
    load_dotenv()
    database_id = os.environ.get("NOTION_DATABASE_ID")
    if not database_id:
        raise ValueError("NOTION_DATABASE_ID not found in environment")
    type_filters = args.type_filters.split(",")
    topic_filters = args.topic_filters.split(",")
    # "".split(",") yields [""], which is a non-empty (truthy) list, so
    # testing the list itself never reaches the CANDIDATE_TAGS fallback.
    # Fall back whenever no entry carries any text.
    if not any(topic_filters):
        topic_filters = os.environ.get("CANDIDATE_TAGS", "").split(",")
    reporter = NotionReporter(
        knowledge_source_uri=database_id,
        date_lookback=args.date_lookback,
        type_filters=type_filters,
        topic_filters=topic_filters,
        model_type=args.model_type,
        language=args.language,
    )
    await reporter.distill()
if __name__ == "__main__":
    # Script entry point: build the parser, then dispatch on the sub-command.
    parser = argparse.ArgumentParser(
        description="Argument Parser for Report and Delete Key"
    )
    args = parse_args(parser=parser)
    if args.command != "report":
        # Every other choice, including "delete", only prints the help text.
        parser.print_help()
    else:
        asyncio.run(run_notion_reporter(args))
taotie/taotie/reporter/notion_reporter.py
Lines 1 to 82 in 152a955
"""Notion reporter will check the gathered knowledge in notion and generate the text report for the AI related contents. | |
""" | |
import asyncio | |
import json | |
import os | |
from datetime import date, datetime, timedelta | |
from typing import Dict, List | |
import openai | |
import pytz # type: ignore | |
from notion_client import AsyncClient | |
from taotie.reporter.base_reporter import BaseReporter | |
from taotie.utils import * | |
class NotionReporter(BaseReporter): | |
"""NotionReporter will check the gathered knowledge in notion and | |
generate the text report accordingly.""" | |
def __init__( | |
self, | |
knowledge_source_uri: str, | |
date_lookback: int, | |
type_filters: List[str], | |
topic_filters: List[str], | |
verbose: bool = False, | |
**kwargs, | |
): | |
""" | |
Args: | |
knowledge_source_uri: The uri of the notion database id. | |
""" | |
super().__init__(knowledge_source_uri=knowledge_source_uri, verbose=verbose) | |
self.token = os.environ.get("NOTION_TOKEN") | |
if not self.token: | |
raise ValueError("Please set the Notion token in .env.") | |
self.date_lookback = max(0, date_lookback) | |
self.type_filters = type_filters | |
self.topic_filters = topic_filters | |
# Model configs. | |
if not os.getenv("OPENAI_API_KEY"): | |
raise ValueError("Please set OPENAI_API_KEY in .env.") | |
openai.api_key = os.getenv("OPENAI_API_KEY") | |
self.model_type = kwargs.get("model_type", "gpt-3.5-turbo-16k-0613") | |
# Prompt. | |
language = kwargs.get("language", "Chinese") | |
if "github-repo" in self.topic_filters: | |
self.report_prompt = f""" | |
Please generate a report that will be published by the WECHAT BLOG based on the json string in the triple quotes. | |
Follow the following rules STRICTLY: | |
1. Summarize in {language} and at the beginning give a short overall summary of the repos in this report. | |
2. Skip the items that are not relevant to AI or the topics of {self.topic_filters}. | |
3. Generate each item as an individual section, include the URL in each of the item, and \ | |
including the strength of recommendation (draw 1-5 stars) and the reason to recommend. \ | |
Make the summary as informative as possible. | |
4. If the item is about a paper, please emphasis the afflication of the authors if it is famous. | |
5. Generate the description in an attractive way, so that the readers will be willing to check the content. | |
6. Rank by importance (e.g. whether has image) and keep AT MOST the top 10 items based on the recommendation strength. | |
7. Add an end note indicate this report is 小思辩饕餮(https://github.com/small-thinking/taotie)创作。在公众号回复”报告“查看最新报告。 | |
Example items: | |
1.【★★★★★】TransformerOptimus/SuperAGI | |
这是一个用于构建和运行有用的自主智能体的Python项目。 | |
推荐理由:自主性AI最新版本。该项目旨在创造一个可以解决朴实问题的自主智能体。 | |
访问地址:https://github.com/TransformerOptimus/SuperAGI | |
2.【★★★★】LLM-ToolMaker | |
这个项目提出了一种名为LLMs As Tool Makers (LATM)的闭环框架,其中大型语言模型(LLMs)可以作为工具制造者为解决问题创造自己的可重用工具。 | |
推荐理由:开放框架 。该项目旨在创造一个可以使用外部工具的自主智能体。 | |
访问地址:https://github.com/ctlllll/LLM-ToolMaker | |
""" | |
else: | |
self.report_prompt = f""" | |
Please generate a report of the paper summary that will be published by the WECHAT BLOG based on the json string in the triple quotes. | |
Follow the following rules STRICTLY: | |
1. Summarize in {language} and at the beginning give a short overall summary of the repos in this report. | |
2. SKIP the items that are not relevant to AI or the topics of {self.topic_filters}. | |
3. use the paper name as the title for each item. Then followed by a short overall summary of the paper. | |
4. Emphasis the authors or afflications if they famous. | |
5. Generate each item as an individual section, include the URL in each of the item, and \ |
Summaries of links found in the content:
Step 2: 🧐 Snippet Analysis
From looking through the relevant snippets, I decided to make the following modifications:
File Path | Proposed Changes |
---|---|
taotie/storage/notion.py | Rename the save method to save_to_database. |
taotie/tools.py | Update all references of save method of NotionStorage class to save_to_database. |
taotie/reporter/notion_reporter.py | Update all references of save method of NotionStorage class to save_to_database. |
Step 3: 📝 Planning
I have created a plan for writing the pull request. I am now working through my plan and coding the required changes to address this issue. Here is the planned pull request:
Rename save method in NotionStorage to save_to_database
sweep/rename-save-method
Description
This PR renames the `save` method in the `NotionStorage` class to `save_to_database` in order to make the method name more descriptive and specific to its function.
Summary of Changes
- Renamed the `save` method in `taotie/storage/notion.py` to `save_to_database`.
- Updated all references to the `save` method in `taotie/tools.py` and `taotie/reporter/notion_reporter.py` to use the new method name.
This change improves code readability and maintainability by providing a more meaningful method name that accurately reflects its purpose.
Step 4: ⌨️ Coding
I have finished coding the issue. I am now reviewing it for completeness.
Step 5: 🔁 Code Review
Success! 🚀
I'm a bot that handles simple bugs and feature requests but I might make mistakes. Please be kind!
Join Our Discord