small-thinking/taotie

Sweep: Rename save in NotionStorage to save_to_database

Closed this issue · 1 comments

Details

Rename save in NotionStorage to save_to_database. Note that multiple other files reference this function.

Here's the PR! #100.

⚡ Sweep Free Trial: I used GPT-4 to create this ticket. You have 5 GPT-4 tickets left. For more GPT-4 tickets, visit our payment portal.


Step 1: 🔍 Code Search

I found the following snippets in your repository. I will now analyze these snippets and come up with a plan.

Some code snippets I looked at (click to expand). If some file is missing from here, you can mention the path in the ticket description.

"""Store the data in Notion.
"""
import asyncio
import datetime
import os
from typing import Any, Dict, List, Optional, Tuple
from notion_client import AsyncClient
from taotie.storage.base import Storage
class NotionStorage(Storage):
    """Store the data into a Notion knowledge base.

    Items are written as pages of a Notion database that lives under
    ``root_page_id``. The database id is resolved lazily: it is looked up
    (or created) on the first save and cached on the instance.
    """

    def __init__(
        self,
        root_page_id: str,
        verbose: bool = False,
        **kwargs,
    ):
        """
        Args:
            root_page_id: Id of the Notion page that hosts the knowledge base.
            verbose: Enable verbose logging.

        Raises:
            ValueError: If the NOTION_TOKEN environment variable is not set.
        """
        Storage.__init__(self, verbose=verbose, **kwargs)
        self.token = os.environ.get("NOTION_TOKEN")
        if not self.token:
            raise ValueError("Please set the Notion token in .env.")
        self.notion = AsyncClient(auth=self.token)
        self.root_page_id = root_page_id
        # Resolved lazily by _get_or_create_database on the first save.
        self.database_id: Optional[str] = None
        self.logger.info("Notion storage initialized.")

    async def save_to_database(
        self, data: List[Tuple[Dict[str, Any], Dict[str, Any]]], image_urls: List[str]
    ):
        """First create a database. And then create a page for each item in the database.

        Args:
            data: List of (raw_item, processed_item) dict pairs to persist.
            image_urls: External image URLs embedded into every created page.
        """
        if not self.database_id:
            self.database_id = await self._get_or_create_database()
        for raw_item, processed_item in data:
            await self._add_to_database(
                self.database_id, raw_item, processed_item, image_urls
            )
        self.logger.info("Notion storage saved.")

    async def save(
        self, data: List[Tuple[Dict[str, Any], Dict[str, Any]]], image_urls: List[str]
    ):
        """Backward-compatible alias; delegates to :meth:`save_to_database`.

        Kept because ``Storage.save`` is the abstract interface callers use.
        """
        await self.save_to_database(data, image_urls)

    async def _get_or_create_database(self) -> str:
        """Get the database id or create a new one if it does not exist."""
        results = await self.notion.search(
            query=self.root_page_id, filter={"property": "object", "value": "database"}
        )
        results = results.get("results")
        if len(results):
            database_id = results[0]["id"]
            self.logger.info(f"Database {database_id} already exists.")
            return results[0]["id"]
        else:
            # Create a new database under the root page with the fixed schema
            # expected by _add_to_database.
            parent = {"page_id": self.root_page_id}
            properties: Dict[str, Any] = {
                "Title": {"title": {}},
                "Type": {"select": {}},
                "Created Time": {"date": {}},
                "Summary": {"rich_text": {}},
                "Topics": {"multi_select": {}},
                "URL": {"url": {}},
            }
            response = await self.notion.databases.create(
                parent=parent,
                title=[{"type": "text", "text": {"content": "Taotie Knowledge Base"}}],
                properties=properties,
            )
            self.logger.info("Database created.")
            return response["id"]

    async def _add_to_database(
        self,
        database_id: str,
        item: Dict[str, Any],
        processed_item: Dict[str, Any],
        image_files: List[str],
    ) -> None:
        """Create one page in the database for a single (raw, processed) item.

        Raises:
            ValueError: If the Notion API response contains no page id.
        """
        # Determine the icon from the source of the item.
        uri = item.get("uri", "")
        icon_emoji = "🔖"
        if uri.startswith("https://twitter.com"):
            icon_emoji = "🐦"
        elif uri.startswith("https://github.com"):
            icon_emoji = "💻"
        new_page = {
            "Title": [
                {
                    "type": "text",
                    "text": {"content": str(item["id"])},
                }
            ],
            "Created Time": {"start": item["datetime"]},
            "Type": {"name": item["type"]},
            "Summary": [
                {
                    "type": "text",
                    "text": {"content": processed_item.get("summary", "N/A")},
                }
            ],
            # Loop variable renamed from "item" to avoid shadowing the parameter.
            "Topics": [{"name": tag} for tag in processed_item.get("tags", [])],
            "URL": [
                {
                    "type": "text",
                    "text": {"content": item["uri"]},
                }
            ],
        }
        children = await self.create_page_blocks(item, processed_item, image_files)
        response = await self.notion.pages.create(
            parent={"type": "database_id", "database_id": database_id},
            properties=new_page,
            icon={"type": "emoji", "emoji": icon_emoji},
            children=children[:100],  # Can only add 100 blocks.
        )
        if "id" not in response:
            raise ValueError(f"Failed to add page to database: {response}")
        self.logger.info("Page added to database.")

    async def create_page_blocks(
        self,
        raw_info: Dict[str, Any],
        processed_info: Dict[str, Any],
        image_urls: List[str],
    ) -> List[Dict[str, Any]]:
        """Create the page blocks according to the information.

        Returns:
            A list of Notion block objects: a heading, the summary images,
            and the raw content split into paragraphs (truncated at 20 lines).
        """
        page_contents = []
        # Display the raw information as content.
        uri = raw_info.get("uri", "")
        reference_type = "bookmark"
        if uri.startswith("https://twitter.com"):
            reference_type = "embed"
        page_contents = [
            {
                "object": "block",
                "type": "heading_2",
                "heading_2": {
                    "rich_text": [
                        {
                            "type": "text",
                            "text": {"content": "Summary Images"},
                        }
                    ]
                },
            },
        ]
        # Upload the images if any.
        if image_urls:
            for image_url in image_urls:
                if image_url:
                    # BUGFIX: the block dict was previously missing its
                    # closing brace, which made this a syntax error.
                    page_contents.append(
                        {
                            "object": "block",
                            "type": "image",
                            "image": {
                                "type": "external",
                                "external": {"url": image_url},
                            },
                        }
                    )
        # Partition and put the content into blocks.
        content = raw_info.get("content", "")
        content = content.split("\n")
        for i, line in enumerate(content):
            if i >= 20:
                # Cap the page at 20 content paragraphs plus a marker block.
                page_contents.append(
                    {
                        "object": "block",
                        "type": "paragraph",
                        "paragraph": {
                            "rich_text": [
                                {
                                    "type": "text",
                                    "text": {"content": "Content too long. Truncated."},
                                }
                            ]
                        },
                    }
                )
                break
            page_contents.append(
                {
                    "object": "block",
                    "type": "paragraph",
                    "paragraph": {
                        "rich_text": [
                            {
                                "type": "text",
                                "text": {"content": line},
                            }
                        ]
                    },
                }
            )
        return page_contents
async def run():
    """Manual smoke test: push one synthetic item into a Notion page."""
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    raw_data = {
        "id": "123",
        "type": "test-type",
        "datetime": now_str,
        "content": "This is a test long content." * 10,
        "uri": "https://github.com/taotie/taotie",
    }
    processed_data = {"summary": "This is a summary"}
    storage = NotionStorage(
        root_page_id="987fd186553e4d2682e9a1de441a37ba", verbose=True
    )
    await storage.save(
        [(raw_data, processed_data)],
        image_urls=["https://i.imgur.com/XXWcoH0.png"],
    )


if __name__ == "__main__":
    asyncio.run(run())

"""Storage is used to dump the raw data or post-processed data into a persistent storage.
"""
import os
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Tuple
from taotie.utils import Logger, load_dotenv
class Storage(ABC):
    """Abstract base class for persisting raw or processed data.

    Concrete subclasses implement :meth:`save` against a specific backend
    (e.g. Notion). Constructing a Storage loads the .env file and sets up
    a logger named after this module.
    """

    def __init__(self, verbose: bool = False, **kwargs):
        self.verbose = verbose
        # Pull credentials and configuration from the .env file.
        load_dotenv()
        self.logger = Logger(os.path.basename(__file__), verbose=verbose)

    @abstractmethod
    async def save(
        self, data: List[Tuple[Dict[str, Any], Dict[str, Any]]], image_urls: List[str]
    ):
        """Persist the given (raw, processed) item pairs to the backend."""
        raise NotImplementedError

taotie/taotie/tools.py

Lines 1 to 81 in 152a955

"""
"""
import argparse
import asyncio
import os
from taotie.reporter.notion_reporter import NotionReporter
from taotie.utils import *
def parse_args(parser: argparse.ArgumentParser):
    """Register the `report` and `delete` sub-commands and parse sys.argv.

    Args:
        parser: The top-level argument parser to extend.

    Returns:
        The parsed ``argparse.Namespace`` (sub-command stored in ``command``).
    """
    subparsers = parser.add_subparsers(dest="command")

    # `report` sub-command and its options.
    report_cmd = subparsers.add_parser("report", help="Generate a report")
    report_cmd.add_argument(
        "--date-lookback",
        type=int,
        default=1,
        help="Number of days to look back for report",
    )
    report_cmd.add_argument(
        "--type-filters",
        type=str,
        default="arxiv",
        help="Comma-separated list of type filters (arxiv, github-repo)",
    )
    report_cmd.add_argument(
        "--topic-filters",
        type=str,
        default="",
        help="Comma-separated list of topic filters",
    )
    report_cmd.add_argument(
        "--model-type",
        type=str,
        default="gpt-3.5-turbo-16k-0613",
        help="Model type for report",
    )
    report_cmd.add_argument(
        "--language", type=str, default="Chinese", help="Language for report"
    )

    # `delete` sub-command takes a single positional key.
    delete_cmd = subparsers.add_parser("delete", help="Delete a key")
    delete_cmd.add_argument("key", type=str, help="Key to delete")

    return parser.parse_args()
async def run_notion_reporter(args: argparse.Namespace):
    """Run the script to generate the notion report.

    Raises:
        ValueError: If NOTION_DATABASE_ID is not set in the environment.
    """
    load_dotenv()
    database_id = os.environ.get("NOTION_DATABASE_ID")
    if not database_id:
        raise ValueError("NOTION_DATABASE_ID not found in environment")
    type_filters = args.type_filters.split(",")
    topic_filters = args.topic_filters.split(",")
    # Fall back to CANDIDATE_TAGS from the environment when no topic
    # filters were given on the command line.
    if not topic_filters:
        topic_filters = os.environ.get("CANDIDATE_TAGS", "").split(",")
    reporter = NotionReporter(
        knowledge_source_uri=database_id,
        date_lookback=args.date_lookback,
        type_filters=type_filters,
        topic_filters=topic_filters,
        model_type=args.model_type,
        language=args.language,
    )
    await reporter.distill()
if __name__ == "__main__":
    # Dispatch to the requested sub-command; print usage otherwise.
    cli = argparse.ArgumentParser(
        description="Argument Parser for Report and Delete Key"
    )
    parsed = parse_args(parser=cli)
    if parsed.command == "report":
        asyncio.run(run_notion_reporter(parsed))
    else:
        cli.print_help()

"""Notion reporter will check the gathered knowledge in notion and generate the text report for the AI related contents.
"""
import asyncio
import json
import os
from datetime import date, datetime, timedelta
from typing import Dict, List
import openai
import pytz # type: ignore
from notion_client import AsyncClient
from taotie.reporter.base_reporter import BaseReporter
from taotie.utils import *
class NotionReporter(BaseReporter):
"""NotionReporter will check the gathered knowledge in notion and
generate the text report accordingly."""
def __init__(
self,
knowledge_source_uri: str,
date_lookback: int,
type_filters: List[str],
topic_filters: List[str],
verbose: bool = False,
**kwargs,
):
"""
Args:
knowledge_source_uri: The uri of the notion database id.
"""
super().__init__(knowledge_source_uri=knowledge_source_uri, verbose=verbose)
self.token = os.environ.get("NOTION_TOKEN")
if not self.token:
raise ValueError("Please set the Notion token in .env.")
self.date_lookback = max(0, date_lookback)
self.type_filters = type_filters
self.topic_filters = topic_filters
# Model configs.
if not os.getenv("OPENAI_API_KEY"):
raise ValueError("Please set OPENAI_API_KEY in .env.")
openai.api_key = os.getenv("OPENAI_API_KEY")
self.model_type = kwargs.get("model_type", "gpt-3.5-turbo-16k-0613")
# Prompt.
language = kwargs.get("language", "Chinese")
if "github-repo" in self.topic_filters:
self.report_prompt = f"""
Please generate a report that will be published by the WECHAT BLOG based on the json string in the triple quotes.
Follow the following rules STRICTLY:
1. Summarize in {language} and at the beginning give a short overall summary of the repos in this report.
2. Skip the items that are not relevant to AI or the topics of {self.topic_filters}.
3. Generate each item as an individual section, include the URL in each of the item, and \
including the strength of recommendation (draw 1-5 stars) and the reason to recommend. \
Make the summary as informative as possible.
4. If the item is about a paper, please emphasis the afflication of the authors if it is famous.
5. Generate the description in an attractive way, so that the readers will be willing to check the content.
6. Rank by importance (e.g. whether has image) and keep AT MOST the top 10 items based on the recommendation strength.
7. Add an end note indicate this report is 小思辩饕餮(https://github.com/small-thinking/taotie)创作。在公众号回复”报告“查看最新报告。
Example items:
1.【★★★★★】TransformerOptimus/SuperAGI
这是一个用于构建和运行有用的自主智能体的Python项目。
推荐理由:自主性AI最新版本。该项目旨在创造一个可以解决朴实问题的自主智能体。
访问地址:https://github.com/TransformerOptimus/SuperAGI
2.【★★★★】LLM-ToolMaker
这个项目提出了一种名为LLMs As Tool Makers (LATM)的闭环框架,其中大型语言模型(LLMs)可以作为工具制造者为解决问题创造自己的可重用工具。
推荐理由:开放框架 。该项目旨在创造一个可以使用外部工具的自主智能体。
访问地址:https://github.com/ctlllll/LLM-ToolMaker
"""
else:
self.report_prompt = f"""
Please generate a report of the paper summary that will be published by the WECHAT BLOG based on the json string in the triple quotes.
Follow the following rules STRICTLY:
1. Summarize in {language} and at the beginning give a short overall summary of the repos in this report.
2. SKIP the items that are not relevant to AI or the topics of {self.topic_filters}.
3. use the paper name as the title for each item. Then followed by a short overall summary of the paper.
4. Emphasis the authors or afflications if they famous.
5. Generate each item as an individual section, include the URL in each of the item, and \

I also found the following external resources that might be helpful:

Summaries of links found in the content:


Step 2: 🧐 Snippet Analysis

From looking through the relevant snippets, I decided to make the following modifications:

File Path Proposed Changes
taotie/storage/notion.py Rename the save method to save_to_database.
taotie/tools.py Update all references of save method of NotionStorage class to save_to_database.
taotie/reporter/notion_reporter.py Update all references of save method of NotionStorage class to save_to_database.

Step 3: 📝 Planning

I have created a plan for writing the pull request. I am now working through my plan and coding the required changes to address this issue. Here is the planned pull request:

Rename save method in NotionStorage to save_to_database
sweep/rename-save-method

Description

This PR renames the save method in the NotionStorage class to save_to_database in order to make the method name more descriptive and specific to its function.

Summary of Changes

  • Renamed the save method in taotie/storage/notion.py to save_to_database.
  • Updated all references to the save method in taotie/tools.py and taotie/reporter/notion_reporter.py to use the new method name.

This change improves code readability and maintainability by providing a more meaningful method name that accurately reflects its purpose.


Step 4: ⌨️ Coding

I have finished coding the issue. I am now reviewing it for completeness.


Step 5: 🔁 Code Review

Success! 🚀


I'm a bot that handles simple bugs and feature requests but I might make mistakes. Please be kind!
Join Our Discord