/yuque-book-download

下载语雀所有book知识库的文档到本地

Primary Language: Python · License: BSD 2-Clause "Simplified" (BSD-2-Clause)

更新

https://xie-dd.github.io/posts/1745/

使用指南

  1. 下载项目
  2. 修改 yuque_download_xdd.py 文件中的 token
  3. 运行 yuque_download_xdd.py 即可

代码开发指南

获取语雀Token

网页端登录语雀---点击头像---账户设置---Token

说明

  1. 语雀所有的开放 API 都需要 Token 验证之后才能访问
  2. 你需要在请求的 HTTP Headers 中传入 X-Auth-Token,带上你的身份 Token 信息,用于完成认证

获取用户信息

image.png

import requests

# Account whose profile (and, further down, repositories) is queried.
USER = "xdd1997"
# Per the Yuque API docs a single user's profile lives at /users/<login>;
# the bare /users path left USER unused and does not address any user.
url_user = f"https://www.yuque.com/api/v2/users/{USER}"
# The token travels in the X-Auth-Token request header.
header = {"X-Auth-Token": "your Token"}
resu = requests.get(url_user, headers=header).json()
# Bare expression: displays the decoded JSON when run in a notebook/REPL.
resu

image.png

获取用户/团队名下仓库列表

image.png

# Endpoint listing the repositories under USER.
url_repo = f"https://www.yuque.com/api/v2/users/{USER}/repos"
repo_response = requests.get(url_repo, headers=header)
# Keep only the "data" member of the decoded JSON payload.
Repo_Result = repo_response.json()['data']
# Bare expression: displays the result when run in a notebook/REPL.
Repo_Result

image.png

# Collect the id of every repository; the id uniquely identifies a repository.
repo_ids = [repo["id"] for repo in Repo_Result]

repo_ids

[4240****, 2120****, 1087****]

获得一个仓库下的文档列表

image.png

# Id of the repository whose documents are listed (masked placeholder).
repo_id = '4240****'
url_docs = f"https://www.yuque.com/api/v2/repos/{repo_id}/docs"
docs_response = requests.get(url_docs, headers=header)
# Keep only the "data" member of the decoded JSON payload.
Doc_Result = docs_response.json()['data']
# Bare expression: displays the result when run in a notebook/REPL.
Doc_Result

获得一个仓库下所有文档的 slug

# The slug uniquely identifies a document; gather one per listed document.
slugs = [doc['slug'] for doc in Doc_Result]
slugs

获取单篇文档信息

image.png

# Slug of the document to fetch (masked placeholder).
slug = "gbhna********"
url = "https://www.yuque.com/api/v2/repos/{}/docs/{}".format(repo_id, slug)
doc_response = requests.get(url, headers=header)
# NOTE: rebinds Repo_Result, which previously held the repository list.
Repo_Result = doc_response.json()
# Bare expression: displays the result when run in a notebook/REPL.
Repo_Result

image.png

获取某一篇文档内容

# Pull the "body" field out of the "data" member of the fetched document.
doc_data = Repo_Result["data"]
resu = doc_data["body"]
resu

image.png

'---<br />title: Python导出语雀文档<br />categories: [Python]<br />tags: [Python,语雀]<br />date: 2023-10-24<br />updated: 2023-10-24<br />cover:  https://mypic2016.oss-cn-beijing.aliyuncs.com/picGo/202310241331546.png<br />---\n\n\n<a name="xiwvL"></a>\n## 方法1\n\n1. 代码来源:[https://github.com/burpheart/yuque-crawl](https://github.com/burpheart/yuque-crawl)\n2. 限制:这个代码只能下载公开的仓库的md文件\n3. 根据自己需要,稍稍修改了下以便能下载自己指定的一些仓库,得到下面代码:\n```python\n# BY @burpheart\n# https://www.yuque.com/burpheart/phpaudit\n# https://github.com/burpheart\nimport sys\n\nimport requests\nimport json\nimport re\nimport os\nimport urllib.parse\n\ntset = []\n\n\ndef save_page(book_id, sulg, path):\n    docsdata = requests.get(\n        \'https://www.yuque.com/api/docs/\' + sulg + \'?book_id=\' + book_id + \'&merge_dynamic_data=false&mode=markdown\')\n    if (docsdata.status_code != 200):\n        print("文档下载失败 页面可能被删除 ", book_id, sulg,path, docsdata.content)\n        return\n    docsjson = json.loads(docsdata.content)\n\n    f = open(path, \'w\', encoding=\'utf-8\')\n    f.write(docsjson[\'data\'][\'sourcecode\'])\n    f.close()\n\n\ndef get_book(url, save_path):\n    docsdata = requests.get(url)\n    data = re.findall(r"decodeURIComponent\\(\\"(.+)\\"\\)\\);", docsdata.content.decode(\'utf-8\'))\n    docsjson = json.loads(urllib.parse.unquote(data[0]))\n    test = []\n    list = {}\n    temp = {}\n    md = ""\n    table = str.maketrans(\'\\/:*?"<>|\' + "\\n\\r", "___________")\n    prename = ""\n    if (os.path.exists(save_path + "/" + str(docsjson[\'book\'][\'id\'])) == False):\n        os.makedirs(save_path + "/" + str(docsjson[\'book\'][\'id\']))\n\n    for doc in docsjson[\'book\'][\'toc\']:\n        if (doc[\'type\'] == \'TITLE\' or doc[\'child_uuid\']!= \'\'):\n            filename = \'\'\n            list[doc[\'uuid\']] = {\'0\': doc[\'title\'], \'1\': doc[\'parent_uuid\']}\n            uuid = doc[\'uuid\']\n            temp[doc[\'uuid\']] = \'\'\n            while True:\n                if 
(list[uuid][\'1\'] != \'\'):\n                    if temp[doc[\'uuid\']] == \'\':\n                        temp[doc[\'uuid\']] = doc[\'title\'].translate(table)\n                    else:\n                        temp[doc[\'uuid\']] = list[uuid][\'0\'].translate(table) + \'/\' + temp[doc[\'uuid\']]\n                    uuid = list[uuid][\'1\']\n                else:\n                    temp[doc[\'uuid\']] = list[uuid][\'0\'].translate(table) + \'/\' + temp[doc[\'uuid\']]\n                    break\n            if ((os.path.exists(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'uuid\']])) == False):\n                os.makedirs(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'uuid\']])\n            if (temp[doc[\'uuid\']].endswith("/")):\n                md += "## " + temp[doc[\'uuid\']][:-1] + "\\n"\n            else:\n                md += "  " * (temp[doc[\'uuid\']].count("/") - 1) + "* " + temp[doc[\'uuid\']][\n                                                                         temp[doc[\'uuid\']].rfind("/") + 1:] + "\\n"\n        if (doc[\'url\'] != \'\'):\n            if doc[\'parent_uuid\'] != "":\n                if (temp[doc[\'parent_uuid\']].endswith("/")):\n                    md += " " * temp[doc[\'parent_uuid\']].count("/") + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                        temp[doc[\'parent_uuid\']] + "/" + doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n                else:\n                    md += "  " * temp[doc[\'parent_uuid\']].count("/") + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                        temp[doc[\'parent_uuid\']] + "/" + doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n\n                save_page(str(docsjson[\'book\'][\'id\']), doc[\'url\'],\n                          save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'parent_uuid\']] + "/" + doc[\n                              
\'title\'].translate(table) + \'.md\')\n            else:\n                md += " " + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                    doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n                save_page(str(docsjson[\'book\'][\'id\']), doc[\'url\'],\n                          save_path + "/" + str(docsjson[\'book\'][\'id\']) + "/" + doc[\n                              \'title\'].translate(table) + \'.md\')\n    f = open(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + "/SUMMARY.md", \'w\', encoding=\'utf-8\')\n    f.write(md)\n    f.close()\n\n\nif __name__ == \'__main__\':\n    repos ={"CAD_CAE":"cadcae",\n            "编程语言":"program",\n            "博客文章-公开": "blog"}\n\n    for key, value in repos.items():\n        url = f"https://www.yuque.com/xdd1997/{value}"\n        save_path = f"xdd1997/{key}"\n        get_book(url, save_path)\n        print(f"{key}下载完成")\n        \n```\n\n<a name="eJDYZ"></a>\n## 方法二\n希望能找到一种可以下载private仓库的方法<br />已测试不能运行的库\n\n- [yuque-helper/yuque2book](https://github.com/yuque-helper/yuque2book)\n- [atian25/yuque-exporter](https://github.com/atian25/yuque-exporter)\n\n\n<a name="shb5i"></a>\n## 方法三\n参考: [https://karobben.github.io/2021/03/02/Python/yuqueAPI/](https://karobben.github.io/2021/03/02/Python/yuqueAPI/)\n\n'

保存文档内容为 md 文件

# fil_path was never assigned anywhere in this guide, so the write below
# raised NameError; name the output file after the document slug.
fil_path = f"{slug}.md"
# Write the document body as UTF-8 text.
with open(fil_path, "w", encoding="utf-8") as fw:
    fw.write(resu)