Skip to content

Commit 777637a

Browse files
authored
docs: reexport storages documentation from crawlee-python (#328)
Updates the `@apify/docusaurus-plugin-typedoc-api` plugin, which enables us to use the unified Python documentation pipeline (instead of handling it in each Python project separately). Decorates the symbols to be documented with `@docs_group` and `@docs_name` decorators (just like in [crawlee-python](apify/crawlee-python#655)). Those decorators could be moved up into some shared package (e.g. `apify-shared`, which already has the `@ignore_docs` decorator). Uses the new `reexports` feature of the plugin to load external API docs (from `crawlee.dev/python`) and re-render them in the current documentation instance.
1 parent 283be92 commit 777637a

17 files changed

+530
-771
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ build-api-reference:
4949
cd website && poetry run ./build_api_reference.sh
5050

5151
build-docs:
52-
cd website && npm clean-install && npm run build
52+
cd website && npm clean-install && poetry run npm run build
5353

5454
run-docs: build-api-reference
55-
cd website && npm clean-install && npm run start
55+
cd website && npm clean-install && poetry run npm run start

src/apify/_actor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from apify._models import ActorRun
2323
from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
2424
from apify._proxy_configuration import ProxyConfiguration
25-
from apify._utils import get_system_info, is_running_in_ipython
25+
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
2626
from apify.apify_storage_client import ApifyStorageClient
2727
from apify.log import _configure_logging, logger
2828
from apify.storages import Dataset, KeyValueStore, RequestQueue
@@ -39,6 +39,8 @@
3939
MainReturnType = TypeVar('MainReturnType')
4040

4141

42+
@docs_name('Actor')
43+
@docs_group('Classes')
4244
class _ActorType:
4345
"""The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
4446

src/apify/_configuration.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
from crawlee._utils.urls import validate_http_url
1212
from crawlee.configuration import Configuration as CrawleeConfiguration
1313

14+
from apify._utils import docs_group
1415

16+
17+
@docs_group('Classes')
1518
class Configuration(CrawleeConfiguration):
1619
"""A class for specifying the configuration of an Actor.
1720

src/apify/_models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
from crawlee._utils.models import timedelta_ms
1111
from crawlee._utils.urls import validate_http_url
1212

13+
from apify._utils import docs_group
1314

15+
16+
@docs_group('Data structures')
1417
class Webhook(BaseModel):
1518
__model_config__ = ConfigDict(populate_by_name=True)
1619

@@ -29,12 +32,14 @@ class Webhook(BaseModel):
2932
] = None
3033

3134

35+
@docs_group('Data structures')
3236
class ActorRunMeta(BaseModel):
3337
__model_config__ = ConfigDict(populate_by_name=True)
3438

3539
origin: Annotated[MetaOrigin, Field()]
3640

3741

42+
@docs_group('Data structures')
3843
class ActorRunStats(BaseModel):
3944
__model_config__ = ConfigDict(populate_by_name=True)
4045

@@ -55,6 +60,7 @@ class ActorRunStats(BaseModel):
5560
compute_units: Annotated[float, Field(alias='computeUnits')]
5661

5762

63+
@docs_group('Data structures')
5864
class ActorRunOptions(BaseModel):
5965
__model_config__ = ConfigDict(populate_by_name=True)
6066

@@ -64,6 +70,7 @@ class ActorRunOptions(BaseModel):
6470
disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
6571

6672

73+
@docs_group('Data structures')
6774
class ActorRunUsage(BaseModel):
6875
__model_config__ = ConfigDict(populate_by_name=True)
6976

@@ -81,6 +88,7 @@ class ActorRunUsage(BaseModel):
8188
proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
8289

8390

91+
@docs_group('Data structures')
8492
class ActorRun(BaseModel):
8593
__model_config__ = ConfigDict(populate_by_name=True)
8694

src/apify/_platform_event_manager.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
EventSystemInfoData,
2020
)
2121

22+
from apify._utils import docs_group
2223
from apify.log import logger
2324

2425
if TYPE_CHECKING:
@@ -30,11 +31,13 @@
3031
__all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
3132

3233

34+
@docs_group('Data structures')
3335
class PersistStateEvent(BaseModel):
3436
name: Literal[Event.PERSIST_STATE]
3537
data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
3638

3739

40+
@docs_group('Data structures')
3841
class SystemInfoEventData(BaseModel):
3942
mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
4043
mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
@@ -61,26 +64,31 @@ def to_crawlee_format(self) -> EventSystemInfoData:
6164
)
6265

6366

67+
@docs_group('Data structures')
6468
class SystemInfoEvent(BaseModel):
6569
name: Literal[Event.SYSTEM_INFO]
6670
data: SystemInfoEventData
6771

6872

73+
@docs_group('Data structures')
6974
class MigratingEvent(BaseModel):
7075
name: Literal[Event.MIGRATING]
7176
data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
7277

7378

79+
@docs_group('Data structures')
7480
class AbortingEvent(BaseModel):
7581
name: Literal[Event.ABORTING]
7682
data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
7783

7884

85+
@docs_group('Data structures')
7986
class ExitEvent(BaseModel):
8087
name: Literal[Event.EXIT]
8188
data: Annotated[EventExitData, Field(default_factory=EventExitData)]
8289

8390

91+
@docs_group('Data structures')
8492
class EventWithoutData(BaseModel):
8593
name: Literal[
8694
Event.SESSION_RETIRED,
@@ -93,11 +101,13 @@ class EventWithoutData(BaseModel):
93101
data: Any = None
94102

95103

104+
@docs_group('Data structures')
96105
class DeprecatedEvent(BaseModel):
97106
name: Literal['cpuInfo']
98107
data: Annotated[dict[str, Any], Field(default_factory=dict)]
99108

100109

110+
@docs_group('Data structures')
101111
class UnknownEvent(BaseModel):
102112
name: str
103113
data: Annotated[dict[str, Any], Field(default_factory=dict)]
@@ -125,6 +135,7 @@ class UnknownEvent(BaseModel):
125135
)
126136

127137

138+
@docs_group('Classes')
128139
class PlatformEventManager(EventManager):
129140
"""A class for managing Actor events.
130141

src/apify/_proxy_configuration.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from crawlee.proxy_configuration import _NewUrlFunction
1717

1818
from apify._configuration import Configuration
19+
from apify._utils import docs_group
1920
from apify.log import logger
2021

2122
if TYPE_CHECKING:
@@ -68,6 +69,7 @@ def _check(
6869
raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
6970

7071

72+
@docs_group('Classes')
7173
@dataclass
7274
class ProxyInfo(CrawleeProxyInfo):
7375
"""Provides information about a proxy connection that is used for requests."""
@@ -87,6 +89,7 @@ class ProxyInfo(CrawleeProxyInfo):
8789
"""
8890

8991

92+
@docs_group('Classes')
9093
class ProxyConfiguration(CrawleeProxyConfiguration):
9194
"""Configures a connection to a proxy server with the provided options.
9295

src/apify/_utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import builtins
44
import sys
55
from importlib import metadata
6+
from typing import Callable, Literal
67

78

89
def get_system_info() -> dict:
@@ -24,3 +25,34 @@ def get_system_info() -> dict:
2425

2526
def is_running_in_ipython() -> bool:
2627
return getattr(builtins, '__IPYTHON__', False)
28+
29+
30+
GroupName = Literal['Classes', 'Abstract classes', 'Data structures', 'Errors', 'Functions']
31+
32+
33+
def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
34+
"""Decorator to mark symbols for rendering and grouping in documentation.
35+
36+
This decorator is used purely for documentation purposes and does not alter the behavior
37+
of the decorated callable.
38+
"""
39+
40+
def wrapper(func: Callable) -> Callable:
41+
return func
42+
43+
return wrapper
44+
45+
46+
def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
47+
"""Decorator for renaming symbols in documentation.
48+
49+
This changes the rendered name of the symbol only in the rendered web documentation.
50+
51+
This decorator is used purely for documentation purposes and does not alter the behavior
52+
of the decorated callable.
53+
"""
54+
55+
def wrapper(func: Callable) -> Callable:
56+
return func
57+
58+
return wrapper

src/apify/apify_storage_client/_apify_storage_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from crawlee.base_storage_client import BaseStorageClient
66

77
from apify._configuration import Configuration
8+
from apify._utils import docs_group
89
from apify.apify_storage_client._dataset_client import DatasetClient
910
from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
1011
from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
@@ -13,6 +14,7 @@
1314
from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
1415

1516

17+
@docs_group('Classes')
1618
class ApifyStorageClient(BaseStorageClient):
1719
"""A storage client implementation based on the Apify platform storage."""
1820

website/build_api_reference.sh

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,4 @@
11
#!/bin/bash
22

3-
# On macOS, sed requires a space between -i and '' to specify no backup should be done
4-
# On Linux, sed requires no space between -i and '' to specify no backup should be done
5-
sed_no_backup() {
6-
if [[ $(uname) = "Darwin" ]]; then
7-
sed -i '' "$@"
8-
else
9-
sed -i'' "$@"
10-
fi
11-
}
12-
13-
# Create docspec dump of this package's source code through pydoc-markdown
14-
python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
15-
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl
16-
17-
# Create docspec dump from the right version of the apify-shared package
18-
apify_shared_version=$(python -c "import apify_shared; print(apify_shared.__version__)")
19-
apify_shared_tempdir=$(realpath "$(mktemp -d)")
20-
git clone --quiet https://github.com/apify/apify-shared-python.git "${apify_shared_tempdir}"
21-
cp ./pydoc-markdown.yml "${apify_shared_tempdir}/pydoc-markdown.yml"
22-
sed_no_backup "s#search_path: \[../src\]#search_path: \[./src\]#g" "${apify_shared_tempdir}/pydoc-markdown.yml"
23-
24-
(
25-
cd "${apify_shared_tempdir}";
26-
git checkout --quiet "v${apify_shared_version}";
27-
pydoc-markdown --quiet --dump > ./apify-shared-docspec-dump.jsonl
28-
)
29-
30-
cp "${apify_shared_tempdir}/apify-shared-docspec-dump.jsonl" .
31-
sed_no_backup "s#${apify_shared_tempdir}#REPO_ROOT_PLACEHOLDER#g" apify-shared-docspec-dump.jsonl
32-
33-
rm -rf "${apify_shared_tempdir}"
34-
353
# Generate import shortcuts from the modules
364
python generate_module_shortcuts.py
37-
38-
# Transform the docspec dumps into Typedoc-compatible docs tree
39-
node transformDocs.js

website/docusaurus.config.js

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
1-
/* eslint-disable global-require,import/no-extraneous-dependencies */
1+
const path = require('path');
2+
23
const { config } = require('@apify/docs-theme');
4+
35
const { externalLinkProcessor } = require('./tools/utils/externalLink');
4-
const { groupSort } = require('./transformDocs.js');
6+
7+
const GROUP_ORDER = [
8+
'Classes',
9+
'Data structures',
10+
];
11+
12+
const groupSort = (g1, g2) => {
13+
if (GROUP_ORDER.includes(g1) && GROUP_ORDER.includes(g2)) {
14+
return GROUP_ORDER.indexOf(g1) - GROUP_ORDER.indexOf(g2);
15+
}
16+
return g1.localeCompare(g2);
17+
};
518

619
const { absoluteUrl } = config;
720

@@ -15,6 +28,7 @@ module.exports = {
1528
projectName: 'apify-sdk-python',
1629
scripts: ['/js/custom.js'],
1730
favicon: 'img/favicon.ico',
31+
githubHost: 'github.com',
1832
onBrokenLinks:
1933
/** @type {import('@docusaurus/types').ReportingSeverity} */ ('warn'),
2034
onBrokenMarkdownLinks:
@@ -83,10 +97,27 @@ module.exports = {
8397
typedocOptions: {
8498
excludeExternals: false,
8599
},
86-
pathToCurrentVersionTypedocJSON: `${__dirname}/api-typedoc-generated.json`,
87100
sortSidebar: groupSort,
88101
routeBasePath: 'reference',
89102
python: true,
103+
pythonOptions: {
104+
pythonModulePath: path.join(__dirname, '../src/apify'),
105+
moduleShortcutsPath: path.join(__dirname, '/module_shortcuts.json'),
106+
},
107+
reexports: [
108+
{
109+
url: 'https://crawlee.dev/python/api/class/Dataset',
110+
group: 'Classes',
111+
},
112+
{
113+
url: 'https://crawlee.dev/python/api/class/KeyValueStore',
114+
group: 'Classes',
115+
},
116+
{
117+
url: 'https://crawlee.dev/python/api/class/RequestQueue',
118+
group: 'Classes',
119+
},
120+
],
90121
},
91122
],
92123
...config.plugins,

0 commit comments

Comments
 (0)