Skip to content

Commit e126d00

Browse files
committed
initial sketching of interfacing
1 parent 77f8564 commit e126d00

File tree

4 files changed

+166
-2
lines changed

4 files changed

+166
-2
lines changed

docs/proposals/0683-epp-architecture-proposal/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,17 @@ Due to the possibility of this becoming a bit of a dumping ground. The API will
8686

8787
The flow controller will consume resource regime data, and enforce proper resource sharing between workloads. This will primarily be done through a queuing mechanism [as described here](https://docs.google.com/document/d/1VZL7opFWuwgWquvgiOzLlXAJ633qZ9U-A0ZixGjBgaI/edit?usp=sharing).
8888

89-
#### Scheduling Layer
89+
#### Scheduling Subsystem
9090

91-
As the Scheduling Layer is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduling layer will be an experimentally-backed LB algorithm, with exposed config values.
91+
The Scheduling Subsystem is intended to be a pluggable framework for selecting the endpoint(s) in the pool that will serve a request.
92+
93+
As the Scheduling Subsystem is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduler will be an experimentally-backed load-balancing (LB) algorithm, with exposed config values.
9294

9395
The Scheduler will define a strong interface API, so that new scheduling algorithms may be plugged in and dark-launched to test against production traffic without impacting said traffic. Extensions are expected to adhere to the [Scheduler Subsystem definition](https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/603).
9496

97+
98+
<img src="./images/epp_arch.svg" alt="Scheduling Algorithm" width="1000" />
99+
95100
### `Non-extensible`
96101

97102
#### Ext-Proc Server
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Names are egregiously long, but they attempt to describe custom logic within a name.
2+
profileSelection: disagg-token-length
3+
schedulingResult: log-shadowbox-label-pd-result
4+
profiles:
5+
prefill:
6+
preschedule:
7+
- decode-prefix-cache-check
8+
filter:
9+
- is-prefill
10+
- has-required-accelerator
11+
score:
12+
- prefix-cache: 3
13+
- latency-scorer: 2
14+
selection:
15+
- best-score
16+
postschedule:
17+
- log-full-scores
18+
decode:
19+
filter:
20+
- is-decode
21+
score:
22+
- prefix-cache: 3
23+
- kv-cache-util: 5
24+
selection:
25+
- random-top-3
26+
shadowbox-decode:
27+
filter:
28+
- is-decode
29+
- is-tpu
30+
score:
31+
- prefix-cache-v2: 4
32+
- kv-cache-util: 1
33+
selection:
34+
- random-top-3

docs/proposals/0683-epp-architecture-proposal/images/scheduler_subsystem.svg

Lines changed: 1 addition & 0 deletions
Loading
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package framework
18+
19+
import (
20+
"context"
21+
"sync"
22+
23+
scheduling "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
24+
)
25+
26+
// Plugin is the parent type for all the scheduling framework plugins. It is
// embedded by the Scheduler, SchedulingProfile, PreSchedule, Filter, Scorer,
// Picker, and PostSchedule interfaces.
27+
type Plugin interface {
28+
// Name returns the plugin's name.
Name() string
29+
}
30+
31+
// Endpoint is a scheduling candidate that exposes its state and a mutable
// score. Slices of Endpoint are what the plugin interfaces receive and return.
// NOTE(review): GetState returns EndpointState by value, and EndpointState
// contains a sync.Map field; a sync.Map must not be copied after first use, so
// consider returning a pointer instead.
type Endpoint interface {
32+
// GetState returns the EndpointState associated with this endpoint.
GetState() EndpointState
33+
// GetScore returns the endpoint's current score.
GetScore() float32
34+
// SetScore sets the endpoint's score to val.
SetScore(val float32)
35+
}
36+
37+
// EndpointState holds the data associated with an endpoint in an unexported
// sync.Map. The key and value types are not fixed by this declaration.
type EndpointState struct {
38+
// A sync.Map is only needed if we do not plan on snapshotting the data.
39+
storage sync.Map
40+
}
41+
42+
// SchedulingResult holds lists of endpoints grouped under string keys.
// NOTE(review): the keys are presumably scheduling profile names — confirm.
type SchedulingResult struct {
43+
results map[string][]Endpoint
44+
}
45+
46+
// Scheduler is a Plugin that exposes its registered scheduling profiles,
// selects which of them to use, and interprets the results they produce.
// NOTE(review): the string map keys are presumably profile names — confirm.
type Scheduler interface {
47+
Plugin
48+
// ProfileSelection is an optional function that selects scheduling
49+
// profiles through the implemented logic, and returns a subset of the
50+
// registered scheduling profiles.
51+
ProfileSelection() map[string]SchedulingProfile
52+
53+
// SchedulingProfiles lists all of the scheduling profiles registered
54+
// with the scheduler.
55+
SchedulingProfiles() map[string]SchedulingProfile
56+
57+
// SchedulingResult takes the result(s) of the scheduling cycle(s) and
58+
// interprets the data so it can be consumed by request control.
59+
// For example: suppose you have 2 profiles, a ShadowBoxing Profile and a
60+
// Production Profile. SchedulingResult would know to simply log the result
61+
// of the ShadowBoxing profile, and do nothing else with it.
62+
SchedulingResult(map[string][]Endpoint) SchedulingResult
63+
}
64+
65+
// SchedulingProfile is an interface used to describe a profile that will
66+
// run for a given scheduling cycle.
67+
type SchedulingProfile interface {
68+
Plugin
69+
// PreSchedulePlugins are optional, and will be run at the start of a
70+
// scheduling cycle. This should be scoped to any foundational work needed
71+
// that is custom to this scheduling profile.
72+
PreSchedulePlugins() []PreSchedule
73+
// Filters lists all Filter plugins associated with this Profile. Filters
74+
// are optional.
75+
Filters() []Filter
76+
// Scorers lists all Scorer plugins associated with this Profile. At
77+
// least 1 scorer must be registered for a profile to be valid.
// NOTE(review): the int map value is presumably the scorer's weight — confirm.
78+
Scorers() map[Scorer]int
79+
// Selection returns the Picker plugin that picks the endpoint(s).
80+
Selection() Picker
81+
// PostSchedulePlugins lists all PostSchedule plugins associated with this
82+
// Profile. PostSchedulePlugins are run after every scheduling cycle,
83+
// and are optional.
84+
PostSchedulePlugins() []PostSchedule
85+
}
86+
87+
// PreSchedule will be run at the start of a scheduling cycle. This should be
88+
// scoped to any foundational work needed that is custom to this scheduling
89+
// profile.
90+
type PreSchedule interface {
91+
Plugin
92+
// PreSchedule receives the context, the cycle state, and the candidate
// endpoints. It has no return value.
PreSchedule(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint)
93+
}
94+
95+
// Filter runs before any scoring, and removes endpoints that are not fit for
96+
// selection. The framework will return an error to the client if the endpoints
97+
// are filtered to zero.
98+
type Filter interface {
99+
Plugin
100+
// Filter receives the candidate endpoints and returns the endpoints that
// remain after filtering.
Filter(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
101+
}
102+
103+
// Scorer applies a score to each remaining endpoint provided. Scorers SHOULD
104+
// keep their score values in a normalized range: [0, 1]. Any weighting should
105+
// be added at the SchedulingProfile configuration level.
106+
type Scorer interface {
107+
Plugin
108+
// Score receives the remaining endpoints and returns a slice of endpoints.
// NOTE(review): presumably the returned endpoints carry the scores applied
// by this scorer (e.g. via Endpoint.SetScore) — confirm.
Score(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
109+
}
110+
111+
// Picker selects the endpoint(s) from the provided list of scored endpoints.
112+
// Picker MUST return at least one endpoint.
113+
type Picker interface {
114+
Plugin
115+
// Selection receives the scored endpoints and returns the selected
// endpoint(s).
Selection(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
116+
}
117+
118+
// PostSchedule runs once per scheduling cycle, and is part of a scheduling
119+
// profile. PostSchedule performs any remaining work needed for the scheduling
120+
// cycle. PostSchedule is not expected to change any values of the parameters.
121+
type PostSchedule interface {
122+
Plugin
123+
// PostSchedule receives the context, the cycle state, and the endpoints
// that were selected. It has no return value.
PostSchedule(ctx context.Context, state scheduling.CycleState, selectedEndpoints []Endpoint)
124+
}

0 commit comments

Comments
 (0)