Skip to content

Commit c562a64

Browse files
clam004 and Carson authored
Embeddings api (#59)
* Added an Embeddings class to embeddings.py for the Embeddings API
* Added the Embeddings API to the Python library and CLI, with documentation for both in README.md; also bumped the version number
* Includes black, ruff, and mypy fixes
* Changed the default embeddings model to bert-base-uncased

Co-authored-by: Carson <[email protected]>
1 parent 6cdca8f commit c562a64

File tree

6 files changed

+105
-2
lines changed

6 files changed

+105
-2
lines changed

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,16 @@ print(output_text)
453453
Space Robots are a great way to get your kids interested in science. After all, they are the future!
454454
```
455455

456+
## Embeddings API
457+
458+
Embeddings are vector representations of sequences. You can use these vectors for measuring the overall similarity between texts. Embeddings are useful for tasks such as search and retrieval.
459+
460+
```python
461+
resp = together.Embeddings.create("embed this sentence into a single vector", model="togethercomputer/bert-base-uncased")
462+
463+
print(resp['data'][0]['embedding']) # [0.06659205, 0.07896972, 0.007910785 ........]
464+
```
465+
456466
## Colab Tutorial
457467

458468
Follow along in our Colab (Google Colaboratory) Notebook Tutorial [Example Finetuning Project](https://colab.research.google.com/drive/11DwtftycpDSgp3Z1vnV-Cy68zvkGZL4K?usp=sharing).
@@ -512,6 +522,9 @@ together finetune download ft-dd93c727-f35e-41c2-a370-7d55b54128fa
512522

513523
# inference using your new finetuned model (with new finetuned model name from together models list)
514524
together complete "Space robots are" -m yourname/ft-dd93c727-f35e-41c2-a370-7d55b54128fa-2023-08-16-10-15-09
525+
526+
# create an embedding from your input sequence
527+
together embeddings "embed this sentence into a single vector" -m togethercomputer/bert-base-uncased
515528
```
516529

517530
## Contributing

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
44

55
[tool.poetry]
66
name = "together"
7-
version = "0.2.6"
7+
version = "0.2.7"
88
authors = [
99
"Together AI <[email protected]>"
1010
]

src/together/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
api_base_files = urllib.parse.urljoin(api_base, "/v1/files/")
1717
api_base_finetune = urllib.parse.urljoin(api_base, "/v1/fine-tunes/")
1818
api_base_instances = urllib.parse.urljoin(api_base, "instances/")
19+
api_base_embeddings = urllib.parse.urljoin(api_base, "api/v1/embeddings")
1920

2021
default_text_model = "togethercomputer/RedPajama-INCITE-7B-Chat"
2122
default_image_model = "runwayml/stable-diffusion-v1-5"
23+
default_embedding_model = "togethercomputer/bert-base-uncased"
2224
log_level = "WARNING"
2325

2426
MISSING_API_KEY_MESSAGE = """TOGETHER_API_KEY not found.
@@ -31,6 +33,7 @@
3133
min_samples = 100
3234

3335
from .complete import Complete
36+
from .embeddings import Embeddings
3437
from .error import *
3538
from .files import Files
3639
from .finetune import Finetune
@@ -45,13 +48,16 @@
4548
"api_base_files",
4649
"api_base_finetune",
4750
"api_base_instances",
51+
"api_base_embeddings",
4852
"default_text_model",
4953
"default_image_model",
54+
"default_embedding_model",
5055
"Models",
5156
"Complete",
5257
"Files",
5358
"Finetune",
5459
"Image",
60+
"Embeddings",
5561
"MAX_CONNECTION_RETRIES",
5662
"MISSING_API_KEY_MESSAGE",
5763
"BACKOFF_FACTOR",

src/together/cli/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import argparse
33

44
import together
5-
from together.commands import chat, complete, files, finetune, image, models
5+
from together.commands import chat, complete, embeddings, files, finetune, image, models
66
from together.utils import get_logger
77

88

@@ -49,6 +49,7 @@ def main() -> None:
4949
image.add_parser(subparser)
5050
files.add_parser(subparser)
5151
finetune.add_parser(subparser)
52+
embeddings.add_parser(subparser)
5253

5354
args = parser.parse_args()
5455

src/together/commands/embeddings.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import json
5+
6+
import together
7+
from together import Embeddings
8+
from together.utils import get_logger
9+
10+
11+
logger = get_logger(str(__name__))
12+
13+
14+
def add_parser(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Register the ``embeddings`` sub-command and its arguments.

    Args:
        subparsers: Sub-parser collection to which the ``embeddings``
            command is added.
    """
    COMMAND_NAME = "embeddings"
    subparser = subparsers.add_parser(COMMAND_NAME)

    # Required positional argument; argparse ignores ``default`` for a
    # positional without ``nargs``, so none is given.
    subparser.add_argument(
        "input",
        metavar="INPUT",
        type=str,
        help="A string providing context for the model to embed",
    )

    subparser.add_argument(
        "--model",
        "-m",
        default=together.default_embedding_model,
        type=str,
        # The help text must name the same default that is actually applied
        # above (the embedding model, not the text-completion model).
        help=(
            "The name of the model to query. "
            f"Default={together.default_embedding_model}"
        ),
    )
    # Stored on the parsed namespace as ``args.func`` for the caller to invoke.
    subparser.set_defaults(func=_run_complete)
34+
35+
36+
def _run_complete(args: argparse.Namespace) -> None:
    """Run the ``embeddings`` sub-command and print the response as JSON.

    Args:
        args: Parsed CLI arguments; ``args.input`` and ``args.model`` are
            forwarded to ``Embeddings.create``.

    Raises:
        SystemExit: With status 1 when ``together.AuthenticationError`` is
            raised by the request.
    """
    try:
        # Keep the try body to the one call that can raise the handled error.
        response = Embeddings.create(
            input=args.input,
            model=args.model,
        )
    except together.AuthenticationError:
        logger.critical(together.MISSING_API_KEY_MESSAGE)
        # A failed run must report a non-zero status so shells and scripts
        # can detect it; SystemExit also avoids relying on the site-provided
        # ``exit`` helper.
        raise SystemExit(1) from None

    print(json.dumps(response, indent=4))

src/together/embeddings.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from typing import Any, Dict, Optional
2+
3+
import together
4+
from together.utils import create_post_request, get_logger
5+
6+
7+
logger = get_logger(str(__name__))
8+
9+
10+
class Embeddings:
    """Entry point for creating embeddings through the embeddings endpoint."""

    @classmethod
    def create(
        cls,
        input: str,
        model: Optional[str] = "",
    ) -> Dict[str, Any]:
        """Request an embedding for ``input`` and return the decoded response.

        Args:
            input: The text to embed.
            model: Name of the embedding model. When empty or ``None``,
                ``together.default_embedding_model`` is used.

        Returns:
            The JSON body of the response converted to a ``dict``.

        Raises:
            together.JSONError: If the response body cannot be decoded into
                a ``dict``; carries the response's HTTP status code.
        """
        # Treat both "" and None as "not specified": the annotation allows
        # None, which must not be forwarded to the API as the model name.
        if not model:
            model = together.default_embedding_model

        parameter_payload = {
            "input": input,
            "model": model,
        }

        # send request
        response = create_post_request(
            url=together.api_base_embeddings, json=parameter_payload
        )

        try:
            response_json = dict(response.json())
        except Exception as e:
            # Chain the original error so the decoding failure stays visible.
            raise together.JSONError(e, http_status=response.status_code) from e
        return response_json

0 commit comments

Comments
 (0)