bytedance
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 29 additions & 0 deletions
diff --git a/codes/.gitignore b/codes/.gitignore
Lines changed: 12 additions & 0 deletions
diff --git a/codes/.python-version b/codes/.python-version
Lines changed: 1 addition & 0 deletions
diff --git a/codes/README.md b/codes/README.md
Lines changed: 140 additions & 0 deletions
diff --git a/codes/action_parser.py b/codes/action_parser.py
Lines changed: 0 additions & 13 deletions
diff --git a/codes/makefile b/codes/makefile
Lines changed: 2 additions & 0 deletions
diff --git a/codes/prompts.py b/codes/prompts.py
Lines changed: 0 additions & 59 deletions
diff --git a/codes/pyproject.toml b/codes/pyproject.toml
Lines changed: 37 additions & 0 deletions
diff --git a/codes/tests/action_parser_test.py b/codes/tests/action_parser_test.py
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,29 @@
+name: Test
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches:
+      - "**"
+  push:
+    branches:
+      - "main"
+
+jobs:
+  test_ui_tars:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        working-directory: codes
+        run: |
+          python -m pip install --upgrade pip uv
+          uv sync
+      - name: Run unit tests
+        working-directory: codes
+        run: |
+          make test
@@ -0,0 +1,12 @@
+# Python-generated files
+__pycache__/
+.pytest_cache/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+.DS_Store
@@ -0,0 +1 @@
+3.10
@@ -0,0 +1,140 @@
+# ui-tars
+
+A Python package that parses LLM-generated GUI action instructions, generates pyautogui scripts from them, and supports coordinate conversion and smart image resizing.
+
+---
+
+## Introduction
+
+`ui-tars` parses LLM-generated GUI action instructions into structured actions, generates pyautogui scripts from them, and handles coordinate conversion and smart image resizing. Key features:
+
+- Supports multiple LLM output formats (e.g., Qwen, Doubao)
+- Automatically handles coordinate scaling and format conversion
+- One-click generation of pyautogui automation scripts
+
+---
+
+## Quick Start
+
+### Installation
+
+```bash
+pip install ui-tars
+# or
+uv pip install ui-tars
+```
+
+### Parse LLM output into structured actions
+
+```python
+from ui_tars.action_parser import parse_action_to_structure_output
+
+response = "Thought: Click the button\nAction: click(start_box='(0.1,0.2,0.1,0.2)')"
+original_image_width, original_image_height = 1920, 1080
+parsed_dict = parse_action_to_structure_output(
+    response,
+    factor=1000,
+    origin_resized_height=original_image_height,
+    origin_resized_width=original_image_width,
+    model_type="doubao"
+)
+print(parsed_dict)
+```
+
+### Generate pyautogui automation script
+
+```python
+from ui_tars.action_parser import parsing_response_to_pyautogui_code
+
+pyautogui_code = parsing_response_to_pyautogui_code(parsed_dict, original_image_height, original_image_width)
+print(pyautogui_code)
+```
+
+### Visualize coordinates on the image (optional)
+
+```python
+from PIL import Image, ImageDraw
+import numpy as np
+import matplotlib.pyplot as plt
+
+image = Image.open("your_image_path.png")
+start_box = parsed_dict[0]["action_inputs"]["start_box"]
+coordinates = eval(start_box)  # NOTE: start_box originates from model output; prefer ast.literal_eval outside of demos
+x1 = int(coordinates[0] * original_image_width)
+y1 = int(coordinates[1] * original_image_height)
+draw = ImageDraw.Draw(image)
+radius = 5
+draw.ellipse((x1 - radius, y1 - radius, x1 + radius, y1 + radius), fill="red", outline="red")
+plt.imshow(np.array(image))
+plt.axis("off")
+plt.show()
+```
+
+---
+
+## API Documentation
+
+### parse_action_to_structure_output
+
+```python
+def parse_action_to_structure_output(
+    text: str,
+    factor: int,
+    origin_resized_height: int,
+    origin_resized_width: int,
+    model_type: str = "qwen25vl",
+    max_pixels: int = 16384 * 28 * 28,
+    min_pixels: int = 100 * 28 * 28
+) -> list[dict]:
+    ...
+```
+
+**Description:**
+Parses LLM output action instructions into structured dictionaries, automatically handling coordinate scaling and box/point format conversion.
+
+**Parameters:**
+- `text`: The LLM output string
+- `factor`: Scaling factor
+- `origin_resized_height`/`origin_resized_width`: Original image height/width
+- `model_type`: Model type (e.g., "qwen25vl", "doubao")
+- `max_pixels`/`min_pixels`: Image pixel upper/lower limits
+
+**Returns:**
+A list of structured actions, each as a dict with fields like `action_type`, `action_inputs`, `thought`, etc.
+
+---
+
+### parsing_response_to_pyautogui_code
+
+```python
+def parsing_response_to_pyautogui_code(
+    responses: dict | list[dict],
+    image_height: int,
+    image_width: int,
+    input_swap: bool = True
+) -> str:
+    ...
+```
+
+**Description:**
+Converts structured actions into a pyautogui script string, supporting click, type, hotkey, drag, scroll, and more.
+
+**Parameters:**
+- `responses`: Structured actions (dict or list of dicts)
+- `image_height`/`image_width`: Image height/width
+- `input_swap`: Whether to use clipboard paste for typing (default: `True`)
+
+**Returns:**
+A pyautogui script string, ready for automation execution.
+
+---
+
+## Contribution
+
+Contributions, issues, and suggestions are welcome!
+
+---
+
+## License
+
+Apache-2.0 License
@@ -0,0 +1,2 @@
+test:
+	uv run python3 -m unittest discover tests '*_test.py'
@@ -0,0 +1,37 @@
+[project]
+name = "ui-tars"
+version = "0.1.3"
+description = "Parsing LLM-generated GUI action instructions, automatically generating pyautogui scripts, and supporting coordinate conversion and smart image resizing."
+readme = "README.md"
+authors = [
+    { name = "liangshihao.0828", email = "[email protected]" },
+    { name = "jinxin001", email = "[email protected]" }
+]
+requires-python = ">=3.10,<4.0"
+dependencies = []
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.envs.test.scripts]
+test = "python -m unittest discover tests '*_test.py'"
+publish = "python -m unittest discover tests '*_test.py' && uv build && uv publish"
+
+[tool.black]
+line-length = 88
+target-version = ['py310']
+include = '\.pyi?$'
+
+[tool.hatch.build]
+include = [
+    "ui_tars/**/*.py",
+    "!ui_tars/**/tests/*.py",
+    "!ui_tars/**/tests.py"
+]
+
+[tool.uv]
+dev-dependencies = [
+    "matplotlib>=3.10.3",
+    "pillow>=11.2.1",
+]
@@ -0,0 +1,37 @@
+import unittest
+
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ui_tars.action_parser import (
+    parsing_response_to_pyautogui_code,
+    parse_action,
+    parse_action_to_structure_output,
+)
+
+
+class TestActionParser(unittest.TestCase):
+    def test_parse_action(self):
+        action_str = "click(start_box='(10,20,30,40)')"
+        result = parse_action(action_str)
+        self.assertEqual(result['function'], 'click')
+        self.assertEqual(result['args']['start_box'], '(10,20,30,40)')
+
+    def test_parse_action_to_structure_output(self):
+        text = "Thought: test\nAction: click(start_box='(10,20,30,40)')"
+        actions = parse_action_to_structure_output(
+            text, factor=28, origin_resized_height=224, origin_resized_width=224
+        )
+        self.assertEqual(actions[0]['action_type'], 'click')
+        self.assertIn('start_box', actions[0]['action_inputs'])
+
+    def test_parsing_response_to_pyautogui_code(self):
+        responses = {"action_type": "hotkey", "action_inputs": {"hotkey": "ctrl v"}}
+        code = parsing_response_to_pyautogui_code(responses, 224, 224)
+        self.assertIn('pyautogui.hotkey', code)
+
+
+if __name__ == '__main__':
+    unittest.main()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+test:`
	`2`	`+ uv run python3 -m unittest discover tests '*_test.py'`